11ae08745Sheppo /* 21ae08745Sheppo * CDDL HEADER START 31ae08745Sheppo * 41ae08745Sheppo * The contents of this file are subject to the terms of the 51ae08745Sheppo * Common Development and Distribution License (the "License"). 61ae08745Sheppo * You may not use this file except in compliance with the License. 71ae08745Sheppo * 81ae08745Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91ae08745Sheppo * or http://www.opensolaris.org/os/licensing. 101ae08745Sheppo * See the License for the specific language governing permissions 111ae08745Sheppo * and limitations under the License. 121ae08745Sheppo * 131ae08745Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141ae08745Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151ae08745Sheppo * If applicable, add the following below this CDDL HEADER, with the 161ae08745Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171ae08745Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181ae08745Sheppo * 191ae08745Sheppo * CDDL HEADER END 201ae08745Sheppo */ 211ae08745Sheppo 221ae08745Sheppo /* 23d84f0041SAlexandre Chartre * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 241ae08745Sheppo * Use is subject to license terms. 251ae08745Sheppo */ 261ae08745Sheppo 271ae08745Sheppo /* 281ae08745Sheppo * LDoms virtual disk client (vdc) device driver 291ae08745Sheppo * 301ae08745Sheppo * This driver runs on a guest logical domain and communicates with the virtual 311ae08745Sheppo * disk server (vds) driver running on the service domain which is exporting 321ae08745Sheppo * virtualized "disks" to the guest logical domain. 331ae08745Sheppo * 341ae08745Sheppo * The driver can be divided into four sections: 351ae08745Sheppo * 361ae08745Sheppo * 1) generic device driver housekeeping 371ae08745Sheppo * _init, _fini, attach, detach, ops structures, etc. 
381ae08745Sheppo * 391ae08745Sheppo * 2) communication channel setup 401ae08745Sheppo * Setup the communications link over the LDC channel that vdc uses to 411ae08745Sheppo * talk to the vDisk server. Initialise the descriptor ring which 421ae08745Sheppo * allows the LDC clients to transfer data via memory mappings. 431ae08745Sheppo * 441ae08745Sheppo * 3) Support exported to upper layers (filesystems, etc) 451ae08745Sheppo * The upper layers call into vdc via strategy(9E) and DKIO(7I) 461ae08745Sheppo * ioctl calls. vdc will copy the data to be written to the descriptor 471ae08745Sheppo * ring or maps the buffer to store the data read by the vDisk 481ae08745Sheppo * server into the descriptor ring. It then sends a message to the 491ae08745Sheppo * vDisk server requesting it to complete the operation. 501ae08745Sheppo * 511ae08745Sheppo * 4) Handling responses from vDisk server. 521ae08745Sheppo * The vDisk server will ACK some or all of the messages vdc sends to it 531ae08745Sheppo * (this is configured during the handshake). Upon receipt of an ACK 541ae08745Sheppo * vdc will check the descriptor ring and signal to the upper layer 551ae08745Sheppo * code waiting on the IO. 
561ae08745Sheppo */ 571ae08745Sheppo 58e1ebb9ecSlm66018 #include <sys/atomic.h> 591ae08745Sheppo #include <sys/conf.h> 601ae08745Sheppo #include <sys/disp.h> 611ae08745Sheppo #include <sys/ddi.h> 621ae08745Sheppo #include <sys/dkio.h> 631ae08745Sheppo #include <sys/efi_partition.h> 641ae08745Sheppo #include <sys/fcntl.h> 651ae08745Sheppo #include <sys/file.h> 66366a92acSlm66018 #include <sys/kstat.h> 671ae08745Sheppo #include <sys/mach_descrip.h> 681ae08745Sheppo #include <sys/modctl.h> 691ae08745Sheppo #include <sys/mdeg.h> 701ae08745Sheppo #include <sys/note.h> 711ae08745Sheppo #include <sys/open.h> 72d10e4ef2Snarayan #include <sys/sdt.h> 731ae08745Sheppo #include <sys/stat.h> 741ae08745Sheppo #include <sys/sunddi.h> 751ae08745Sheppo #include <sys/types.h> 761ae08745Sheppo #include <sys/promif.h> 772f5224aeSachartre #include <sys/var.h> 781ae08745Sheppo #include <sys/vtoc.h> 791ae08745Sheppo #include <sys/archsystm.h> 801ae08745Sheppo #include <sys/sysmacros.h> 811ae08745Sheppo 821ae08745Sheppo #include <sys/cdio.h> 831ae08745Sheppo #include <sys/dktp/fdisk.h> 8487a7269eSachartre #include <sys/dktp/dadkio.h> 852f5224aeSachartre #include <sys/mhd.h> 861ae08745Sheppo #include <sys/scsi/generic/sense.h> 872f5224aeSachartre #include <sys/scsi/impl/uscsi.h> 882f5224aeSachartre #include <sys/scsi/impl/services.h> 892f5224aeSachartre #include <sys/scsi/targets/sddef.h> 901ae08745Sheppo 911ae08745Sheppo #include <sys/ldoms.h> 921ae08745Sheppo #include <sys/ldc.h> 931ae08745Sheppo #include <sys/vio_common.h> 941ae08745Sheppo #include <sys/vio_mailbox.h> 9517cadca8Slm66018 #include <sys/vio_util.h> 961ae08745Sheppo #include <sys/vdsk_common.h> 971ae08745Sheppo #include <sys/vdsk_mailbox.h> 981ae08745Sheppo #include <sys/vdc.h> 991ae08745Sheppo 100342440ecSPrasad Singamsetty #define VD_OLDVTOC_LIMIT 0x7fffffff 101342440ecSPrasad Singamsetty 1021ae08745Sheppo /* 1031ae08745Sheppo * function prototypes 1041ae08745Sheppo */ 1051ae08745Sheppo 1061ae08745Sheppo /* standard 
driver functions */ 1071ae08745Sheppo static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 1081ae08745Sheppo static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 1091ae08745Sheppo static int vdc_strategy(struct buf *buf); 1101ae08745Sheppo static int vdc_print(dev_t dev, char *str); 1111ae08745Sheppo static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 1121ae08745Sheppo static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 1131ae08745Sheppo static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 1141ae08745Sheppo static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1151ae08745Sheppo cred_t *credp, int *rvalp); 1161ae08745Sheppo static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 1171ae08745Sheppo static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 1181ae08745Sheppo 1191ae08745Sheppo static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 1201ae08745Sheppo void *arg, void **resultp); 1211ae08745Sheppo static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 1221ae08745Sheppo static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 1235b98b509Sachartre static int vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 1245b98b509Sachartre int mod_flags, char *name, caddr_t valuep, int *lengthp); 1251ae08745Sheppo 1261ae08745Sheppo /* setup */ 1270d0c8d4bSnarayan static void vdc_min(struct buf *bufp); 1280a55fbb7Slm66018 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 1298cd10891Snarayan static int vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr); 1301ae08745Sheppo static int vdc_start_ldc_connection(vdc_t *vdc); 1311ae08745Sheppo static int vdc_create_device_nodes(vdc_t *vdc); 1324bac2208Snarayan static int vdc_create_device_nodes_efi(vdc_t *vdc); 1334bac2208Snarayan static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 134366a92acSlm66018 static void vdc_create_io_kstats(vdc_t *vdc); 135366a92acSlm66018 static void 
vdc_create_err_kstats(vdc_t *vdc); 136366a92acSlm66018 static void vdc_set_err_kstats(vdc_t *vdc); 137655fd6a9Sachartre static int vdc_get_md_node(dev_info_t *dip, md_t **mdpp, 1388cd10891Snarayan mde_cookie_t *vd_nodep); 1398cd10891Snarayan static int vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep); 1408cd10891Snarayan static void vdc_fini_ports(vdc_t *vdc); 1418cd10891Snarayan static void vdc_switch_server(vdc_t *vdcp); 1420a55fbb7Slm66018 static int vdc_do_ldc_up(vdc_t *vdc); 1438cd10891Snarayan static void vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr); 1441ae08745Sheppo static int vdc_init_descriptor_ring(vdc_t *vdc); 1451ae08745Sheppo static void vdc_destroy_descriptor_ring(vdc_t *vdc); 1464bac2208Snarayan static int vdc_setup_devid(vdc_t *vdc); 147edcc0754Sachartre static void vdc_store_label_efi(vdc_t *, efi_gpt_t *, efi_gpe_t *); 148342440ecSPrasad Singamsetty static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, 149342440ecSPrasad Singamsetty struct extvtoc *); 15078fcd0a1Sachartre static void vdc_store_label_unk(vdc_t *vdc); 15178fcd0a1Sachartre static boolean_t vdc_is_opened(vdc_t *vdc); 152de3a5331SRamesh Chitrothu static void vdc_update_size(vdc_t *vdc, size_t, size_t, size_t); 153*65908c77Syu, larry liu - Sun Microsystems - Beijing China static int vdc_update_vio_bsize(vdc_t *vdc, uint32_t); 1541ae08745Sheppo 1551ae08745Sheppo /* handshake with vds */ 1560a55fbb7Slm66018 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 1573af08d82Slm66018 static int vdc_ver_negotiation(vdc_t *vdcp); 1581ae08745Sheppo static int vdc_init_attr_negotiation(vdc_t *vdc); 1593af08d82Slm66018 static int vdc_attr_negotiation(vdc_t *vdcp); 1601ae08745Sheppo static int vdc_init_dring_negotiate(vdc_t *vdc); 1613af08d82Slm66018 static int vdc_dring_negotiation(vdc_t *vdcp); 1623af08d82Slm66018 static int vdc_send_rdx(vdc_t *vdcp); 1633af08d82Slm66018 static int vdc_rdx_exchange(vdc_t *vdcp); 1640a55fbb7Slm66018 static boolean_t 
vdc_is_supported_version(vio_ver_msg_t *ver_msg); 1651ae08745Sheppo 1660a55fbb7Slm66018 /* processing incoming messages from vDisk server */ 1671ae08745Sheppo static void vdc_process_msg_thread(vdc_t *vdc); 1683af08d82Slm66018 static int vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 1693af08d82Slm66018 1700a55fbb7Slm66018 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 1713af08d82Slm66018 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 1720a55fbb7Slm66018 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 1730a55fbb7Slm66018 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 1740a55fbb7Slm66018 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 1753af08d82Slm66018 static int vdc_send_request(vdc_t *vdcp, int operation, 1763af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 1773af08d82Slm66018 int cb_type, void *cb_arg, vio_desc_direction_t dir); 1783af08d82Slm66018 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 1793af08d82Slm66018 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 1803af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 1813af08d82Slm66018 int cb_type, void *cb_arg, vio_desc_direction_t dir); 1822f5224aeSachartre static int vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, 1832f5224aeSachartre size_t nbytes, int slice, diskaddr_t offset, int cb_type, 1842f5224aeSachartre void *cb_arg, vio_desc_direction_t dir, boolean_t); 1853af08d82Slm66018 1863af08d82Slm66018 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 18711f54b6eSAlexandre Chartre static int vdc_drain_response(vdc_t *vdcp, vio_cb_type_t cb_type, 18811f54b6eSAlexandre Chartre struct buf *buf); 1891ae08745Sheppo static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 1903af08d82Slm66018 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 191e1ebb9ecSlm66018 static int 
vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 1921ae08745Sheppo 1931ae08745Sheppo /* dkio */ 1942f5224aeSachartre static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, 1952f5224aeSachartre int *rvalp); 196edcc0754Sachartre static int vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg); 19778fcd0a1Sachartre static void vdc_create_fake_geometry(vdc_t *vdc); 19878fcd0a1Sachartre static int vdc_validate_geometry(vdc_t *vdc); 19978fcd0a1Sachartre static void vdc_validate(vdc_t *vdc); 20078fcd0a1Sachartre static void vdc_validate_task(void *arg); 201d10e4ef2Snarayan static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 202d10e4ef2Snarayan int mode, int dir); 2034bac2208Snarayan static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 2044bac2208Snarayan int mode, int dir); 2054bac2208Snarayan static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 2064bac2208Snarayan int mode, int dir); 207d10e4ef2Snarayan static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 208d10e4ef2Snarayan int mode, int dir); 209d10e4ef2Snarayan static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 210d10e4ef2Snarayan int mode, int dir); 211342440ecSPrasad Singamsetty static int vdc_get_extvtoc_convert(vdc_t *vdc, void *from, void *to, 212342440ecSPrasad Singamsetty int mode, int dir); 213342440ecSPrasad Singamsetty static int vdc_set_extvtoc_convert(vdc_t *vdc, void *from, void *to, 214342440ecSPrasad Singamsetty int mode, int dir); 215d10e4ef2Snarayan static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 216d10e4ef2Snarayan int mode, int dir); 217d10e4ef2Snarayan static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 218d10e4ef2Snarayan int mode, int dir); 2194bac2208Snarayan static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 2204bac2208Snarayan int mode, int dir); 2214bac2208Snarayan static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 2224bac2208Snarayan int 
mode, int dir); 2231ae08745Sheppo 2242f5224aeSachartre static void vdc_ownership_update(vdc_t *vdc, int ownership_flags); 2252f5224aeSachartre static int vdc_access_set(vdc_t *vdc, uint64_t flags, int mode); 2262f5224aeSachartre static vdc_io_t *vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf); 2272f5224aeSachartre static int vdc_failfast_check_resv(vdc_t *vdc); 2282f5224aeSachartre 2291ae08745Sheppo /* 2301ae08745Sheppo * Module variables 2311ae08745Sheppo */ 232e1ebb9ecSlm66018 233e1ebb9ecSlm66018 /* 234e1ebb9ecSlm66018 * Tunable variables to control how long vdc waits before timing out on 235e1ebb9ecSlm66018 * various operations 236e1ebb9ecSlm66018 */ 2373c96341aSnarayan static int vdc_hshake_retries = 3; 238e1ebb9ecSlm66018 239655fd6a9Sachartre static int vdc_timeout = 0; /* units: seconds */ 2408cd10891Snarayan static int vdc_ldcup_timeout = 1; /* units: seconds */ 241655fd6a9Sachartre 2423af08d82Slm66018 static uint64_t vdc_hz_min_ldc_delay; 2433af08d82Slm66018 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 2443af08d82Slm66018 static uint64_t vdc_hz_max_ldc_delay; 2453af08d82Slm66018 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 2463af08d82Slm66018 2473af08d82Slm66018 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 2483af08d82Slm66018 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 249e1ebb9ecSlm66018 250e1ebb9ecSlm66018 /* values for dumping - need to run in a tighter loop */ 251e1ebb9ecSlm66018 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 252e1ebb9ecSlm66018 static int vdc_dump_retries = 100; 253e1ebb9ecSlm66018 2542f5224aeSachartre static uint16_t vdc_scsi_timeout = 60; /* 60s units: seconds */ 2552f5224aeSachartre 2562f5224aeSachartre static uint64_t vdc_ownership_delay = 6 * MICROSEC; /* 6s units: usec */ 2572f5224aeSachartre 258e1ebb9ecSlm66018 /* Count of the number of vdc instances attached */ 259e1ebb9ecSlm66018 static volatile uint32_t vdc_instance_count = 0; 2601ae08745Sheppo 
2612f5224aeSachartre /* Tunable to log all SCSI errors */ 2622f5224aeSachartre static boolean_t vdc_scsi_log_error = B_FALSE; 2632f5224aeSachartre 2641ae08745Sheppo /* Soft state pointer */ 2651ae08745Sheppo static void *vdc_state; 2661ae08745Sheppo 2673af08d82Slm66018 /* 2683af08d82Slm66018 * Controlling the verbosity of the error/debug messages 2693af08d82Slm66018 * 2703af08d82Slm66018 * vdc_msglevel - controls level of messages 2713af08d82Slm66018 * vdc_matchinst - 64-bit variable where each bit corresponds 2723af08d82Slm66018 * to the vdc instance the vdc_msglevel applies. 2733af08d82Slm66018 */ 2743af08d82Slm66018 int vdc_msglevel = 0x0; 2753af08d82Slm66018 uint64_t vdc_matchinst = 0ull; 2761ae08745Sheppo 2770a55fbb7Slm66018 /* 2780a55fbb7Slm66018 * Supported vDisk protocol version pairs. 2790a55fbb7Slm66018 * 2800a55fbb7Slm66018 * The first array entry is the latest and preferred version. 2810a55fbb7Slm66018 */ 28217cadca8Slm66018 static const vio_ver_t vdc_version[] = {{1, 1}}; 2831ae08745Sheppo 2841ae08745Sheppo static struct cb_ops vdc_cb_ops = { 2851ae08745Sheppo vdc_open, /* cb_open */ 2861ae08745Sheppo vdc_close, /* cb_close */ 2871ae08745Sheppo vdc_strategy, /* cb_strategy */ 2881ae08745Sheppo vdc_print, /* cb_print */ 2891ae08745Sheppo vdc_dump, /* cb_dump */ 2901ae08745Sheppo vdc_read, /* cb_read */ 2911ae08745Sheppo vdc_write, /* cb_write */ 2921ae08745Sheppo vdc_ioctl, /* cb_ioctl */ 2931ae08745Sheppo nodev, /* cb_devmap */ 2941ae08745Sheppo nodev, /* cb_mmap */ 2951ae08745Sheppo nodev, /* cb_segmap */ 2961ae08745Sheppo nochpoll, /* cb_chpoll */ 2975b98b509Sachartre vdc_prop_op, /* cb_prop_op */ 2981ae08745Sheppo NULL, /* cb_str */ 2991ae08745Sheppo D_MP | D_64BIT, /* cb_flag */ 3001ae08745Sheppo CB_REV, /* cb_rev */ 3011ae08745Sheppo vdc_aread, /* cb_aread */ 3021ae08745Sheppo vdc_awrite /* cb_awrite */ 3031ae08745Sheppo }; 3041ae08745Sheppo 3051ae08745Sheppo static struct dev_ops vdc_ops = { 3061ae08745Sheppo DEVO_REV, /* devo_rev */ 
3071ae08745Sheppo 0, /* devo_refcnt */ 3081ae08745Sheppo vdc_getinfo, /* devo_getinfo */ 3091ae08745Sheppo nulldev, /* devo_identify */ 3101ae08745Sheppo nulldev, /* devo_probe */ 3111ae08745Sheppo vdc_attach, /* devo_attach */ 3121ae08745Sheppo vdc_detach, /* devo_detach */ 3131ae08745Sheppo nodev, /* devo_reset */ 3141ae08745Sheppo &vdc_cb_ops, /* devo_cb_ops */ 3151ae08745Sheppo NULL, /* devo_bus_ops */ 31619397407SSherry Moore nulldev, /* devo_power */ 31719397407SSherry Moore ddi_quiesce_not_needed, /* devo_quiesce */ 3181ae08745Sheppo }; 3191ae08745Sheppo 3201ae08745Sheppo static struct modldrv modldrv = { 3211ae08745Sheppo &mod_driverops, 322205eeb1aSlm66018 "virtual disk client", 3231ae08745Sheppo &vdc_ops, 3241ae08745Sheppo }; 3251ae08745Sheppo 3261ae08745Sheppo static struct modlinkage modlinkage = { 3271ae08745Sheppo MODREV_1, 3281ae08745Sheppo &modldrv, 3291ae08745Sheppo NULL 3301ae08745Sheppo }; 3311ae08745Sheppo 3321ae08745Sheppo /* -------------------------------------------------------------------------- */ 3331ae08745Sheppo 3341ae08745Sheppo /* 3351ae08745Sheppo * Device Driver housekeeping and setup 3361ae08745Sheppo */ 3371ae08745Sheppo 3381ae08745Sheppo int 3391ae08745Sheppo _init(void) 3401ae08745Sheppo { 3411ae08745Sheppo int status; 3421ae08745Sheppo 3431ae08745Sheppo if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 3441ae08745Sheppo return (status); 3451ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) 3461ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3471ae08745Sheppo return (status); 3481ae08745Sheppo } 3491ae08745Sheppo 3501ae08745Sheppo int 3511ae08745Sheppo _info(struct modinfo *modinfop) 3521ae08745Sheppo { 3531ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 3541ae08745Sheppo } 3551ae08745Sheppo 3561ae08745Sheppo int 3571ae08745Sheppo _fini(void) 3581ae08745Sheppo { 3591ae08745Sheppo int status; 3601ae08745Sheppo 3611ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 3621ae08745Sheppo 
return (status); 3631ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3641ae08745Sheppo return (0); 3651ae08745Sheppo } 3661ae08745Sheppo 3671ae08745Sheppo static int 3681ae08745Sheppo vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 3691ae08745Sheppo { 3701ae08745Sheppo _NOTE(ARGUNUSED(dip)) 3711ae08745Sheppo 3720d0c8d4bSnarayan int instance = VDCUNIT((dev_t)arg); 3731ae08745Sheppo vdc_t *vdc = NULL; 3741ae08745Sheppo 3751ae08745Sheppo switch (cmd) { 3761ae08745Sheppo case DDI_INFO_DEVT2DEVINFO: 3771ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 3781ae08745Sheppo *resultp = NULL; 3791ae08745Sheppo return (DDI_FAILURE); 3801ae08745Sheppo } 3811ae08745Sheppo *resultp = vdc->dip; 3821ae08745Sheppo return (DDI_SUCCESS); 3831ae08745Sheppo case DDI_INFO_DEVT2INSTANCE: 3841ae08745Sheppo *resultp = (void *)(uintptr_t)instance; 3851ae08745Sheppo return (DDI_SUCCESS); 3861ae08745Sheppo default: 3871ae08745Sheppo *resultp = NULL; 3881ae08745Sheppo return (DDI_FAILURE); 3891ae08745Sheppo } 3901ae08745Sheppo } 3911ae08745Sheppo 3921ae08745Sheppo static int 3931ae08745Sheppo vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3941ae08745Sheppo { 3952f5224aeSachartre kt_did_t failfast_tid, ownership_tid; 3961ae08745Sheppo int instance; 3971ae08745Sheppo int rv; 398d7400d00Sachartre vdc_server_t *srvr; 3991ae08745Sheppo vdc_t *vdc = NULL; 4001ae08745Sheppo 4011ae08745Sheppo switch (cmd) { 4021ae08745Sheppo case DDI_DETACH: 4031ae08745Sheppo /* the real work happens below */ 4041ae08745Sheppo break; 4051ae08745Sheppo case DDI_SUSPEND: 4061ae08745Sheppo /* nothing to do for this non-device */ 4071ae08745Sheppo return (DDI_SUCCESS); 4081ae08745Sheppo default: 4091ae08745Sheppo return (DDI_FAILURE); 4101ae08745Sheppo } 4111ae08745Sheppo 4121ae08745Sheppo ASSERT(cmd == DDI_DETACH); 4131ae08745Sheppo instance = ddi_get_instance(dip); 4143af08d82Slm66018 DMSGX(1, "[%d] Entered\n", instance); 4151ae08745Sheppo 4161ae08745Sheppo if 
((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 417e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 4181ae08745Sheppo return (DDI_FAILURE); 4191ae08745Sheppo } 4201ae08745Sheppo 4212f5224aeSachartre /* 4222f5224aeSachartre * This function is called when vdc is detached or if it has failed to 4232f5224aeSachartre * attach. In that case, the attach may have fail before the vdisk type 4242f5224aeSachartre * has been set so we can't call vdc_is_opened(). However as the attach 4252f5224aeSachartre * has failed, we know that the vdisk is not opened and we can safely 4262f5224aeSachartre * detach. 4272f5224aeSachartre */ 4282f5224aeSachartre if (vdc->vdisk_type != VD_DISK_TYPE_UNK && vdc_is_opened(vdc)) { 4293af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 4301ae08745Sheppo return (DDI_FAILURE); 4311ae08745Sheppo } 4321ae08745Sheppo 43378fcd0a1Sachartre if (vdc->dkio_flush_pending) { 43478fcd0a1Sachartre DMSG(vdc, 0, 43578fcd0a1Sachartre "[%d] Cannot detach: %d outstanding DKIO flushes\n", 43678fcd0a1Sachartre instance, vdc->dkio_flush_pending); 43778fcd0a1Sachartre return (DDI_FAILURE); 43878fcd0a1Sachartre } 43978fcd0a1Sachartre 44078fcd0a1Sachartre if (vdc->validate_pending) { 44178fcd0a1Sachartre DMSG(vdc, 0, 44278fcd0a1Sachartre "[%d] Cannot detach: %d outstanding validate request\n", 44378fcd0a1Sachartre instance, vdc->validate_pending); 44478fcd0a1Sachartre return (DDI_FAILURE); 44578fcd0a1Sachartre } 44678fcd0a1Sachartre 4473af08d82Slm66018 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 4483af08d82Slm66018 4492f5224aeSachartre /* If we took ownership, release ownership */ 4502f5224aeSachartre mutex_enter(&vdc->ownership_lock); 4512f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) { 4522f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, FKIOCTL); 4532f5224aeSachartre if (rv == 0) { 4542f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 
4552f5224aeSachartre } 4562f5224aeSachartre } 4572f5224aeSachartre mutex_exit(&vdc->ownership_lock); 4582f5224aeSachartre 4593af08d82Slm66018 /* mark instance as detaching */ 4603af08d82Slm66018 vdc->lifecycle = VDC_LC_DETACHING; 4611ae08745Sheppo 4621ae08745Sheppo /* 463d7400d00Sachartre * Try and disable callbacks to prevent another handshake. We have to 464d7400d00Sachartre * disable callbacks for all servers. 4651ae08745Sheppo */ 466d7400d00Sachartre for (srvr = vdc->server_list; srvr != NULL; srvr = srvr->next) { 467d7400d00Sachartre rv = ldc_set_cb_mode(srvr->ldc_handle, LDC_CB_DISABLE); 468d7400d00Sachartre DMSG(vdc, 0, "callback disabled (ldc=%lu, rv=%d)\n", 469d7400d00Sachartre srvr->ldc_id, rv); 4708cd10891Snarayan } 4711ae08745Sheppo 4721ae08745Sheppo if (vdc->initialized & VDC_THREAD) { 4733af08d82Slm66018 mutex_enter(&vdc->read_lock); 4743af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 4753af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) { 4763af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 4773af08d82Slm66018 cv_signal(&vdc->read_cv); 4781ae08745Sheppo } 4793af08d82Slm66018 4803af08d82Slm66018 mutex_exit(&vdc->read_lock); 4813af08d82Slm66018 4823af08d82Slm66018 /* wake up any thread waiting for connection to come online */ 4833af08d82Slm66018 mutex_enter(&vdc->lock); 4843af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 4853af08d82Slm66018 DMSG(vdc, 0, 4863af08d82Slm66018 "[%d] write reset - move to resetting state...\n", 4873af08d82Slm66018 instance); 4883af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 4893af08d82Slm66018 cv_signal(&vdc->initwait_cv); 4903af08d82Slm66018 } 4913af08d82Slm66018 mutex_exit(&vdc->lock); 4923af08d82Slm66018 4933af08d82Slm66018 /* now wait until state transitions to VDC_STATE_DETACH */ 4943af08d82Slm66018 thread_join(vdc->msg_proc_thr->t_did); 4953af08d82Slm66018 ASSERT(vdc->state == VDC_STATE_DETACH); 4963af08d82Slm66018 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 
4973af08d82Slm66018 vdc->instance); 4981ae08745Sheppo } 4991ae08745Sheppo 5001ae08745Sheppo mutex_enter(&vdc->lock); 5011ae08745Sheppo 5021ae08745Sheppo if (vdc->initialized & VDC_DRING) 5031ae08745Sheppo vdc_destroy_descriptor_ring(vdc); 5041ae08745Sheppo 5058cd10891Snarayan vdc_fini_ports(vdc); 5061ae08745Sheppo 5072f5224aeSachartre if (vdc->failfast_thread) { 5082f5224aeSachartre failfast_tid = vdc->failfast_thread->t_did; 5092f5224aeSachartre vdc->failfast_interval = 0; 5102f5224aeSachartre cv_signal(&vdc->failfast_cv); 5112f5224aeSachartre } else { 5122f5224aeSachartre failfast_tid = 0; 5132f5224aeSachartre } 5142f5224aeSachartre 5152f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_WANTED) { 5162f5224aeSachartre ownership_tid = vdc->ownership_thread->t_did; 5172f5224aeSachartre vdc->ownership = VDC_OWNERSHIP_NONE; 5182f5224aeSachartre cv_signal(&vdc->ownership_cv); 5192f5224aeSachartre } else { 5202f5224aeSachartre ownership_tid = 0; 5212f5224aeSachartre } 5222f5224aeSachartre 5231ae08745Sheppo mutex_exit(&vdc->lock); 5241ae08745Sheppo 5252f5224aeSachartre if (failfast_tid != 0) 5262f5224aeSachartre thread_join(failfast_tid); 5272f5224aeSachartre 5282f5224aeSachartre if (ownership_tid != 0) 5292f5224aeSachartre thread_join(ownership_tid); 5302f5224aeSachartre 5315b98b509Sachartre if (vdc->initialized & VDC_MINOR) 5321ae08745Sheppo ddi_remove_minor_node(dip, NULL); 5331ae08745Sheppo 534366a92acSlm66018 if (vdc->io_stats) { 535366a92acSlm66018 kstat_delete(vdc->io_stats); 536366a92acSlm66018 vdc->io_stats = NULL; 537366a92acSlm66018 } 538366a92acSlm66018 539366a92acSlm66018 if (vdc->err_stats) { 540366a92acSlm66018 kstat_delete(vdc->err_stats); 541366a92acSlm66018 vdc->err_stats = NULL; 542366a92acSlm66018 } 543366a92acSlm66018 5441ae08745Sheppo if (vdc->initialized & VDC_LOCKS) { 5451ae08745Sheppo mutex_destroy(&vdc->lock); 5463af08d82Slm66018 mutex_destroy(&vdc->read_lock); 5472f5224aeSachartre mutex_destroy(&vdc->ownership_lock); 5483af08d82Slm66018 
cv_destroy(&vdc->initwait_cv); 5493af08d82Slm66018 cv_destroy(&vdc->dring_free_cv); 5503af08d82Slm66018 cv_destroy(&vdc->membind_cv); 5513af08d82Slm66018 cv_destroy(&vdc->sync_pending_cv); 5523af08d82Slm66018 cv_destroy(&vdc->sync_blocked_cv); 5533af08d82Slm66018 cv_destroy(&vdc->read_cv); 5543af08d82Slm66018 cv_destroy(&vdc->running_cv); 5552f5224aeSachartre cv_destroy(&vdc->ownership_cv); 5562f5224aeSachartre cv_destroy(&vdc->failfast_cv); 5572f5224aeSachartre cv_destroy(&vdc->failfast_io_cv); 5581ae08745Sheppo } 5591ae08745Sheppo 5601ae08745Sheppo if (vdc->minfo) 5611ae08745Sheppo kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 5621ae08745Sheppo 5631ae08745Sheppo if (vdc->cinfo) 5641ae08745Sheppo kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 5651ae08745Sheppo 5661ae08745Sheppo if (vdc->vtoc) 567342440ecSPrasad Singamsetty kmem_free(vdc->vtoc, sizeof (struct extvtoc)); 5681ae08745Sheppo 56978fcd0a1Sachartre if (vdc->geom) 57078fcd0a1Sachartre kmem_free(vdc->geom, sizeof (struct dk_geom)); 5710a55fbb7Slm66018 5724bac2208Snarayan if (vdc->devid) { 5734bac2208Snarayan ddi_devid_unregister(dip); 5744bac2208Snarayan ddi_devid_free(vdc->devid); 5754bac2208Snarayan } 5764bac2208Snarayan 5771ae08745Sheppo if (vdc->initialized & VDC_SOFT_STATE) 5781ae08745Sheppo ddi_soft_state_free(vdc_state, instance); 5791ae08745Sheppo 5803af08d82Slm66018 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 5811ae08745Sheppo 5821ae08745Sheppo return (DDI_SUCCESS); 5831ae08745Sheppo } 5841ae08745Sheppo 5851ae08745Sheppo 5861ae08745Sheppo static int 5871ae08745Sheppo vdc_do_attach(dev_info_t *dip) 5881ae08745Sheppo { 5891ae08745Sheppo int instance; 5901ae08745Sheppo vdc_t *vdc = NULL; 5911ae08745Sheppo int status; 592655fd6a9Sachartre md_t *mdp; 5938cd10891Snarayan mde_cookie_t vd_node; 5941ae08745Sheppo 5951ae08745Sheppo ASSERT(dip != NULL); 5961ae08745Sheppo 5971ae08745Sheppo instance = ddi_get_instance(dip); 5981ae08745Sheppo if (ddi_soft_state_zalloc(vdc_state, instance) != 
DDI_SUCCESS) { 599e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 600e1ebb9ecSlm66018 instance); 6011ae08745Sheppo return (DDI_FAILURE); 6021ae08745Sheppo } 6031ae08745Sheppo 6041ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 605e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 6061ae08745Sheppo return (DDI_FAILURE); 6071ae08745Sheppo } 6081ae08745Sheppo 6091ae08745Sheppo /* 6101ae08745Sheppo * We assign the value to initialized in this case to zero out the 6111ae08745Sheppo * variable and then set bits in it to indicate what has been done 6121ae08745Sheppo */ 6131ae08745Sheppo vdc->initialized = VDC_SOFT_STATE; 6141ae08745Sheppo 6153af08d82Slm66018 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 6163af08d82Slm66018 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 6171ae08745Sheppo 6181ae08745Sheppo vdc->dip = dip; 6191ae08745Sheppo vdc->instance = instance; 6201ae08745Sheppo vdc->vdisk_type = VD_DISK_TYPE_UNK; 6214bac2208Snarayan vdc->vdisk_label = VD_DISK_LABEL_UNK; 6223af08d82Slm66018 vdc->state = VDC_STATE_INIT; 6233af08d82Slm66018 vdc->lifecycle = VDC_LC_ATTACHING; 6241ae08745Sheppo vdc->session_id = 0; 625*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vdisk_bsize = DEV_BSIZE; 626*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = 0; 627*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bshift = 0; 628*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->max_xfer_sz = maxphys / vdc->vdisk_bsize; 6291ae08745Sheppo 63017cadca8Slm66018 /* 63117cadca8Slm66018 * We assume, for now, that the vDisk server will export 'read' 63217cadca8Slm66018 * operations to us at a minimum (this is needed because of checks 63317cadca8Slm66018 * in vdc for supported operations early in the handshake process). 63417cadca8Slm66018 * The vDisk server will return ENOTSUP if this is not the case. 
63517cadca8Slm66018 * The value will be overwritten during the attribute exchange with 63617cadca8Slm66018 * the bitmask of operations exported by server. 63717cadca8Slm66018 */ 63817cadca8Slm66018 vdc->operations = VD_OP_MASK_READ; 63917cadca8Slm66018 6401ae08745Sheppo vdc->vtoc = NULL; 64178fcd0a1Sachartre vdc->geom = NULL; 6421ae08745Sheppo vdc->cinfo = NULL; 6431ae08745Sheppo vdc->minfo = NULL; 6441ae08745Sheppo 6451ae08745Sheppo mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 6463af08d82Slm66018 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 6473af08d82Slm66018 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 6483af08d82Slm66018 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 6493af08d82Slm66018 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 6503af08d82Slm66018 6513af08d82Slm66018 vdc->threads_pending = 0; 6523af08d82Slm66018 vdc->sync_op_pending = B_FALSE; 6533af08d82Slm66018 vdc->sync_op_blocked = B_FALSE; 6543af08d82Slm66018 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 6553af08d82Slm66018 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 6563af08d82Slm66018 6572f5224aeSachartre mutex_init(&vdc->ownership_lock, NULL, MUTEX_DRIVER, NULL); 6582f5224aeSachartre cv_init(&vdc->ownership_cv, NULL, CV_DRIVER, NULL); 6592f5224aeSachartre cv_init(&vdc->failfast_cv, NULL, CV_DRIVER, NULL); 6602f5224aeSachartre cv_init(&vdc->failfast_io_cv, NULL, CV_DRIVER, NULL); 6612f5224aeSachartre 6623af08d82Slm66018 /* init blocking msg read functionality */ 6633af08d82Slm66018 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 6643af08d82Slm66018 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 6653af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 6663af08d82Slm66018 6671ae08745Sheppo vdc->initialized |= VDC_LOCKS; 6681ae08745Sheppo 669655fd6a9Sachartre /* get device and port MD node for this disk instance */ 6708cd10891Snarayan if (vdc_get_md_node(dip, &mdp, &vd_node) != 0) { 671655fd6a9Sachartre cmn_err(CE_NOTE, "[%d] Could not get machine 
description node", 672655fd6a9Sachartre instance); 673655fd6a9Sachartre return (DDI_FAILURE); 674655fd6a9Sachartre } 675655fd6a9Sachartre 6768cd10891Snarayan if (vdc_init_ports(vdc, mdp, vd_node) != 0) { 6778cd10891Snarayan cmn_err(CE_NOTE, "[%d] Error initialising ports", instance); 6788cd10891Snarayan return (DDI_FAILURE); 679655fd6a9Sachartre } 680655fd6a9Sachartre 681655fd6a9Sachartre (void) md_fini_handle(mdp); 682655fd6a9Sachartre 683de3a5331SRamesh Chitrothu /* Create the kstats for saving the I/O statistics used by iostat(1M) */ 684de3a5331SRamesh Chitrothu vdc_create_io_kstats(vdc); 685de3a5331SRamesh Chitrothu vdc_create_err_kstats(vdc); 686de3a5331SRamesh Chitrothu 687de3a5331SRamesh Chitrothu /* Initialize remaining structures before starting the msg thread */ 688de3a5331SRamesh Chitrothu vdc->vdisk_label = VD_DISK_LABEL_UNK; 689342440ecSPrasad Singamsetty vdc->vtoc = kmem_zalloc(sizeof (struct extvtoc), KM_SLEEP); 690de3a5331SRamesh Chitrothu vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); 691de3a5331SRamesh Chitrothu vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 692de3a5331SRamesh Chitrothu 6933af08d82Slm66018 /* initialize the thread responsible for managing state with server */ 6943af08d82Slm66018 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 6951ae08745Sheppo vdc, 0, &p0, TS_RUN, minclsyspri); 6963af08d82Slm66018 if (vdc->msg_proc_thr == NULL) { 6971ae08745Sheppo cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 6981ae08745Sheppo instance); 6991ae08745Sheppo return (DDI_FAILURE); 7001ae08745Sheppo } 7013af08d82Slm66018 7021ae08745Sheppo vdc->initialized |= VDC_THREAD; 7031ae08745Sheppo 704e1ebb9ecSlm66018 atomic_inc_32(&vdc_instance_count); 7051ae08745Sheppo 7060a55fbb7Slm66018 /* 70778fcd0a1Sachartre * Check the disk label. This will send requests and do the handshake. 70878fcd0a1Sachartre * We don't really care about the disk label now. 
What we really need is 70978fcd0a1Sachartre * the handshake do be done so that we know the type of the disk (slice 71078fcd0a1Sachartre * or full disk) and the appropriate device nodes can be created. 7110a55fbb7Slm66018 */ 71278fcd0a1Sachartre 71378fcd0a1Sachartre mutex_enter(&vdc->lock); 71478fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 71578fcd0a1Sachartre mutex_exit(&vdc->lock); 7161ae08745Sheppo 7171ae08745Sheppo /* 7185b98b509Sachartre * Now that we have the device info we can create the device nodes 7191ae08745Sheppo */ 7201ae08745Sheppo status = vdc_create_device_nodes(vdc); 7211ae08745Sheppo if (status) { 7223af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to create device nodes", 7231ae08745Sheppo instance); 7243af08d82Slm66018 goto return_status; 7251ae08745Sheppo } 7261ae08745Sheppo 7274bac2208Snarayan /* 7284bac2208Snarayan * Setup devid 7294bac2208Snarayan */ 7304bac2208Snarayan if (vdc_setup_devid(vdc)) { 7313af08d82Slm66018 DMSG(vdc, 0, "[%d] No device id available\n", instance); 7324bac2208Snarayan } 7334bac2208Snarayan 734366a92acSlm66018 /* 735366a92acSlm66018 * Fill in the fields of the error statistics kstat that were not 736366a92acSlm66018 * available when creating the kstat 737366a92acSlm66018 */ 738366a92acSlm66018 vdc_set_err_kstats(vdc); 739366a92acSlm66018 7401ae08745Sheppo ddi_report_dev(dip); 7413af08d82Slm66018 vdc->lifecycle = VDC_LC_ONLINE; 7423af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 7431ae08745Sheppo 7443af08d82Slm66018 return_status: 7453af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 7461ae08745Sheppo return (status); 7471ae08745Sheppo } 7481ae08745Sheppo 7491ae08745Sheppo static int 7501ae08745Sheppo vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 7511ae08745Sheppo { 7521ae08745Sheppo int status; 7531ae08745Sheppo 7541ae08745Sheppo switch (cmd) { 7551ae08745Sheppo case DDI_ATTACH: 7561ae08745Sheppo if ((status = vdc_do_attach(dip)) != 0) 7571ae08745Sheppo (void) 
vdc_detach(dip, DDI_DETACH); 7581ae08745Sheppo return (status); 7591ae08745Sheppo case DDI_RESUME: 7601ae08745Sheppo /* nothing to do for this non-device */ 7611ae08745Sheppo return (DDI_SUCCESS); 7621ae08745Sheppo default: 7631ae08745Sheppo return (DDI_FAILURE); 7641ae08745Sheppo } 7651ae08745Sheppo } 7661ae08745Sheppo 7671ae08745Sheppo static int 7688cd10891Snarayan vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr) 7691ae08745Sheppo { 7701ae08745Sheppo int status = 0; 7711ae08745Sheppo ldc_status_t ldc_state; 7721ae08745Sheppo ldc_attr_t ldc_attr; 7731ae08745Sheppo 7741ae08745Sheppo ASSERT(vdc != NULL); 7758cd10891Snarayan ASSERT(srvr != NULL); 7761ae08745Sheppo 7771ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK; 7781ae08745Sheppo ldc_attr.instance = vdc->instance; 7791ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 780e1ebb9ecSlm66018 ldc_attr.mtu = VD_LDC_MTU; 7811ae08745Sheppo 7828cd10891Snarayan if ((srvr->state & VDC_LDC_INIT) == 0) { 7838cd10891Snarayan status = ldc_init(srvr->ldc_id, &ldc_attr, 7848cd10891Snarayan &srvr->ldc_handle); 7851ae08745Sheppo if (status != 0) { 7863af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 7878cd10891Snarayan vdc->instance, srvr->ldc_id, status); 7881ae08745Sheppo return (status); 7891ae08745Sheppo } 7908cd10891Snarayan srvr->state |= VDC_LDC_INIT; 7911ae08745Sheppo } 7928cd10891Snarayan status = ldc_status(srvr->ldc_handle, &ldc_state); 7931ae08745Sheppo if (status != 0) { 7943af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 795e1ebb9ecSlm66018 vdc->instance, status); 7968cd10891Snarayan goto init_exit; 7971ae08745Sheppo } 7988cd10891Snarayan srvr->ldc_state = ldc_state; 7991ae08745Sheppo 8008cd10891Snarayan if ((srvr->state & VDC_LDC_CB) == 0) { 8018cd10891Snarayan status = ldc_reg_callback(srvr->ldc_handle, vdc_handle_cb, 8028cd10891Snarayan (caddr_t)srvr); 8031ae08745Sheppo if (status != 0) { 8043af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC callback reg. 
failed (%d)", 805e1ebb9ecSlm66018 vdc->instance, status); 8068cd10891Snarayan goto init_exit; 8071ae08745Sheppo } 8088cd10891Snarayan srvr->state |= VDC_LDC_CB; 8091ae08745Sheppo } 8101ae08745Sheppo 8111ae08745Sheppo /* 8121ae08745Sheppo * At this stage we have initialised LDC, we will now try and open 8131ae08745Sheppo * the connection. 8141ae08745Sheppo */ 8158cd10891Snarayan if (srvr->ldc_state == LDC_INIT) { 8168cd10891Snarayan status = ldc_open(srvr->ldc_handle); 8171ae08745Sheppo if (status != 0) { 8183af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 8198cd10891Snarayan vdc->instance, srvr->ldc_id, status); 8208cd10891Snarayan goto init_exit; 8211ae08745Sheppo } 8228cd10891Snarayan srvr->state |= VDC_LDC_OPEN; 8238cd10891Snarayan } 8248cd10891Snarayan 8258cd10891Snarayan init_exit: 8268cd10891Snarayan if (status) { 8278cd10891Snarayan vdc_terminate_ldc(vdc, srvr); 8281ae08745Sheppo } 8291ae08745Sheppo 8301ae08745Sheppo return (status); 8311ae08745Sheppo } 8321ae08745Sheppo 8331ae08745Sheppo static int 8341ae08745Sheppo vdc_start_ldc_connection(vdc_t *vdc) 8351ae08745Sheppo { 8361ae08745Sheppo int status = 0; 8371ae08745Sheppo 8381ae08745Sheppo ASSERT(vdc != NULL); 8391ae08745Sheppo 8403af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 8411ae08745Sheppo 8420a55fbb7Slm66018 status = vdc_do_ldc_up(vdc); 8431ae08745Sheppo 8443af08d82Slm66018 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 8451ae08745Sheppo 8463af08d82Slm66018 return (status); 8473af08d82Slm66018 } 8483af08d82Slm66018 8493af08d82Slm66018 static int 8503af08d82Slm66018 vdc_stop_ldc_connection(vdc_t *vdcp) 8513af08d82Slm66018 { 8523af08d82Slm66018 int status; 8533af08d82Slm66018 8548cd10891Snarayan ASSERT(vdcp != NULL); 8558cd10891Snarayan 8568cd10891Snarayan ASSERT(MUTEX_HELD(&vdcp->lock)); 8578cd10891Snarayan 8583af08d82Slm66018 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 8593af08d82Slm66018 vdcp->state); 8603af08d82Slm66018 
8618cd10891Snarayan status = ldc_down(vdcp->curr_server->ldc_handle); 8623af08d82Slm66018 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 8633af08d82Slm66018 8643af08d82Slm66018 vdcp->initialized &= ~VDC_HANDSHAKE; 8653af08d82Slm66018 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 8661ae08745Sheppo 8671ae08745Sheppo return (status); 8681ae08745Sheppo } 8691ae08745Sheppo 870366a92acSlm66018 static void 871366a92acSlm66018 vdc_create_io_kstats(vdc_t *vdc) 872366a92acSlm66018 { 873366a92acSlm66018 if (vdc->io_stats != NULL) { 874366a92acSlm66018 DMSG(vdc, 0, "[%d] I/O kstat already exists\n", vdc->instance); 875366a92acSlm66018 return; 876366a92acSlm66018 } 877366a92acSlm66018 878366a92acSlm66018 vdc->io_stats = kstat_create(VDC_DRIVER_NAME, vdc->instance, NULL, 879366a92acSlm66018 "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT); 880366a92acSlm66018 if (vdc->io_stats != NULL) { 881366a92acSlm66018 vdc->io_stats->ks_lock = &vdc->lock; 882366a92acSlm66018 kstat_install(vdc->io_stats); 883366a92acSlm66018 } else { 884366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: I/O statistics" 885366a92acSlm66018 " will not be gathered", vdc->instance); 886366a92acSlm66018 } 887366a92acSlm66018 } 888366a92acSlm66018 889366a92acSlm66018 static void 890366a92acSlm66018 vdc_create_err_kstats(vdc_t *vdc) 891366a92acSlm66018 { 892366a92acSlm66018 vd_err_stats_t *stp; 893366a92acSlm66018 char kstatmodule_err[KSTAT_STRLEN]; 894366a92acSlm66018 char kstatname[KSTAT_STRLEN]; 895366a92acSlm66018 int ndata = (sizeof (vd_err_stats_t) / sizeof (kstat_named_t)); 896366a92acSlm66018 int instance = vdc->instance; 897366a92acSlm66018 898366a92acSlm66018 if (vdc->err_stats != NULL) { 899366a92acSlm66018 DMSG(vdc, 0, "[%d] ERR kstat already exists\n", vdc->instance); 900366a92acSlm66018 return; 901366a92acSlm66018 } 902366a92acSlm66018 903366a92acSlm66018 (void) snprintf(kstatmodule_err, sizeof (kstatmodule_err), 904366a92acSlm66018 "%serr", VDC_DRIVER_NAME); 905366a92acSlm66018 
(void) snprintf(kstatname, sizeof (kstatname), 906366a92acSlm66018 "%s%d,err", VDC_DRIVER_NAME, instance); 907366a92acSlm66018 908366a92acSlm66018 vdc->err_stats = kstat_create(kstatmodule_err, instance, kstatname, 909366a92acSlm66018 "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT); 910366a92acSlm66018 911366a92acSlm66018 if (vdc->err_stats == NULL) { 912366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: Error statistics" 913366a92acSlm66018 " will not be gathered", instance); 914366a92acSlm66018 return; 915366a92acSlm66018 } 916366a92acSlm66018 917366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 918366a92acSlm66018 kstat_named_init(&stp->vd_softerrs, "Soft Errors", 919366a92acSlm66018 KSTAT_DATA_UINT32); 920366a92acSlm66018 kstat_named_init(&stp->vd_transerrs, "Transport Errors", 921366a92acSlm66018 KSTAT_DATA_UINT32); 922366a92acSlm66018 kstat_named_init(&stp->vd_protoerrs, "Protocol Errors", 923366a92acSlm66018 KSTAT_DATA_UINT32); 924366a92acSlm66018 kstat_named_init(&stp->vd_vid, "Vendor", 925366a92acSlm66018 KSTAT_DATA_CHAR); 926366a92acSlm66018 kstat_named_init(&stp->vd_pid, "Product", 927366a92acSlm66018 KSTAT_DATA_CHAR); 928366a92acSlm66018 kstat_named_init(&stp->vd_capacity, "Size", 929366a92acSlm66018 KSTAT_DATA_ULONGLONG); 930366a92acSlm66018 931366a92acSlm66018 vdc->err_stats->ks_update = nulldev; 932366a92acSlm66018 933366a92acSlm66018 kstat_install(vdc->err_stats); 934366a92acSlm66018 } 935366a92acSlm66018 936366a92acSlm66018 static void 937366a92acSlm66018 vdc_set_err_kstats(vdc_t *vdc) 938366a92acSlm66018 { 939366a92acSlm66018 vd_err_stats_t *stp; 940366a92acSlm66018 941366a92acSlm66018 if (vdc->err_stats == NULL) 942366a92acSlm66018 return; 943366a92acSlm66018 944366a92acSlm66018 mutex_enter(&vdc->lock); 945366a92acSlm66018 946366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 947366a92acSlm66018 ASSERT(stp != NULL); 948366a92acSlm66018 949*65908c77Syu, larry liu - Sun Microsystems - 
Beijing China stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->vdisk_bsize; 950366a92acSlm66018 (void) strcpy(stp->vd_vid.value.c, "SUN"); 951366a92acSlm66018 (void) strcpy(stp->vd_pid.value.c, "VDSK"); 952366a92acSlm66018 953366a92acSlm66018 mutex_exit(&vdc->lock); 954366a92acSlm66018 } 955366a92acSlm66018 9564bac2208Snarayan static int 9574bac2208Snarayan vdc_create_device_nodes_efi(vdc_t *vdc) 9584bac2208Snarayan { 9594bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h"); 9604bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h,raw"); 9614bac2208Snarayan 9624bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 9634bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9644bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9654bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 9664bac2208Snarayan vdc->instance); 9674bac2208Snarayan return (EIO); 9684bac2208Snarayan } 9694bac2208Snarayan 9704bac2208Snarayan /* if any device node is created we set this flag */ 9714bac2208Snarayan vdc->initialized |= VDC_MINOR; 9724bac2208Snarayan 9734bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 9744bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9754bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9764bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 9774bac2208Snarayan vdc->instance); 9784bac2208Snarayan return (EIO); 9794bac2208Snarayan } 9804bac2208Snarayan 9814bac2208Snarayan return (0); 9824bac2208Snarayan } 9834bac2208Snarayan 9844bac2208Snarayan static int 9854bac2208Snarayan vdc_create_device_nodes_vtoc(vdc_t *vdc) 9864bac2208Snarayan { 9874bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd"); 9884bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd,raw"); 9894bac2208Snarayan 9904bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK, 9914bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9924bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 
9934bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 9944bac2208Snarayan vdc->instance); 9954bac2208Snarayan return (EIO); 9964bac2208Snarayan } 9974bac2208Snarayan 9984bac2208Snarayan /* if any device node is created we set this flag */ 9994bac2208Snarayan vdc->initialized |= VDC_MINOR; 10004bac2208Snarayan 10014bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 10024bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 10034bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 10044bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 10054bac2208Snarayan vdc->instance); 10064bac2208Snarayan return (EIO); 10074bac2208Snarayan } 10084bac2208Snarayan 10094bac2208Snarayan return (0); 10104bac2208Snarayan } 10111ae08745Sheppo 10121ae08745Sheppo /* 10131ae08745Sheppo * Function: 10141ae08745Sheppo * vdc_create_device_nodes 10151ae08745Sheppo * 10161ae08745Sheppo * Description: 10171ae08745Sheppo * This function creates the block and character device nodes under 10185b98b509Sachartre * /devices. It is called as part of the attach(9E) of the instance 10195b98b509Sachartre * during the handshake with vds after vds has sent the attributes 10205b98b509Sachartre * to vdc. 10211ae08745Sheppo * 10221ae08745Sheppo * If the device is of type VD_DISK_TYPE_SLICE then the minor node 10231ae08745Sheppo * of 2 is used in keeping with the Solaris convention that slice 2 10241ae08745Sheppo * refers to a whole disk. 
Slices start at 'a' 10251ae08745Sheppo * 10261ae08745Sheppo * Parameters: 10271ae08745Sheppo * vdc - soft state pointer 10281ae08745Sheppo * 10291ae08745Sheppo * Return Values 10301ae08745Sheppo * 0 - Success 10311ae08745Sheppo * EIO - Failed to create node 10321ae08745Sheppo * EINVAL - Unknown type of disk exported 10331ae08745Sheppo */ 10341ae08745Sheppo static int 10351ae08745Sheppo vdc_create_device_nodes(vdc_t *vdc) 10361ae08745Sheppo { 10374bac2208Snarayan char name[sizeof ("s,raw")]; 10381ae08745Sheppo dev_info_t *dip = NULL; 10394bac2208Snarayan int instance, status; 10401ae08745Sheppo int num_slices = 1; 10411ae08745Sheppo int i; 10421ae08745Sheppo 10431ae08745Sheppo ASSERT(vdc != NULL); 10441ae08745Sheppo 10451ae08745Sheppo instance = vdc->instance; 10461ae08745Sheppo dip = vdc->dip; 10471ae08745Sheppo 10481ae08745Sheppo switch (vdc->vdisk_type) { 10491ae08745Sheppo case VD_DISK_TYPE_DISK: 10501ae08745Sheppo num_slices = V_NUMPAR; 10511ae08745Sheppo break; 10521ae08745Sheppo case VD_DISK_TYPE_SLICE: 10531ae08745Sheppo num_slices = 1; 10541ae08745Sheppo break; 10551ae08745Sheppo case VD_DISK_TYPE_UNK: 10561ae08745Sheppo default: 10571ae08745Sheppo return (EINVAL); 10581ae08745Sheppo } 10591ae08745Sheppo 10604bac2208Snarayan /* 10614bac2208Snarayan * Minor nodes are different for EFI disks: EFI disks do not have 10624bac2208Snarayan * a minor node 'g' for the minor number corresponding to slice 10634bac2208Snarayan * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 10644bac2208Snarayan * representing the whole disk. 
10654bac2208Snarayan */ 10661ae08745Sheppo for (i = 0; i < num_slices; i++) { 10674bac2208Snarayan 10684bac2208Snarayan if (i == VD_EFI_WD_SLICE) { 10694bac2208Snarayan if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 10704bac2208Snarayan status = vdc_create_device_nodes_efi(vdc); 10714bac2208Snarayan else 10724bac2208Snarayan status = vdc_create_device_nodes_vtoc(vdc); 10734bac2208Snarayan if (status != 0) 10744bac2208Snarayan return (status); 10754bac2208Snarayan continue; 10764bac2208Snarayan } 10774bac2208Snarayan 10781ae08745Sheppo (void) snprintf(name, sizeof (name), "%c", 'a' + i); 10791ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFBLK, 10801ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1081e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 1082e1ebb9ecSlm66018 instance, name); 10831ae08745Sheppo return (EIO); 10841ae08745Sheppo } 10851ae08745Sheppo 10861ae08745Sheppo /* if any device node is created we set this flag */ 10871ae08745Sheppo vdc->initialized |= VDC_MINOR; 10881ae08745Sheppo 108987a7269eSachartre (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); 109087a7269eSachartre 10911ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFCHR, 10921ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1093e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 1094e1ebb9ecSlm66018 instance, name); 10951ae08745Sheppo return (EIO); 10961ae08745Sheppo } 10971ae08745Sheppo } 10981ae08745Sheppo 10991ae08745Sheppo return (0); 11001ae08745Sheppo } 11011ae08745Sheppo 11021ae08745Sheppo /* 11035b98b509Sachartre * Driver prop_op(9e) entry point function. Return the number of blocks for 11045b98b509Sachartre * the partition in question or forward the request to the property facilities. 
11051ae08745Sheppo */ 11061ae08745Sheppo static int 11075b98b509Sachartre vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 11085b98b509Sachartre char *name, caddr_t valuep, int *lengthp) 11091ae08745Sheppo { 11105b98b509Sachartre int instance = ddi_get_instance(dip); 11115b98b509Sachartre vdc_t *vdc; 11125b98b509Sachartre uint64_t nblocks; 11135b98b509Sachartre uint_t blksize; 11141ae08745Sheppo 11155b98b509Sachartre vdc = ddi_get_soft_state(vdc_state, instance); 11161ae08745Sheppo 11175b98b509Sachartre if (dev == DDI_DEV_T_ANY || vdc == NULL) { 11185b98b509Sachartre return (ddi_prop_op(dev, dip, prop_op, mod_flags, 11195b98b509Sachartre name, valuep, lengthp)); 11201ae08745Sheppo } 11211ae08745Sheppo 11225b98b509Sachartre mutex_enter(&vdc->lock); 11235b98b509Sachartre (void) vdc_validate_geometry(vdc); 112478fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { 11255b98b509Sachartre mutex_exit(&vdc->lock); 11265b98b509Sachartre return (ddi_prop_op(dev, dip, prop_op, mod_flags, 11275b98b509Sachartre name, valuep, lengthp)); 112878fcd0a1Sachartre } 11295b98b509Sachartre nblocks = vdc->slice[VDCPART(dev)].nblocks; 1130*65908c77Syu, larry liu - Sun Microsystems - Beijing China blksize = vdc->vdisk_bsize; 11315b98b509Sachartre mutex_exit(&vdc->lock); 113278fcd0a1Sachartre 11335b98b509Sachartre return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags, 11345b98b509Sachartre name, valuep, lengthp, nblocks, blksize)); 11351ae08745Sheppo } 11361ae08745Sheppo 113778fcd0a1Sachartre /* 113878fcd0a1Sachartre * Function: 113978fcd0a1Sachartre * vdc_is_opened 114078fcd0a1Sachartre * 114178fcd0a1Sachartre * Description: 114278fcd0a1Sachartre * This function checks if any slice of a given virtual disk is 114378fcd0a1Sachartre * currently opened. 
114478fcd0a1Sachartre * 114578fcd0a1Sachartre * Parameters: 114678fcd0a1Sachartre * vdc - soft state pointer 114778fcd0a1Sachartre * 114878fcd0a1Sachartre * Return Values 114978fcd0a1Sachartre * B_TRUE - at least one slice is opened. 115078fcd0a1Sachartre * B_FALSE - no slice is opened. 115178fcd0a1Sachartre */ 115278fcd0a1Sachartre static boolean_t 115378fcd0a1Sachartre vdc_is_opened(vdc_t *vdc) 115478fcd0a1Sachartre { 115578fcd0a1Sachartre int i, nslices; 115678fcd0a1Sachartre 115778fcd0a1Sachartre switch (vdc->vdisk_type) { 115878fcd0a1Sachartre case VD_DISK_TYPE_DISK: 115978fcd0a1Sachartre nslices = V_NUMPAR; 116078fcd0a1Sachartre break; 116178fcd0a1Sachartre case VD_DISK_TYPE_SLICE: 116278fcd0a1Sachartre nslices = 1; 116378fcd0a1Sachartre break; 116478fcd0a1Sachartre case VD_DISK_TYPE_UNK: 116578fcd0a1Sachartre default: 116678fcd0a1Sachartre ASSERT(0); 116778fcd0a1Sachartre } 116878fcd0a1Sachartre 116978fcd0a1Sachartre /* check if there's any layered open */ 117078fcd0a1Sachartre for (i = 0; i < nslices; i++) { 117178fcd0a1Sachartre if (vdc->open_lyr[i] > 0) 117278fcd0a1Sachartre return (B_TRUE); 117378fcd0a1Sachartre } 117478fcd0a1Sachartre 117578fcd0a1Sachartre /* check if there is any other kind of open */ 117678fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 117778fcd0a1Sachartre if (vdc->open[i] != 0) 117878fcd0a1Sachartre return (B_TRUE); 117978fcd0a1Sachartre } 118078fcd0a1Sachartre 118178fcd0a1Sachartre return (B_FALSE); 118278fcd0a1Sachartre } 118378fcd0a1Sachartre 118478fcd0a1Sachartre static int 118578fcd0a1Sachartre vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) 118678fcd0a1Sachartre { 118778fcd0a1Sachartre uint8_t slicemask; 118878fcd0a1Sachartre int i; 118978fcd0a1Sachartre 119078fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 119178fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 119278fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 119378fcd0a1Sachartre 119478fcd0a1Sachartre slicemask = 1 << slice; 119578fcd0a1Sachartre 119678fcd0a1Sachartre /* 
check if slice is already exclusively opened */ 119778fcd0a1Sachartre if (vdc->open_excl & slicemask) 119878fcd0a1Sachartre return (EBUSY); 119978fcd0a1Sachartre 120078fcd0a1Sachartre /* if open exclusive, check if slice is already opened */ 120178fcd0a1Sachartre if (flag & FEXCL) { 120278fcd0a1Sachartre if (vdc->open_lyr[slice] > 0) 120378fcd0a1Sachartre return (EBUSY); 120478fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 120578fcd0a1Sachartre if (vdc->open[i] & slicemask) 120678fcd0a1Sachartre return (EBUSY); 120778fcd0a1Sachartre } 120878fcd0a1Sachartre vdc->open_excl |= slicemask; 120978fcd0a1Sachartre } 121078fcd0a1Sachartre 121178fcd0a1Sachartre /* mark slice as opened */ 121278fcd0a1Sachartre if (otyp == OTYP_LYR) { 121378fcd0a1Sachartre vdc->open_lyr[slice]++; 121478fcd0a1Sachartre } else { 121578fcd0a1Sachartre vdc->open[otyp] |= slicemask; 121678fcd0a1Sachartre } 121778fcd0a1Sachartre 121878fcd0a1Sachartre return (0); 121978fcd0a1Sachartre } 122078fcd0a1Sachartre 122178fcd0a1Sachartre static void 122278fcd0a1Sachartre vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) 122378fcd0a1Sachartre { 122478fcd0a1Sachartre uint8_t slicemask; 122578fcd0a1Sachartre 122678fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 122778fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 122878fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 122978fcd0a1Sachartre 123078fcd0a1Sachartre slicemask = 1 << slice; 123178fcd0a1Sachartre 123278fcd0a1Sachartre if (otyp == OTYP_LYR) { 123378fcd0a1Sachartre ASSERT(vdc->open_lyr[slice] > 0); 123478fcd0a1Sachartre vdc->open_lyr[slice]--; 123578fcd0a1Sachartre } else { 123678fcd0a1Sachartre vdc->open[otyp] &= ~slicemask; 123778fcd0a1Sachartre } 123878fcd0a1Sachartre 123978fcd0a1Sachartre if (flag & FEXCL) 124078fcd0a1Sachartre vdc->open_excl &= ~slicemask; 124178fcd0a1Sachartre } 124278fcd0a1Sachartre 12431ae08745Sheppo static int 12441ae08745Sheppo vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 12451ae08745Sheppo { 12461ae08745Sheppo 
_NOTE(ARGUNUSED(cred)) 12471ae08745Sheppo 1248179e09c2Sachartre int instance, nodelay; 124978fcd0a1Sachartre int slice, status = 0; 12501ae08745Sheppo vdc_t *vdc; 12511ae08745Sheppo 12521ae08745Sheppo ASSERT(dev != NULL); 12530d0c8d4bSnarayan instance = VDCUNIT(*dev); 12541ae08745Sheppo 125578fcd0a1Sachartre if (otyp >= OTYPCNT) 12561ae08745Sheppo return (EINVAL); 12571ae08745Sheppo 12581ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1259e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 12601ae08745Sheppo return (ENXIO); 12611ae08745Sheppo } 12621ae08745Sheppo 12633af08d82Slm66018 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 12643af08d82Slm66018 getminor(*dev), flag, otyp); 12651ae08745Sheppo 126678fcd0a1Sachartre slice = VDCPART(*dev); 126778fcd0a1Sachartre 1268179e09c2Sachartre nodelay = flag & (FNDELAY | FNONBLOCK); 1269179e09c2Sachartre 1270179e09c2Sachartre if ((flag & FWRITE) && (!nodelay) && 1271179e09c2Sachartre !(VD_OP_SUPPORTED(vdc->operations, VD_OP_BWRITE))) { 1272179e09c2Sachartre return (EROFS); 1273179e09c2Sachartre } 1274179e09c2Sachartre 12751ae08745Sheppo mutex_enter(&vdc->lock); 127678fcd0a1Sachartre 127778fcd0a1Sachartre status = vdc_mark_opened(vdc, slice, flag, otyp); 127878fcd0a1Sachartre 127978fcd0a1Sachartre if (status != 0) { 128078fcd0a1Sachartre mutex_exit(&vdc->lock); 128178fcd0a1Sachartre return (status); 128278fcd0a1Sachartre } 128378fcd0a1Sachartre 1284179e09c2Sachartre if (nodelay) { 128578fcd0a1Sachartre 128678fcd0a1Sachartre /* don't resubmit a validate request if there's already one */ 128778fcd0a1Sachartre if (vdc->validate_pending > 0) { 128878fcd0a1Sachartre mutex_exit(&vdc->lock); 128978fcd0a1Sachartre return (0); 129078fcd0a1Sachartre } 129178fcd0a1Sachartre 129278fcd0a1Sachartre /* call vdc_validate() asynchronously to avoid blocking */ 129378fcd0a1Sachartre if (taskq_dispatch(system_taskq, vdc_validate_task, 129478fcd0a1Sachartre (void *)vdc, TQ_NOSLEEP) == 
NULL) { 129578fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 129678fcd0a1Sachartre mutex_exit(&vdc->lock); 129778fcd0a1Sachartre return (ENXIO); 129878fcd0a1Sachartre } 129978fcd0a1Sachartre 130078fcd0a1Sachartre vdc->validate_pending++; 130178fcd0a1Sachartre mutex_exit(&vdc->lock); 130278fcd0a1Sachartre return (0); 130378fcd0a1Sachartre } 130478fcd0a1Sachartre 13051ae08745Sheppo mutex_exit(&vdc->lock); 13061ae08745Sheppo 130778fcd0a1Sachartre vdc_validate(vdc); 130878fcd0a1Sachartre 130978fcd0a1Sachartre mutex_enter(&vdc->lock); 131078fcd0a1Sachartre 131178fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_UNK || 1312edcc0754Sachartre vdc->slice[slice].nblocks == 0) { 131378fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 131478fcd0a1Sachartre status = EIO; 131578fcd0a1Sachartre } 131678fcd0a1Sachartre 131778fcd0a1Sachartre mutex_exit(&vdc->lock); 131878fcd0a1Sachartre 131978fcd0a1Sachartre return (status); 13201ae08745Sheppo } 13211ae08745Sheppo 13221ae08745Sheppo static int 13231ae08745Sheppo vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 13241ae08745Sheppo { 13251ae08745Sheppo _NOTE(ARGUNUSED(cred)) 13261ae08745Sheppo 13271ae08745Sheppo int instance; 132878fcd0a1Sachartre int slice; 13292f5224aeSachartre int rv, rval; 13301ae08745Sheppo vdc_t *vdc; 13311ae08745Sheppo 13320d0c8d4bSnarayan instance = VDCUNIT(dev); 13331ae08745Sheppo 133478fcd0a1Sachartre if (otyp >= OTYPCNT) 13351ae08745Sheppo return (EINVAL); 13361ae08745Sheppo 13371ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1338e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13391ae08745Sheppo return (ENXIO); 13401ae08745Sheppo } 13411ae08745Sheppo 13423af08d82Slm66018 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 13431ae08745Sheppo 134478fcd0a1Sachartre slice = VDCPART(dev); 134578fcd0a1Sachartre 13468259acd8Szk194757 /* 13478259acd8Szk194757 * Attempt to flush the W$ on a close operation. 
If this is 13488259acd8Szk194757 * not a supported IOCTL command or the backing device is read-only 13498259acd8Szk194757 * do not fail the close operation. 13508259acd8Szk194757 */ 13512f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, &rval); 13528259acd8Szk194757 13538259acd8Szk194757 if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) { 13548259acd8Szk194757 DMSG(vdc, 0, "[%d] flush failed with error %d on close\n", 13558259acd8Szk194757 instance, rv); 13568259acd8Szk194757 return (EIO); 13578259acd8Szk194757 } 13588259acd8Szk194757 13591ae08745Sheppo mutex_enter(&vdc->lock); 136078fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 13611ae08745Sheppo mutex_exit(&vdc->lock); 13621ae08745Sheppo 13631ae08745Sheppo return (0); 13641ae08745Sheppo } 13651ae08745Sheppo 13661ae08745Sheppo static int 13671ae08745Sheppo vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 13681ae08745Sheppo { 13691ae08745Sheppo _NOTE(ARGUNUSED(credp)) 13701ae08745Sheppo 13712f5224aeSachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode, rvalp)); 13721ae08745Sheppo } 13731ae08745Sheppo 13741ae08745Sheppo static int 13751ae08745Sheppo vdc_print(dev_t dev, char *str) 13761ae08745Sheppo { 13770d0c8d4bSnarayan cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 13781ae08745Sheppo return (0); 13791ae08745Sheppo } 13801ae08745Sheppo 13811ae08745Sheppo static int 13821ae08745Sheppo vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 13831ae08745Sheppo { 1384d10e4ef2Snarayan int rv; 1385d10e4ef2Snarayan size_t nbytes = nblk * DEV_BSIZE; 13860d0c8d4bSnarayan int instance = VDCUNIT(dev); 1387d10e4ef2Snarayan vdc_t *vdc = NULL; 1388*65908c77Syu, larry liu - Sun Microsystems - Beijing China diskaddr_t vio_blkno; 13891ae08745Sheppo 13901ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1391e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13921ae08745Sheppo 
return (ENXIO); 13931ae08745Sheppo } 13941ae08745Sheppo 13953af08d82Slm66018 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 13963af08d82Slm66018 instance, nbytes, blkno, (void *)addr); 1397*65908c77Syu, larry liu - Sun Microsystems - Beijing China 1398*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* convert logical block to vio block */ 1399*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((blkno & vdc->vio_bmask) != 0) { 1400*65908c77Syu, larry liu - Sun Microsystems - Beijing China DMSG(vdc, 0, "Misaligned block number (%lu)\n", blkno); 1401*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 1402*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 1403*65908c77Syu, larry liu - Sun Microsystems - Beijing China vio_blkno = blkno >> vdc->vio_bshift; 1404*65908c77Syu, larry liu - Sun Microsystems - Beijing China 14053af08d82Slm66018 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 1406*65908c77Syu, larry liu - Sun Microsystems - Beijing China VDCPART(dev), vio_blkno, CB_STRATEGY, 0, VIO_write_dir); 14073af08d82Slm66018 if (rv) { 14083af08d82Slm66018 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 14091ae08745Sheppo return (rv); 14101ae08745Sheppo } 14111ae08745Sheppo 14123af08d82Slm66018 if (ddi_in_panic()) 141311f54b6eSAlexandre Chartre (void) vdc_drain_response(vdc, CB_STRATEGY, NULL); 14143af08d82Slm66018 14153af08d82Slm66018 DMSG(vdc, 0, "[%d] End\n", instance); 14163af08d82Slm66018 14173af08d82Slm66018 return (0); 14183af08d82Slm66018 } 14193af08d82Slm66018 14201ae08745Sheppo /* -------------------------------------------------------------------------- */ 14211ae08745Sheppo 14221ae08745Sheppo /* 14231ae08745Sheppo * Disk access routines 14241ae08745Sheppo * 14251ae08745Sheppo */ 14261ae08745Sheppo 14271ae08745Sheppo /* 14281ae08745Sheppo * vdc_strategy() 14291ae08745Sheppo * 14301ae08745Sheppo * Return Value: 14311ae08745Sheppo * 0: As per strategy(9E), the strategy() 
function must return 0 14321ae08745Sheppo * [ bioerror(9f) sets b_flags to the proper error code ] 14331ae08745Sheppo */ 14341ae08745Sheppo static int 14351ae08745Sheppo vdc_strategy(struct buf *buf) 14361ae08745Sheppo { 1437*65908c77Syu, larry liu - Sun Microsystems - Beijing China diskaddr_t vio_blkno; 14381ae08745Sheppo int rv = -1; 14391ae08745Sheppo vdc_t *vdc = NULL; 14400d0c8d4bSnarayan int instance = VDCUNIT(buf->b_edev); 14411ae08745Sheppo int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; 144287a7269eSachartre int slice; 14431ae08745Sheppo 14441ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1445e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 14461ae08745Sheppo bioerror(buf, ENXIO); 14471ae08745Sheppo biodone(buf); 14481ae08745Sheppo return (0); 14491ae08745Sheppo } 14501ae08745Sheppo 14513af08d82Slm66018 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 14523af08d82Slm66018 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 14533af08d82Slm66018 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1454d10e4ef2Snarayan 14551ae08745Sheppo bp_mapin(buf); 14561ae08745Sheppo 145787a7269eSachartre if ((long)buf->b_private == VD_SLICE_NONE) { 145887a7269eSachartre /* I/O using an absolute disk offset */ 145987a7269eSachartre slice = VD_SLICE_NONE; 146087a7269eSachartre } else { 146187a7269eSachartre slice = VDCPART(buf->b_edev); 146287a7269eSachartre } 146387a7269eSachartre 1464*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* 1465*65908c77Syu, larry liu - Sun Microsystems - Beijing China * In the buf structure, b_lblkno represents a logical block number 1466*65908c77Syu, larry liu - Sun Microsystems - Beijing China * using a block size of 512 bytes. 
For the VIO request, this block 1467*65908c77Syu, larry liu - Sun Microsystems - Beijing China * number has to be converted to be represented with the block size 1468*65908c77Syu, larry liu - Sun Microsystems - Beijing China * used by the VIO protocol. 1469*65908c77Syu, larry liu - Sun Microsystems - Beijing China */ 1470*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((buf->b_lblkno & vdc->vio_bmask) != 0) { 1471*65908c77Syu, larry liu - Sun Microsystems - Beijing China bioerror(buf, EINVAL); 1472*65908c77Syu, larry liu - Sun Microsystems - Beijing China biodone(buf); 1473*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (0); 1474*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 1475*65908c77Syu, larry liu - Sun Microsystems - Beijing China vio_blkno = buf->b_lblkno >> vdc->vio_bshift; 1476*65908c77Syu, larry liu - Sun Microsystems - Beijing China 14773af08d82Slm66018 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 1478*65908c77Syu, larry liu - Sun Microsystems - Beijing China buf->b_bcount, slice, vio_blkno, 14793af08d82Slm66018 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 14803af08d82Slm66018 VIO_write_dir); 14813af08d82Slm66018 1482d10e4ef2Snarayan /* 1483d10e4ef2Snarayan * If the request was successfully sent, the strategy call returns and 1484d10e4ef2Snarayan * the ACK handler calls the bioxxx functions when the vDisk server is 1485366a92acSlm66018 * done otherwise we handle the error here. 
1486d10e4ef2Snarayan */ 1487d10e4ef2Snarayan if (rv) { 14883af08d82Slm66018 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 14891ae08745Sheppo bioerror(buf, rv); 14901ae08745Sheppo biodone(buf); 14913c2ebf09Sachartre } else if (ddi_in_panic()) { 149211f54b6eSAlexandre Chartre rv = vdc_drain_response(vdc, CB_STRATEGY, buf); 149311f54b6eSAlexandre Chartre if (rv != 0) { 149411f54b6eSAlexandre Chartre bioerror(buf, EIO); 149511f54b6eSAlexandre Chartre biodone(buf); 149611f54b6eSAlexandre Chartre } 1497d10e4ef2Snarayan } 1498d10e4ef2Snarayan 14991ae08745Sheppo return (0); 15001ae08745Sheppo } 15011ae08745Sheppo 15020d0c8d4bSnarayan /* 15030d0c8d4bSnarayan * Function: 15040d0c8d4bSnarayan * vdc_min 15050d0c8d4bSnarayan * 15060d0c8d4bSnarayan * Description: 15070d0c8d4bSnarayan * Routine to limit the size of a data transfer. Used in 15080d0c8d4bSnarayan * conjunction with physio(9F). 15090d0c8d4bSnarayan * 15100d0c8d4bSnarayan * Arguments: 15110d0c8d4bSnarayan * bp - pointer to the indicated buf(9S) struct. 
15120d0c8d4bSnarayan * 15130d0c8d4bSnarayan */ 15140d0c8d4bSnarayan static void 15150d0c8d4bSnarayan vdc_min(struct buf *bufp) 15160d0c8d4bSnarayan { 15170d0c8d4bSnarayan vdc_t *vdc = NULL; 15180d0c8d4bSnarayan int instance = VDCUNIT(bufp->b_edev); 15190d0c8d4bSnarayan 15200d0c8d4bSnarayan vdc = ddi_get_soft_state(vdc_state, instance); 15210d0c8d4bSnarayan VERIFY(vdc != NULL); 15220d0c8d4bSnarayan 1523*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->vdisk_bsize)) { 1524*65908c77Syu, larry liu - Sun Microsystems - Beijing China bufp->b_bcount = vdc->max_xfer_sz * vdc->vdisk_bsize; 15250d0c8d4bSnarayan } 15260d0c8d4bSnarayan } 15271ae08745Sheppo 15281ae08745Sheppo static int 15291ae08745Sheppo vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 15301ae08745Sheppo { 15311ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15321ae08745Sheppo 15330d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15340d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 15351ae08745Sheppo } 15361ae08745Sheppo 15371ae08745Sheppo static int 15381ae08745Sheppo vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 15391ae08745Sheppo { 15401ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15411ae08745Sheppo 15420d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15430d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 15441ae08745Sheppo } 15451ae08745Sheppo 15461ae08745Sheppo static int 15471ae08745Sheppo vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 15481ae08745Sheppo { 15491ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15501ae08745Sheppo 15510d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15520d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 15531ae08745Sheppo } 15541ae08745Sheppo 15551ae08745Sheppo static int 15561ae08745Sheppo vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 15571ae08745Sheppo { 15581ae08745Sheppo _NOTE(ARGUNUSED(cred)) 
15591ae08745Sheppo 15600d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15610d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 15621ae08745Sheppo } 15631ae08745Sheppo 15641ae08745Sheppo 15651ae08745Sheppo /* -------------------------------------------------------------------------- */ 15661ae08745Sheppo 15671ae08745Sheppo /* 15681ae08745Sheppo * Handshake support 15691ae08745Sheppo */ 15701ae08745Sheppo 15711ae08745Sheppo 15720a55fbb7Slm66018 /* 15730a55fbb7Slm66018 * Function: 15740a55fbb7Slm66018 * vdc_init_ver_negotiation() 15750a55fbb7Slm66018 * 15760a55fbb7Slm66018 * Description: 15770a55fbb7Slm66018 * 15780a55fbb7Slm66018 * Arguments: 15790a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 15800a55fbb7Slm66018 * 15810a55fbb7Slm66018 * Return Code: 15820a55fbb7Slm66018 * 0 - Success 15830a55fbb7Slm66018 */ 15841ae08745Sheppo static int 15850a55fbb7Slm66018 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 15861ae08745Sheppo { 15871ae08745Sheppo vio_ver_msg_t pkt; 15881ae08745Sheppo size_t msglen = sizeof (pkt); 15891ae08745Sheppo int status = -1; 15901ae08745Sheppo 15911ae08745Sheppo ASSERT(vdc != NULL); 15921ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 15931ae08745Sheppo 15943af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1595e1ebb9ecSlm66018 15961ae08745Sheppo /* 15971ae08745Sheppo * set the Session ID to a unique value 15981ae08745Sheppo * (the lower 32 bits of the clock tick) 15991ae08745Sheppo */ 16001ae08745Sheppo vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 16013af08d82Slm66018 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 16021ae08745Sheppo 16031ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 16041ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 16051ae08745Sheppo pkt.tag.vio_subtype_env = VIO_VER_INFO; 16061ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 16071ae08745Sheppo pkt.dev_class = VDEV_DISK; 
16080a55fbb7Slm66018 pkt.ver_major = ver.major; 16090a55fbb7Slm66018 pkt.ver_minor = ver.minor; 16101ae08745Sheppo 16110a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 16123af08d82Slm66018 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 16133af08d82Slm66018 vdc->instance, status); 16141ae08745Sheppo if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 16153af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 16168cd10891Snarayan "id(%lx) rv(%d) size(%ld)", vdc->instance, 16178cd10891Snarayan vdc->curr_server->ldc_handle, status, msglen); 16181ae08745Sheppo if (msglen != sizeof (vio_ver_msg_t)) 16191ae08745Sheppo status = ENOMSG; 16201ae08745Sheppo } 16211ae08745Sheppo 16221ae08745Sheppo return (status); 16231ae08745Sheppo } 16241ae08745Sheppo 16250a55fbb7Slm66018 /* 16260a55fbb7Slm66018 * Function: 16273af08d82Slm66018 * vdc_ver_negotiation() 16283af08d82Slm66018 * 16293af08d82Slm66018 * Description: 16303af08d82Slm66018 * 16313af08d82Slm66018 * Arguments: 16323af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
16333af08d82Slm66018 * 16343af08d82Slm66018 * Return Code: 16353af08d82Slm66018 * 0 - Success 16363af08d82Slm66018 */ 16373af08d82Slm66018 static int 16383af08d82Slm66018 vdc_ver_negotiation(vdc_t *vdcp) 16393af08d82Slm66018 { 16403af08d82Slm66018 vio_msg_t vio_msg; 16413af08d82Slm66018 int status; 16423af08d82Slm66018 16433af08d82Slm66018 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 16443af08d82Slm66018 return (status); 16453af08d82Slm66018 16463af08d82Slm66018 /* release lock and wait for response */ 16473af08d82Slm66018 mutex_exit(&vdcp->lock); 16483af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 16493af08d82Slm66018 mutex_enter(&vdcp->lock); 16503af08d82Slm66018 if (status) { 16513af08d82Slm66018 DMSG(vdcp, 0, 16523af08d82Slm66018 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 16533af08d82Slm66018 vdcp->instance, status); 16543af08d82Slm66018 return (status); 16553af08d82Slm66018 } 16563af08d82Slm66018 16573af08d82Slm66018 /* check type and sub_type ... */ 16583af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 16593af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 16603af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 16613af08d82Slm66018 vdcp->instance); 16623af08d82Slm66018 return (EPROTO); 16633af08d82Slm66018 } 16643af08d82Slm66018 16653af08d82Slm66018 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 16663af08d82Slm66018 } 16673af08d82Slm66018 16683af08d82Slm66018 /* 16693af08d82Slm66018 * Function: 16700a55fbb7Slm66018 * vdc_init_attr_negotiation() 16710a55fbb7Slm66018 * 16720a55fbb7Slm66018 * Description: 16730a55fbb7Slm66018 * 16740a55fbb7Slm66018 * Arguments: 16750a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
16760a55fbb7Slm66018 * 16770a55fbb7Slm66018 * Return Code: 16780a55fbb7Slm66018 * 0 - Success 16790a55fbb7Slm66018 */ 16801ae08745Sheppo static int 16811ae08745Sheppo vdc_init_attr_negotiation(vdc_t *vdc) 16821ae08745Sheppo { 16831ae08745Sheppo vd_attr_msg_t pkt; 16841ae08745Sheppo size_t msglen = sizeof (pkt); 16851ae08745Sheppo int status; 16861ae08745Sheppo 16871ae08745Sheppo ASSERT(vdc != NULL); 16881ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 16891ae08745Sheppo 16903af08d82Slm66018 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 16911ae08745Sheppo 16921ae08745Sheppo /* fill in tag */ 16931ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 16941ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 16951ae08745Sheppo pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 16961ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 16971ae08745Sheppo /* fill in payload */ 16981ae08745Sheppo pkt.max_xfer_sz = vdc->max_xfer_sz; 1699*65908c77Syu, larry liu - Sun Microsystems - Beijing China pkt.vdisk_block_size = vdc->vdisk_bsize; 1700f0ca1d9aSsb155480 pkt.xfer_mode = VIO_DRING_MODE_V1_0; 17011ae08745Sheppo pkt.operations = 0; /* server will set bits of valid operations */ 17021ae08745Sheppo pkt.vdisk_type = 0; /* server will set to valid device type */ 170317cadca8Slm66018 pkt.vdisk_media = 0; /* server will set to valid media type */ 17041ae08745Sheppo pkt.vdisk_size = 0; /* server will set to valid size */ 17051ae08745Sheppo 17060a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 17073af08d82Slm66018 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 17081ae08745Sheppo 1709f3241e46Sanbui if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) { 17103af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 17118cd10891Snarayan "id(%lx) rv(%d) size(%ld)", vdc->instance, 17128cd10891Snarayan vdc->curr_server->ldc_handle, status, msglen); 1713f3241e46Sanbui if (msglen != sizeof (vd_attr_msg_t)) 17141ae08745Sheppo status = ENOMSG; 17151ae08745Sheppo 
} 17161ae08745Sheppo 17171ae08745Sheppo return (status); 17181ae08745Sheppo } 17191ae08745Sheppo 17200a55fbb7Slm66018 /* 17210a55fbb7Slm66018 * Function: 17223af08d82Slm66018 * vdc_attr_negotiation() 17233af08d82Slm66018 * 17243af08d82Slm66018 * Description: 17253af08d82Slm66018 * 17263af08d82Slm66018 * Arguments: 17273af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 17283af08d82Slm66018 * 17293af08d82Slm66018 * Return Code: 17303af08d82Slm66018 * 0 - Success 17313af08d82Slm66018 */ 17323af08d82Slm66018 static int 17333af08d82Slm66018 vdc_attr_negotiation(vdc_t *vdcp) 17343af08d82Slm66018 { 17353af08d82Slm66018 int status; 17363af08d82Slm66018 vio_msg_t vio_msg; 17373af08d82Slm66018 17383af08d82Slm66018 if (status = vdc_init_attr_negotiation(vdcp)) 17393af08d82Slm66018 return (status); 17403af08d82Slm66018 17413af08d82Slm66018 /* release lock and wait for response */ 17423af08d82Slm66018 mutex_exit(&vdcp->lock); 17433af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 17443af08d82Slm66018 mutex_enter(&vdcp->lock); 17453af08d82Slm66018 if (status) { 17463af08d82Slm66018 DMSG(vdcp, 0, 17473af08d82Slm66018 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 17483af08d82Slm66018 vdcp->instance, status); 17493af08d82Slm66018 return (status); 17503af08d82Slm66018 } 17513af08d82Slm66018 17523af08d82Slm66018 /* check type and sub_type ... 
*/ 17533af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 17543af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 17553af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 17563af08d82Slm66018 vdcp->instance); 17573af08d82Slm66018 return (EPROTO); 17583af08d82Slm66018 } 17593af08d82Slm66018 17603af08d82Slm66018 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 17613af08d82Slm66018 } 17623af08d82Slm66018 17633af08d82Slm66018 17643af08d82Slm66018 /* 17653af08d82Slm66018 * Function: 17660a55fbb7Slm66018 * vdc_init_dring_negotiate() 17670a55fbb7Slm66018 * 17680a55fbb7Slm66018 * Description: 17690a55fbb7Slm66018 * 17700a55fbb7Slm66018 * Arguments: 17710a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 17720a55fbb7Slm66018 * 17730a55fbb7Slm66018 * Return Code: 17740a55fbb7Slm66018 * 0 - Success 17750a55fbb7Slm66018 */ 17761ae08745Sheppo static int 17771ae08745Sheppo vdc_init_dring_negotiate(vdc_t *vdc) 17781ae08745Sheppo { 17791ae08745Sheppo vio_dring_reg_msg_t pkt; 17801ae08745Sheppo size_t msglen = sizeof (pkt); 17811ae08745Sheppo int status = -1; 17823af08d82Slm66018 int retry; 17833af08d82Slm66018 int nretries = 10; 17841ae08745Sheppo 17851ae08745Sheppo ASSERT(vdc != NULL); 17861ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 17871ae08745Sheppo 17883af08d82Slm66018 for (retry = 0; retry < nretries; retry++) { 17891ae08745Sheppo status = vdc_init_descriptor_ring(vdc); 17903af08d82Slm66018 if (status != EAGAIN) 17913af08d82Slm66018 break; 17923af08d82Slm66018 drv_usecwait(vdc_min_timeout_ldc); 17933af08d82Slm66018 } 17943af08d82Slm66018 17951ae08745Sheppo if (status != 0) { 17963af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 17971ae08745Sheppo vdc->instance, status); 17981ae08745Sheppo return (status); 17991ae08745Sheppo } 18003af08d82Slm66018 18013af08d82Slm66018 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 
1802e1ebb9ecSlm66018 vdc->instance, status); 18031ae08745Sheppo 18041ae08745Sheppo /* fill in tag */ 18051ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 18061ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 18071ae08745Sheppo pkt.tag.vio_subtype_env = VIO_DRING_REG; 18081ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 18091ae08745Sheppo /* fill in payload */ 18101ae08745Sheppo pkt.dring_ident = 0; 1811e1ebb9ecSlm66018 pkt.num_descriptors = vdc->dring_len; 1812e1ebb9ecSlm66018 pkt.descriptor_size = vdc->dring_entry_size; 18131ae08745Sheppo pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 18141ae08745Sheppo pkt.ncookies = vdc->dring_cookie_count; 18151ae08745Sheppo pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 18161ae08745Sheppo 18170a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 18181ae08745Sheppo if (status != 0) { 18193af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1820e1ebb9ecSlm66018 vdc->instance, status); 18211ae08745Sheppo } 18221ae08745Sheppo 18231ae08745Sheppo return (status); 18241ae08745Sheppo } 18251ae08745Sheppo 18261ae08745Sheppo 18273af08d82Slm66018 /* 18283af08d82Slm66018 * Function: 18293af08d82Slm66018 * vdc_dring_negotiation() 18303af08d82Slm66018 * 18313af08d82Slm66018 * Description: 18323af08d82Slm66018 * 18333af08d82Slm66018 * Arguments: 18343af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
18353af08d82Slm66018 * 18363af08d82Slm66018 * Return Code: 18373af08d82Slm66018 * 0 - Success 18383af08d82Slm66018 */ 18393af08d82Slm66018 static int 18403af08d82Slm66018 vdc_dring_negotiation(vdc_t *vdcp) 18413af08d82Slm66018 { 18423af08d82Slm66018 int status; 18433af08d82Slm66018 vio_msg_t vio_msg; 18443af08d82Slm66018 18453af08d82Slm66018 if (status = vdc_init_dring_negotiate(vdcp)) 18463af08d82Slm66018 return (status); 18473af08d82Slm66018 18483af08d82Slm66018 /* release lock and wait for response */ 18493af08d82Slm66018 mutex_exit(&vdcp->lock); 18503af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 18513af08d82Slm66018 mutex_enter(&vdcp->lock); 18523af08d82Slm66018 if (status) { 18533af08d82Slm66018 DMSG(vdcp, 0, 18543af08d82Slm66018 "[%d] Failed waiting for Dring negotiation response," 18553af08d82Slm66018 " rv(%d)", vdcp->instance, status); 18563af08d82Slm66018 return (status); 18573af08d82Slm66018 } 18583af08d82Slm66018 18593af08d82Slm66018 /* check type and sub_type ... */ 18603af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 18613af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 18623af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 18633af08d82Slm66018 vdcp->instance); 18643af08d82Slm66018 return (EPROTO); 18653af08d82Slm66018 } 18663af08d82Slm66018 18673af08d82Slm66018 return (vdc_handle_dring_reg_msg(vdcp, 18683af08d82Slm66018 (vio_dring_reg_msg_t *)&vio_msg)); 18693af08d82Slm66018 } 18703af08d82Slm66018 18713af08d82Slm66018 18723af08d82Slm66018 /* 18733af08d82Slm66018 * Function: 18743af08d82Slm66018 * vdc_send_rdx() 18753af08d82Slm66018 * 18763af08d82Slm66018 * Description: 18773af08d82Slm66018 * 18783af08d82Slm66018 * Arguments: 18793af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
18803af08d82Slm66018 * 18813af08d82Slm66018 * Return Code: 18823af08d82Slm66018 * 0 - Success 18833af08d82Slm66018 */ 18843af08d82Slm66018 static int 18853af08d82Slm66018 vdc_send_rdx(vdc_t *vdcp) 18863af08d82Slm66018 { 18873af08d82Slm66018 vio_msg_t msg; 18883af08d82Slm66018 size_t msglen = sizeof (vio_msg_t); 18893af08d82Slm66018 int status; 18903af08d82Slm66018 18913af08d82Slm66018 /* 18923af08d82Slm66018 * Send an RDX message to vds to indicate we are ready 18933af08d82Slm66018 * to send data 18943af08d82Slm66018 */ 18953af08d82Slm66018 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 18963af08d82Slm66018 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 18973af08d82Slm66018 msg.tag.vio_subtype_env = VIO_RDX; 18983af08d82Slm66018 msg.tag.vio_sid = vdcp->session_id; 18993af08d82Slm66018 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 19003af08d82Slm66018 if (status != 0) { 19013af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 19023af08d82Slm66018 vdcp->instance, status); 19033af08d82Slm66018 } 19043af08d82Slm66018 19053af08d82Slm66018 return (status); 19063af08d82Slm66018 } 19073af08d82Slm66018 19083af08d82Slm66018 /* 19093af08d82Slm66018 * Function: 19103af08d82Slm66018 * vdc_handle_rdx() 19113af08d82Slm66018 * 19123af08d82Slm66018 * Description: 19133af08d82Slm66018 * 19143af08d82Slm66018 * Arguments: 19153af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
19163af08d82Slm66018 * msgp - received msg 19173af08d82Slm66018 * 19183af08d82Slm66018 * Return Code: 19193af08d82Slm66018 * 0 - Success 19203af08d82Slm66018 */ 19213af08d82Slm66018 static int 19223af08d82Slm66018 vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp) 19233af08d82Slm66018 { 19243af08d82Slm66018 _NOTE(ARGUNUSED(vdcp)) 19253af08d82Slm66018 _NOTE(ARGUNUSED(msgp)) 19263af08d82Slm66018 19273af08d82Slm66018 ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL); 19283af08d82Slm66018 ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK); 19293af08d82Slm66018 ASSERT(msgp->tag.vio_subtype_env == VIO_RDX); 19303af08d82Slm66018 19313af08d82Slm66018 DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance); 19323af08d82Slm66018 19333af08d82Slm66018 return (0); 19343af08d82Slm66018 } 19353af08d82Slm66018 19363af08d82Slm66018 /* 19373af08d82Slm66018 * Function: 19383af08d82Slm66018 * vdc_rdx_exchange() 19393af08d82Slm66018 * 19403af08d82Slm66018 * Description: 19413af08d82Slm66018 * 19423af08d82Slm66018 * Arguments: 19433af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
19443af08d82Slm66018 * 19453af08d82Slm66018 * Return Code: 19463af08d82Slm66018 * 0 - Success 19473af08d82Slm66018 */ 19483af08d82Slm66018 static int 19493af08d82Slm66018 vdc_rdx_exchange(vdc_t *vdcp) 19503af08d82Slm66018 { 19513af08d82Slm66018 int status; 19523af08d82Slm66018 vio_msg_t vio_msg; 19533af08d82Slm66018 19543af08d82Slm66018 if (status = vdc_send_rdx(vdcp)) 19553af08d82Slm66018 return (status); 19563af08d82Slm66018 19573af08d82Slm66018 /* release lock and wait for response */ 19583af08d82Slm66018 mutex_exit(&vdcp->lock); 19593af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 19603af08d82Slm66018 mutex_enter(&vdcp->lock); 19613af08d82Slm66018 if (status) { 196287a7269eSachartre DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)", 196387a7269eSachartre vdcp->instance, status); 19643af08d82Slm66018 return (status); 19653af08d82Slm66018 } 19663af08d82Slm66018 19673af08d82Slm66018 /* check type and sub_type ... */ 19683af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 19693af08d82Slm66018 vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) { 197087a7269eSachartre DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance); 19713af08d82Slm66018 return (EPROTO); 19723af08d82Slm66018 } 19733af08d82Slm66018 19743af08d82Slm66018 return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg)); 19753af08d82Slm66018 } 19763af08d82Slm66018 19773af08d82Slm66018 19781ae08745Sheppo /* -------------------------------------------------------------------------- */ 19791ae08745Sheppo 19801ae08745Sheppo /* 19811ae08745Sheppo * LDC helper routines 19821ae08745Sheppo */ 19831ae08745Sheppo 19843af08d82Slm66018 static int 19853af08d82Slm66018 vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp) 19863af08d82Slm66018 { 19873af08d82Slm66018 int status; 198817cadca8Slm66018 uint64_t delay_time; 19893af08d82Slm66018 size_t len; 19903af08d82Slm66018 1991ea43803bSAlexandre Chartre /* 1992ea43803bSAlexandre Chartre * Until we get a blocking ldc read we have to 
retry until the entire 1993ea43803bSAlexandre Chartre * LDC message has arrived before ldc_read() will return that message. 1994ea43803bSAlexandre Chartre * If ldc_read() succeed but returns a zero length message then that 1995ea43803bSAlexandre Chartre * means that the LDC queue is empty and we have to wait for a 1996ea43803bSAlexandre Chartre * notification from the LDC callback which will set the read_state to 1997ea43803bSAlexandre Chartre * VDC_READ_PENDING. Note we also bail out if the channel is reset or 1998ea43803bSAlexandre Chartre * goes away. 1999ea43803bSAlexandre Chartre */ 2000ea43803bSAlexandre Chartre delay_time = vdc_ldc_read_init_delay; 20013af08d82Slm66018 2002ea43803bSAlexandre Chartre for (;;) { 2003ea43803bSAlexandre Chartre 2004ea43803bSAlexandre Chartre len = *nbytesp; 2005ea43803bSAlexandre Chartre /* 2006ea43803bSAlexandre Chartre * vdc->curr_server is protected by vdc->lock but to avoid 2007ea43803bSAlexandre Chartre * contentions we don't take the lock here. We can do this 2008ea43803bSAlexandre Chartre * safely because vdc_recv() is only called from thread 2009ea43803bSAlexandre Chartre * process_msg_thread() which is also the only thread that 2010ea43803bSAlexandre Chartre * can change vdc->curr_server. 
2011ea43803bSAlexandre Chartre */ 2012ea43803bSAlexandre Chartre status = ldc_read(vdc->curr_server->ldc_handle, 2013ea43803bSAlexandre Chartre (caddr_t)msgp, &len); 2014ea43803bSAlexandre Chartre 2015ea43803bSAlexandre Chartre if (status == EAGAIN) { 2016ea43803bSAlexandre Chartre delay_time *= 2; 2017ea43803bSAlexandre Chartre if (delay_time >= vdc_ldc_read_max_delay) 2018ea43803bSAlexandre Chartre delay_time = vdc_ldc_read_max_delay; 2019ea43803bSAlexandre Chartre delay(delay_time); 2020ea43803bSAlexandre Chartre continue; 2021ea43803bSAlexandre Chartre } 2022ea43803bSAlexandre Chartre 2023ea43803bSAlexandre Chartre if (status != 0) { 2024ea43803bSAlexandre Chartre DMSG(vdc, 0, "ldc_read returned %d\n", status); 2025ea43803bSAlexandre Chartre break; 2026ea43803bSAlexandre Chartre } 2027ea43803bSAlexandre Chartre 2028ea43803bSAlexandre Chartre if (len != 0) { 2029ea43803bSAlexandre Chartre *nbytesp = len; 2030ea43803bSAlexandre Chartre break; 2031ea43803bSAlexandre Chartre } 2032ea43803bSAlexandre Chartre 2033ea43803bSAlexandre Chartre mutex_enter(&vdc->read_lock); 20343af08d82Slm66018 20353af08d82Slm66018 while (vdc->read_state != VDC_READ_PENDING) { 20363af08d82Slm66018 20373af08d82Slm66018 /* detect if the connection has been reset */ 20383af08d82Slm66018 if (vdc->read_state == VDC_READ_RESET) { 2039ea43803bSAlexandre Chartre mutex_exit(&vdc->read_lock); 2040ea43803bSAlexandre Chartre return (ECONNRESET); 20413af08d82Slm66018 } 20423af08d82Slm66018 2043ea43803bSAlexandre Chartre vdc->read_state = VDC_READ_WAITING; 20443af08d82Slm66018 cv_wait(&vdc->read_cv, &vdc->read_lock); 20453af08d82Slm66018 } 20463af08d82Slm66018 20473af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 20483af08d82Slm66018 mutex_exit(&vdc->read_lock); 20493af08d82Slm66018 2050ea43803bSAlexandre Chartre delay_time = vdc_ldc_read_init_delay; 2051ea43803bSAlexandre Chartre } 2052ea43803bSAlexandre Chartre 20533af08d82Slm66018 return (status); 20543af08d82Slm66018 } 20553af08d82Slm66018 
20563af08d82Slm66018 20573af08d82Slm66018 20583af08d82Slm66018 #ifdef DEBUG 20593af08d82Slm66018 void 20603af08d82Slm66018 vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg) 20613af08d82Slm66018 { 20623af08d82Slm66018 char *ms, *ss, *ses; 20633af08d82Slm66018 switch (msg->tag.vio_msgtype) { 20643af08d82Slm66018 #define Q(_s) case _s : ms = #_s; break; 20653af08d82Slm66018 Q(VIO_TYPE_CTRL) 20663af08d82Slm66018 Q(VIO_TYPE_DATA) 20673af08d82Slm66018 Q(VIO_TYPE_ERR) 20683af08d82Slm66018 #undef Q 20693af08d82Slm66018 default: ms = "unknown"; break; 20703af08d82Slm66018 } 20713af08d82Slm66018 20723af08d82Slm66018 switch (msg->tag.vio_subtype) { 20733af08d82Slm66018 #define Q(_s) case _s : ss = #_s; break; 20743af08d82Slm66018 Q(VIO_SUBTYPE_INFO) 20753af08d82Slm66018 Q(VIO_SUBTYPE_ACK) 20763af08d82Slm66018 Q(VIO_SUBTYPE_NACK) 20773af08d82Slm66018 #undef Q 20783af08d82Slm66018 default: ss = "unknown"; break; 20793af08d82Slm66018 } 20803af08d82Slm66018 20813af08d82Slm66018 switch (msg->tag.vio_subtype_env) { 20823af08d82Slm66018 #define Q(_s) case _s : ses = #_s; break; 20833af08d82Slm66018 Q(VIO_VER_INFO) 20843af08d82Slm66018 Q(VIO_ATTR_INFO) 20853af08d82Slm66018 Q(VIO_DRING_REG) 20863af08d82Slm66018 Q(VIO_DRING_UNREG) 20873af08d82Slm66018 Q(VIO_RDX) 20883af08d82Slm66018 Q(VIO_PKT_DATA) 20893af08d82Slm66018 Q(VIO_DESC_DATA) 20903af08d82Slm66018 Q(VIO_DRING_DATA) 20913af08d82Slm66018 #undef Q 20923af08d82Slm66018 default: ses = "unknown"; break; 20933af08d82Slm66018 } 20943af08d82Slm66018 20953af08d82Slm66018 DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n", 20963af08d82Slm66018 msg->tag.vio_msgtype, msg->tag.vio_subtype, 20973af08d82Slm66018 msg->tag.vio_subtype_env, ms, ss, ses); 20983af08d82Slm66018 } 20993af08d82Slm66018 #endif 21003af08d82Slm66018 21011ae08745Sheppo /* 21021ae08745Sheppo * Function: 21031ae08745Sheppo * vdc_send() 21041ae08745Sheppo * 21051ae08745Sheppo * Description: 21061ae08745Sheppo * The function encapsulates the call to write a message using LDC. 
21071ae08745Sheppo * If LDC indicates that the call failed due to the queue being full, 210817cadca8Slm66018 * we retry the ldc_write(), otherwise we return the error returned by LDC. 21091ae08745Sheppo * 21101ae08745Sheppo * Arguments: 21111ae08745Sheppo * ldc_handle - LDC handle for the channel this instance of vdc uses 21121ae08745Sheppo * pkt - address of LDC message to be sent 21131ae08745Sheppo * msglen - the size of the message being sent. When the function 21141ae08745Sheppo * returns, this contains the number of bytes written. 21151ae08745Sheppo * 21161ae08745Sheppo * Return Code: 21171ae08745Sheppo * 0 - Success. 21181ae08745Sheppo * EINVAL - pkt or msglen were NULL 21191ae08745Sheppo * ECONNRESET - The connection was not up. 21201ae08745Sheppo * EWOULDBLOCK - LDC queue is full 21211ae08745Sheppo * xxx - other error codes returned by ldc_write 21221ae08745Sheppo */ 21231ae08745Sheppo static int 21240a55fbb7Slm66018 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 21251ae08745Sheppo { 21261ae08745Sheppo size_t size = 0; 21271ae08745Sheppo int status = 0; 21283af08d82Slm66018 clock_t delay_ticks; 21291ae08745Sheppo 21300a55fbb7Slm66018 ASSERT(vdc != NULL); 21310a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 21321ae08745Sheppo ASSERT(msglen != NULL); 21331ae08745Sheppo ASSERT(*msglen != 0); 21341ae08745Sheppo 21353af08d82Slm66018 #ifdef DEBUG 213617cadca8Slm66018 vdc_decode_tag(vdc, (vio_msg_t *)(uintptr_t)pkt); 21373af08d82Slm66018 #endif 21383af08d82Slm66018 /* 21393af08d82Slm66018 * Wait indefinitely to send if channel 21403af08d82Slm66018 * is busy, but bail out if we succeed or 21413af08d82Slm66018 * if the channel closes or is reset. 
21423af08d82Slm66018 */ 21433af08d82Slm66018 delay_ticks = vdc_hz_min_ldc_delay; 21441ae08745Sheppo do { 21451ae08745Sheppo size = *msglen; 21468cd10891Snarayan status = ldc_write(vdc->curr_server->ldc_handle, pkt, &size); 21473af08d82Slm66018 if (status == EWOULDBLOCK) { 21483af08d82Slm66018 delay(delay_ticks); 21493af08d82Slm66018 /* geometric backoff */ 21503af08d82Slm66018 delay_ticks *= 2; 21513af08d82Slm66018 if (delay_ticks > vdc_hz_max_ldc_delay) 21523af08d82Slm66018 delay_ticks = vdc_hz_max_ldc_delay; 21533af08d82Slm66018 } 21543af08d82Slm66018 } while (status == EWOULDBLOCK); 21551ae08745Sheppo 21560a55fbb7Slm66018 /* if LDC had serious issues --- reset vdc state */ 21570a55fbb7Slm66018 if (status == EIO || status == ECONNRESET) { 21583af08d82Slm66018 /* LDC had serious issues --- reset vdc state */ 21593af08d82Slm66018 mutex_enter(&vdc->read_lock); 21603af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 21613af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 21623af08d82Slm66018 cv_signal(&vdc->read_cv); 21633af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 21643af08d82Slm66018 mutex_exit(&vdc->read_lock); 21653af08d82Slm66018 21663af08d82Slm66018 /* wake up any waiters in the reset thread */ 21673af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 21683af08d82Slm66018 DMSG(vdc, 0, "[%d] write reset - " 21693af08d82Slm66018 "vdc is resetting ..\n", vdc->instance); 21703af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 21713af08d82Slm66018 cv_signal(&vdc->initwait_cv); 21723af08d82Slm66018 } 21733af08d82Slm66018 21743af08d82Slm66018 return (ECONNRESET); 21750a55fbb7Slm66018 } 21760a55fbb7Slm66018 21771ae08745Sheppo /* return the last size written */ 21781ae08745Sheppo *msglen = size; 21791ae08745Sheppo 21801ae08745Sheppo return (status); 21811ae08745Sheppo } 21821ae08745Sheppo 21831ae08745Sheppo /* 21841ae08745Sheppo * Function: 2185655fd6a9Sachartre * vdc_get_md_node 21861ae08745Sheppo * 21871ae08745Sheppo * Description: 
21888cd10891Snarayan * Get the MD, the device node for the given disk instance. The 21898cd10891Snarayan * caller is responsible for cleaning up the reference to the 21908cd10891Snarayan * returned MD (mdpp) by calling md_fini_handle(). 21911ae08745Sheppo * 21921ae08745Sheppo * Arguments: 21931ae08745Sheppo * dip - dev info pointer for this instance of the device driver. 2194655fd6a9Sachartre * mdpp - the returned MD. 2195655fd6a9Sachartre * vd_nodep - the returned device node. 21961ae08745Sheppo * 21971ae08745Sheppo * Return Code: 21981ae08745Sheppo * 0 - Success. 21991ae08745Sheppo * ENOENT - Expected node or property did not exist. 22001ae08745Sheppo * ENXIO - Unexpected error communicating with MD framework 22011ae08745Sheppo */ 22021ae08745Sheppo static int 22038cd10891Snarayan vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep) 22041ae08745Sheppo { 22051ae08745Sheppo int status = ENOENT; 22061ae08745Sheppo char *node_name = NULL; 22071ae08745Sheppo md_t *mdp = NULL; 22081ae08745Sheppo int num_nodes; 22091ae08745Sheppo int num_vdevs; 22101ae08745Sheppo mde_cookie_t rootnode; 22111ae08745Sheppo mde_cookie_t *listp = NULL; 22121ae08745Sheppo boolean_t found_inst = B_FALSE; 22131ae08745Sheppo int listsz; 22141ae08745Sheppo int idx; 22151ae08745Sheppo uint64_t md_inst; 22161ae08745Sheppo int obp_inst; 22171ae08745Sheppo int instance = ddi_get_instance(dip); 22181ae08745Sheppo 22191ae08745Sheppo /* 22201ae08745Sheppo * Get the OBP instance number for comparison with the MD instance 22211ae08745Sheppo * 22221ae08745Sheppo * The "cfg-handle" property of a vdc node in an MD contains the MD's 22231ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 22241ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 22251ae08745Sheppo * the "reg" property on the node in the device tree it builds from 22261ae08745Sheppo * the MD and passes to Solaris. 
Thus, we look up the devinfo node's 22271ae08745Sheppo * "reg" property value to uniquely identify this device instance. 22281ae08745Sheppo * If the "reg" property cannot be found, the device tree state is 22291ae08745Sheppo * presumably so broken that there is no point in continuing. 22301ae08745Sheppo */ 22311ae08745Sheppo if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 22321ae08745Sheppo cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 22331ae08745Sheppo return (ENOENT); 22341ae08745Sheppo } 22351ae08745Sheppo obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 22361ae08745Sheppo OBP_REG, -1); 22373af08d82Slm66018 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 22381ae08745Sheppo 22391ae08745Sheppo /* 2240655fd6a9Sachartre * We now walk the MD nodes to find the node for this vdisk. 22411ae08745Sheppo */ 22421ae08745Sheppo if ((mdp = md_get_handle()) == NULL) { 22431ae08745Sheppo cmn_err(CE_WARN, "unable to init machine description"); 22441ae08745Sheppo return (ENXIO); 22451ae08745Sheppo } 22461ae08745Sheppo 22471ae08745Sheppo num_nodes = md_node_count(mdp); 22481ae08745Sheppo ASSERT(num_nodes > 0); 22491ae08745Sheppo 22501ae08745Sheppo listsz = num_nodes * sizeof (mde_cookie_t); 22511ae08745Sheppo 22521ae08745Sheppo /* allocate memory for nodes */ 22531ae08745Sheppo listp = kmem_zalloc(listsz, KM_SLEEP); 22541ae08745Sheppo 22551ae08745Sheppo rootnode = md_root_node(mdp); 22561ae08745Sheppo ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 22571ae08745Sheppo 22581ae08745Sheppo /* 22591ae08745Sheppo * Search for all the virtual devices, we will then check to see which 22601ae08745Sheppo * ones are disk nodes. 
22611ae08745Sheppo */ 22621ae08745Sheppo num_vdevs = md_scan_dag(mdp, rootnode, 22631ae08745Sheppo md_find_name(mdp, VDC_MD_VDEV_NAME), 22641ae08745Sheppo md_find_name(mdp, "fwd"), listp); 22651ae08745Sheppo 22661ae08745Sheppo if (num_vdevs <= 0) { 22671ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 22681ae08745Sheppo status = ENOENT; 22691ae08745Sheppo goto done; 22701ae08745Sheppo } 22711ae08745Sheppo 22723af08d82Slm66018 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 22731ae08745Sheppo for (idx = 0; idx < num_vdevs; idx++) { 22741ae08745Sheppo status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 22751ae08745Sheppo if ((status != 0) || (node_name == NULL)) { 22761ae08745Sheppo cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 22771ae08745Sheppo ": err %d", VDC_MD_VDEV_NAME, status); 22781ae08745Sheppo continue; 22791ae08745Sheppo } 22801ae08745Sheppo 22813af08d82Slm66018 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 22821ae08745Sheppo if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 22831ae08745Sheppo status = md_get_prop_val(mdp, listp[idx], 22841ae08745Sheppo VDC_MD_CFG_HDL, &md_inst); 22853af08d82Slm66018 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 22863af08d82Slm66018 instance, md_inst); 22871ae08745Sheppo if ((status == 0) && (md_inst == obp_inst)) { 22881ae08745Sheppo found_inst = B_TRUE; 22891ae08745Sheppo break; 22901ae08745Sheppo } 22911ae08745Sheppo } 22921ae08745Sheppo } 22931ae08745Sheppo 22940a55fbb7Slm66018 if (!found_inst) { 22953af08d82Slm66018 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 22961ae08745Sheppo status = ENOENT; 22971ae08745Sheppo goto done; 22981ae08745Sheppo } 22993af08d82Slm66018 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 23001ae08745Sheppo 2301655fd6a9Sachartre *vd_nodep = listp[idx]; 2302655fd6a9Sachartre *mdpp = mdp; 2303655fd6a9Sachartre done: 2304655fd6a9Sachartre kmem_free(listp, listsz); 2305655fd6a9Sachartre return (status); 2306655fd6a9Sachartre 
} 2307655fd6a9Sachartre 2308655fd6a9Sachartre /* 2309655fd6a9Sachartre * Function: 23108cd10891Snarayan * vdc_init_ports 2311655fd6a9Sachartre * 2312655fd6a9Sachartre * Description: 23138cd10891Snarayan * Initialize all the ports for this vdisk instance. 2314655fd6a9Sachartre * 2315655fd6a9Sachartre * Arguments: 23168cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 23178cd10891Snarayan * mdp - md pointer 23188cd10891Snarayan * vd_nodep - device md node. 2319655fd6a9Sachartre * 2320655fd6a9Sachartre * Return Code: 2321655fd6a9Sachartre * 0 - Success. 2322655fd6a9Sachartre * ENOENT - Expected node or property did not exist. 2323655fd6a9Sachartre */ 2324655fd6a9Sachartre static int 23258cd10891Snarayan vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep) 2326655fd6a9Sachartre { 2327655fd6a9Sachartre int status = 0; 23288cd10891Snarayan int idx; 23298cd10891Snarayan int num_nodes; 23308cd10891Snarayan int num_vports; 23318cd10891Snarayan int num_chans; 23328cd10891Snarayan int listsz; 23338cd10891Snarayan mde_cookie_t vd_port; 23348cd10891Snarayan mde_cookie_t *chanp = NULL; 23358cd10891Snarayan mde_cookie_t *portp = NULL; 23368cd10891Snarayan vdc_server_t *srvr; 23378cd10891Snarayan vdc_server_t *prev_srvr = NULL; 2338655fd6a9Sachartre 23398cd10891Snarayan /* 23408cd10891Snarayan * We now walk the MD nodes to find the port nodes for this vdisk. 
23418cd10891Snarayan */ 2342655fd6a9Sachartre num_nodes = md_node_count(mdp); 2343655fd6a9Sachartre ASSERT(num_nodes > 0); 2344655fd6a9Sachartre 2345655fd6a9Sachartre listsz = num_nodes * sizeof (mde_cookie_t); 2346655fd6a9Sachartre 2347655fd6a9Sachartre /* allocate memory for nodes */ 23488cd10891Snarayan portp = kmem_zalloc(listsz, KM_SLEEP); 2349655fd6a9Sachartre chanp = kmem_zalloc(listsz, KM_SLEEP); 2350655fd6a9Sachartre 23518cd10891Snarayan num_vports = md_scan_dag(mdp, vd_nodep, 23528cd10891Snarayan md_find_name(mdp, VDC_MD_PORT_NAME), 23538cd10891Snarayan md_find_name(mdp, "fwd"), portp); 23548cd10891Snarayan if (num_vports == 0) { 23558cd10891Snarayan DMSGX(0, "Found no '%s' node for '%s' port\n", 23568cd10891Snarayan VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 23578cd10891Snarayan status = ENOENT; 23588cd10891Snarayan goto done; 23598cd10891Snarayan } 23608cd10891Snarayan 23618cd10891Snarayan DMSGX(1, "Found %d '%s' node(s) for '%s' port\n", 23628cd10891Snarayan num_vports, VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 23638cd10891Snarayan 23648cd10891Snarayan vdc->num_servers = 0; 23658cd10891Snarayan for (idx = 0; idx < num_vports; idx++) { 23668cd10891Snarayan 23678cd10891Snarayan /* initialize this port */ 23688cd10891Snarayan vd_port = portp[idx]; 23698cd10891Snarayan srvr = kmem_zalloc(sizeof (vdc_server_t), KM_SLEEP); 23708cd10891Snarayan srvr->vdcp = vdc; 23718cd10891Snarayan 23728cd10891Snarayan /* get port id */ 23738cd10891Snarayan if (md_get_prop_val(mdp, vd_port, VDC_MD_ID, &srvr->id) != 0) { 23748cd10891Snarayan cmn_err(CE_NOTE, "vDisk port '%s' property not found", 23758cd10891Snarayan VDC_MD_ID); 23768cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 23778cd10891Snarayan continue; 23788cd10891Snarayan } 23798cd10891Snarayan 23808cd10891Snarayan /* set the connection timeout */ 23818cd10891Snarayan if (md_get_prop_val(mdp, vd_port, VDC_MD_TIMEOUT, 23828cd10891Snarayan &srvr->ctimeout) != 0) { 23838cd10891Snarayan srvr->ctimeout = 0; 
23848cd10891Snarayan } 23858cd10891Snarayan 23868cd10891Snarayan /* get the ldc id */ 23878cd10891Snarayan num_chans = md_scan_dag(mdp, vd_port, 23881ae08745Sheppo md_find_name(mdp, VDC_MD_CHAN_NAME), 23891ae08745Sheppo md_find_name(mdp, "fwd"), chanp); 23901ae08745Sheppo 23911ae08745Sheppo /* expecting at least one channel */ 23921ae08745Sheppo if (num_chans <= 0) { 23931ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node for '%s' port", 23941ae08745Sheppo VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 23958cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 23968cd10891Snarayan continue; 23971ae08745Sheppo } else if (num_chans != 1) { 23988cd10891Snarayan DMSGX(0, "Expected 1 '%s' node for '%s' port, " 23998cd10891Snarayan "found %d\n", VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 24008cd10891Snarayan num_chans); 24011ae08745Sheppo } 24021ae08745Sheppo 24031ae08745Sheppo /* 24041ae08745Sheppo * We use the first channel found (index 0), irrespective of how 24051ae08745Sheppo * many are there in total. 24061ae08745Sheppo */ 24078cd10891Snarayan if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, 24088cd10891Snarayan &srvr->ldc_id) != 0) { 24098cd10891Snarayan cmn_err(CE_NOTE, "Channel '%s' property not found", 24108cd10891Snarayan VDC_MD_ID); 24118cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 24128cd10891Snarayan continue; 24138cd10891Snarayan } 24148cd10891Snarayan 24158cd10891Snarayan /* 24168cd10891Snarayan * now initialise LDC channel which will be used to 24178cd10891Snarayan * communicate with this server 24188cd10891Snarayan */ 24198cd10891Snarayan if (vdc_do_ldc_init(vdc, srvr) != 0) { 24208cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 24218cd10891Snarayan continue; 24228cd10891Snarayan } 24238cd10891Snarayan 24248cd10891Snarayan /* add server to list */ 2425d7400d00Sachartre if (prev_srvr) 24268cd10891Snarayan prev_srvr->next = srvr; 2427d7400d00Sachartre else 24288cd10891Snarayan vdc->server_list = srvr; 2429d7400d00Sachartre 24308cd10891Snarayan prev_srvr = 
srvr; 24318cd10891Snarayan 24328cd10891Snarayan /* inc numbers of servers */ 24338cd10891Snarayan vdc->num_servers++; 24348cd10891Snarayan } 24358cd10891Snarayan 24368cd10891Snarayan /* 24378cd10891Snarayan * Adjust the max number of handshake retries to match 24388cd10891Snarayan * the number of vdisk servers. 24398cd10891Snarayan */ 24408cd10891Snarayan if (vdc_hshake_retries < vdc->num_servers) 24418cd10891Snarayan vdc_hshake_retries = vdc->num_servers; 24428cd10891Snarayan 24438cd10891Snarayan /* pick first server as current server */ 24448cd10891Snarayan if (vdc->server_list != NULL) { 24458cd10891Snarayan vdc->curr_server = vdc->server_list; 24468cd10891Snarayan status = 0; 24478cd10891Snarayan } else { 24481ae08745Sheppo status = ENOENT; 24491ae08745Sheppo } 24501ae08745Sheppo 24511ae08745Sheppo done: 24521ae08745Sheppo kmem_free(chanp, listsz); 24538cd10891Snarayan kmem_free(portp, listsz); 24541ae08745Sheppo return (status); 24551ae08745Sheppo } 24561ae08745Sheppo 24578cd10891Snarayan 24588cd10891Snarayan /* 24598cd10891Snarayan * Function: 24608cd10891Snarayan * vdc_do_ldc_up 24618cd10891Snarayan * 24628cd10891Snarayan * Description: 24638cd10891Snarayan * Bring the channel for the current server up. 24648cd10891Snarayan * 24658cd10891Snarayan * Arguments: 24668cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 24678cd10891Snarayan * 24688cd10891Snarayan * Return Code: 24698cd10891Snarayan * 0 - Success. 
24708cd10891Snarayan * EINVAL - Driver is detaching / LDC error 24718cd10891Snarayan * ECONNREFUSED - Other end is not listening 24728cd10891Snarayan */ 24730a55fbb7Slm66018 static int 24740a55fbb7Slm66018 vdc_do_ldc_up(vdc_t *vdc) 24750a55fbb7Slm66018 { 24760a55fbb7Slm66018 int status; 24773af08d82Slm66018 ldc_status_t ldc_state; 24780a55fbb7Slm66018 24798cd10891Snarayan ASSERT(MUTEX_HELD(&vdc->lock)); 24808cd10891Snarayan 24813af08d82Slm66018 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 24828cd10891Snarayan vdc->instance, vdc->curr_server->ldc_id); 24833af08d82Slm66018 24843af08d82Slm66018 if (vdc->lifecycle == VDC_LC_DETACHING) 24853af08d82Slm66018 return (EINVAL); 24860a55fbb7Slm66018 24878cd10891Snarayan if ((status = ldc_up(vdc->curr_server->ldc_handle)) != 0) { 24880a55fbb7Slm66018 switch (status) { 24890a55fbb7Slm66018 case ECONNREFUSED: /* listener not ready at other end */ 24903af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 24918cd10891Snarayan vdc->instance, vdc->curr_server->ldc_id, status); 24920a55fbb7Slm66018 status = 0; 24930a55fbb7Slm66018 break; 24940a55fbb7Slm66018 default: 24953af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 24968cd10891Snarayan "channel=%ld, err=%d", vdc->instance, 24978cd10891Snarayan vdc->curr_server->ldc_id, status); 24983af08d82Slm66018 break; 24993af08d82Slm66018 } 25003af08d82Slm66018 } 25013af08d82Slm66018 25028cd10891Snarayan if (ldc_status(vdc->curr_server->ldc_handle, &ldc_state) == 0) { 25038cd10891Snarayan vdc->curr_server->ldc_state = ldc_state; 25043af08d82Slm66018 if (ldc_state == LDC_UP) { 25053af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC channel already up\n", 25063af08d82Slm66018 vdc->instance); 25073af08d82Slm66018 vdc->seq_num = 1; 25083af08d82Slm66018 vdc->seq_num_reply = 0; 25090a55fbb7Slm66018 } 25100a55fbb7Slm66018 } 25110a55fbb7Slm66018 25120a55fbb7Slm66018 return (status); 25130a55fbb7Slm66018 } 25140a55fbb7Slm66018 25150a55fbb7Slm66018 /* 25160a55fbb7Slm66018 * Function: 
25170a55fbb7Slm66018 * vdc_terminate_ldc() 25180a55fbb7Slm66018 * 25190a55fbb7Slm66018 * Description: 25200a55fbb7Slm66018 * 25210a55fbb7Slm66018 * Arguments: 25220a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 25238cd10891Snarayan * srvr - vdc per-server info structure 25240a55fbb7Slm66018 * 25250a55fbb7Slm66018 * Return Code: 25260a55fbb7Slm66018 * None 25270a55fbb7Slm66018 */ 25281ae08745Sheppo static void 25298cd10891Snarayan vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr) 25301ae08745Sheppo { 25311ae08745Sheppo int instance = ddi_get_instance(vdc->dip); 25321ae08745Sheppo 25338cd10891Snarayan if (srvr->state & VDC_LDC_OPEN) { 25348cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 25358cd10891Snarayan (void) ldc_close(srvr->ldc_handle); 25368cd10891Snarayan } 25378cd10891Snarayan if (srvr->state & VDC_LDC_CB) { 25388cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 25398cd10891Snarayan (void) ldc_unreg_callback(srvr->ldc_handle); 25408cd10891Snarayan } 25418cd10891Snarayan if (srvr->state & VDC_LDC_INIT) { 25428cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 25438cd10891Snarayan (void) ldc_fini(srvr->ldc_handle); 25448cd10891Snarayan srvr->ldc_handle = NULL; 25458cd10891Snarayan } 25468cd10891Snarayan 25478cd10891Snarayan srvr->state &= ~(VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN); 25488cd10891Snarayan } 25498cd10891Snarayan 25508cd10891Snarayan /* 25518cd10891Snarayan * Function: 25528cd10891Snarayan * vdc_fini_ports() 25538cd10891Snarayan * 25548cd10891Snarayan * Description: 25558cd10891Snarayan * Finalize all ports by closing the channel associated with each 25568cd10891Snarayan * port and also freeing the server structure. 25578cd10891Snarayan * 25588cd10891Snarayan * Arguments: 25598cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 
25608cd10891Snarayan * 25618cd10891Snarayan * Return Code: 25628cd10891Snarayan * None 25638cd10891Snarayan */ 25648cd10891Snarayan static void 25658cd10891Snarayan vdc_fini_ports(vdc_t *vdc) 25668cd10891Snarayan { 25678cd10891Snarayan int instance = ddi_get_instance(vdc->dip); 25688cd10891Snarayan vdc_server_t *srvr, *prev_srvr; 25698cd10891Snarayan 25701ae08745Sheppo ASSERT(vdc != NULL); 25711ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 25721ae08745Sheppo 25733af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 25741ae08745Sheppo 25758cd10891Snarayan srvr = vdc->server_list; 25768cd10891Snarayan 25778cd10891Snarayan while (srvr) { 25788cd10891Snarayan 25798cd10891Snarayan vdc_terminate_ldc(vdc, srvr); 25808cd10891Snarayan 25818cd10891Snarayan /* next server */ 25828cd10891Snarayan prev_srvr = srvr; 25838cd10891Snarayan srvr = srvr->next; 25848cd10891Snarayan 25858cd10891Snarayan /* free server */ 25868cd10891Snarayan kmem_free(prev_srvr, sizeof (vdc_server_t)); 25871ae08745Sheppo } 25881ae08745Sheppo 25898cd10891Snarayan vdc->server_list = NULL; 25901ae08745Sheppo } 25911ae08745Sheppo 25921ae08745Sheppo /* -------------------------------------------------------------------------- */ 25931ae08745Sheppo 25941ae08745Sheppo /* 25951ae08745Sheppo * Descriptor Ring helper routines 25961ae08745Sheppo */ 25971ae08745Sheppo 25980a55fbb7Slm66018 /* 25990a55fbb7Slm66018 * Function: 26000a55fbb7Slm66018 * vdc_init_descriptor_ring() 26010a55fbb7Slm66018 * 26020a55fbb7Slm66018 * Description: 26030a55fbb7Slm66018 * 26040a55fbb7Slm66018 * Arguments: 26050a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
26060a55fbb7Slm66018 * 26070a55fbb7Slm66018 * Return Code: 26080a55fbb7Slm66018 * 0 - Success 26090a55fbb7Slm66018 */ 26101ae08745Sheppo static int 26111ae08745Sheppo vdc_init_descriptor_ring(vdc_t *vdc) 26121ae08745Sheppo { 26131ae08745Sheppo vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 26140a55fbb7Slm66018 int status = 0; 26151ae08745Sheppo int i; 26161ae08745Sheppo 26173af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 26181ae08745Sheppo 26191ae08745Sheppo ASSERT(vdc != NULL); 26201ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 26211ae08745Sheppo 2622e1ebb9ecSlm66018 /* ensure we have enough room to store max sized block */ 2623e1ebb9ecSlm66018 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2624e1ebb9ecSlm66018 26250a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 26263af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2627e1ebb9ecSlm66018 /* 2628e1ebb9ecSlm66018 * Calculate the maximum block size we can transmit using one 2629e1ebb9ecSlm66018 * Descriptor Ring entry from the attributes returned by the 2630e1ebb9ecSlm66018 * vDisk server. This is subject to a minimum of 'maxphys' 2631e1ebb9ecSlm66018 * as we do not have the capability to split requests over 2632e1ebb9ecSlm66018 * multiple DRing entries. 
2633e1ebb9ecSlm66018 */ 2634*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((vdc->max_xfer_sz * vdc->vdisk_bsize) < maxphys) { 26353af08d82Slm66018 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2636e1ebb9ecSlm66018 vdc->instance); 2637e1ebb9ecSlm66018 vdc->dring_max_cookies = maxphys / PAGESIZE; 2638e1ebb9ecSlm66018 } else { 2639e1ebb9ecSlm66018 vdc->dring_max_cookies = 2640*65908c77Syu, larry liu - Sun Microsystems - Beijing China (vdc->max_xfer_sz * vdc->vdisk_bsize) / PAGESIZE; 2641e1ebb9ecSlm66018 } 2642e1ebb9ecSlm66018 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2643e1ebb9ecSlm66018 (sizeof (ldc_mem_cookie_t) * 2644e1ebb9ecSlm66018 (vdc->dring_max_cookies - 1))); 2645e1ebb9ecSlm66018 vdc->dring_len = VD_DRING_LEN; 2646e1ebb9ecSlm66018 2647e1ebb9ecSlm66018 status = ldc_mem_dring_create(vdc->dring_len, 26488cd10891Snarayan vdc->dring_entry_size, &vdc->dring_hdl); 26498cd10891Snarayan if ((vdc->dring_hdl == NULL) || (status != 0)) { 26503af08d82Slm66018 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2651e1ebb9ecSlm66018 vdc->instance); 26521ae08745Sheppo return (status); 26531ae08745Sheppo } 26540a55fbb7Slm66018 vdc->initialized |= VDC_DRING_INIT; 26550a55fbb7Slm66018 } 26561ae08745Sheppo 26570a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 26583af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 26590a55fbb7Slm66018 vdc->dring_cookie = 26600a55fbb7Slm66018 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 26611ae08745Sheppo 26628cd10891Snarayan status = ldc_mem_dring_bind(vdc->curr_server->ldc_handle, 26638cd10891Snarayan vdc->dring_hdl, 26644bac2208Snarayan LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 26650a55fbb7Slm66018 &vdc->dring_cookie[0], 26661ae08745Sheppo &vdc->dring_cookie_count); 26671ae08745Sheppo if (status != 0) { 26683af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 26693af08d82Slm66018 "(%lx) to channel (%lx) status=%d\n", 26708cd10891Snarayan vdc->instance, 
vdc->dring_hdl, 26718cd10891Snarayan vdc->curr_server->ldc_handle, status); 26721ae08745Sheppo return (status); 26731ae08745Sheppo } 26741ae08745Sheppo ASSERT(vdc->dring_cookie_count == 1); 26751ae08745Sheppo vdc->initialized |= VDC_DRING_BOUND; 26760a55fbb7Slm66018 } 26771ae08745Sheppo 26788cd10891Snarayan status = ldc_mem_dring_info(vdc->dring_hdl, &vdc->dring_mem_info); 26791ae08745Sheppo if (status != 0) { 26803af08d82Slm66018 DMSG(vdc, 0, 26813af08d82Slm66018 "[%d] Failed to get info for descriptor ring (%lx)\n", 26828cd10891Snarayan vdc->instance, vdc->dring_hdl); 26831ae08745Sheppo return (status); 26841ae08745Sheppo } 26851ae08745Sheppo 26860a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 26873af08d82Slm66018 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 26880a55fbb7Slm66018 26891ae08745Sheppo /* Allocate the local copy of this dring */ 26900a55fbb7Slm66018 vdc->local_dring = 2691e1ebb9ecSlm66018 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 26921ae08745Sheppo KM_SLEEP); 26931ae08745Sheppo vdc->initialized |= VDC_DRING_LOCAL; 26940a55fbb7Slm66018 } 26951ae08745Sheppo 26961ae08745Sheppo /* 26970a55fbb7Slm66018 * Mark all DRing entries as free and initialize the private 26980a55fbb7Slm66018 * descriptor's memory handles. If any entry is initialized, 26990a55fbb7Slm66018 * we need to free it later so we set the bit in 'initialized' 27000a55fbb7Slm66018 * at the start. 
27011ae08745Sheppo */ 27021ae08745Sheppo vdc->initialized |= VDC_DRING_ENTRY; 2703e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 27041ae08745Sheppo dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 27051ae08745Sheppo dep->hdr.dstate = VIO_DESC_FREE; 27061ae08745Sheppo 27078cd10891Snarayan status = ldc_mem_alloc_handle(vdc->curr_server->ldc_handle, 27081ae08745Sheppo &vdc->local_dring[i].desc_mhdl); 27091ae08745Sheppo if (status != 0) { 27103af08d82Slm66018 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 27111ae08745Sheppo " descriptor %d", vdc->instance, i); 27121ae08745Sheppo return (status); 27131ae08745Sheppo } 27143af08d82Slm66018 vdc->local_dring[i].is_free = B_TRUE; 27151ae08745Sheppo vdc->local_dring[i].dep = dep; 27161ae08745Sheppo } 27171ae08745Sheppo 27183af08d82Slm66018 /* Initialize the starting index */ 27193af08d82Slm66018 vdc->dring_curr_idx = 0; 27201ae08745Sheppo 27211ae08745Sheppo return (status); 27221ae08745Sheppo } 27231ae08745Sheppo 27240a55fbb7Slm66018 /* 27250a55fbb7Slm66018 * Function: 27260a55fbb7Slm66018 * vdc_destroy_descriptor_ring() 27270a55fbb7Slm66018 * 27280a55fbb7Slm66018 * Description: 27290a55fbb7Slm66018 * 27300a55fbb7Slm66018 * Arguments: 27310a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
27320a55fbb7Slm66018 * 27330a55fbb7Slm66018 * Return Code: 27340a55fbb7Slm66018 * None 27350a55fbb7Slm66018 */ 27361ae08745Sheppo static void 27371ae08745Sheppo vdc_destroy_descriptor_ring(vdc_t *vdc) 27381ae08745Sheppo { 27390a55fbb7Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 27401ae08745Sheppo ldc_mem_handle_t mhdl = NULL; 27413af08d82Slm66018 ldc_mem_info_t minfo; 27421ae08745Sheppo int status = -1; 27431ae08745Sheppo int i; /* loop */ 27441ae08745Sheppo 27451ae08745Sheppo ASSERT(vdc != NULL); 27461ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 27471ae08745Sheppo 27483af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 27491ae08745Sheppo 27501ae08745Sheppo if (vdc->initialized & VDC_DRING_ENTRY) { 27513af08d82Slm66018 DMSG(vdc, 0, 27523af08d82Slm66018 "[%d] Removing Local DRing entries\n", vdc->instance); 2753e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 27540a55fbb7Slm66018 ldep = &vdc->local_dring[i]; 27550a55fbb7Slm66018 mhdl = ldep->desc_mhdl; 27561ae08745Sheppo 27570a55fbb7Slm66018 if (mhdl == NULL) 27580a55fbb7Slm66018 continue; 27590a55fbb7Slm66018 27603af08d82Slm66018 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 27613af08d82Slm66018 DMSG(vdc, 0, 27623af08d82Slm66018 "ldc_mem_info returned an error: %d\n", 27633af08d82Slm66018 status); 27643af08d82Slm66018 27653af08d82Slm66018 /* 27663af08d82Slm66018 * This must mean that the mem handle 27673af08d82Slm66018 * is not valid. Clear it out so that 27683af08d82Slm66018 * no one tries to use it. 
27693af08d82Slm66018 */ 27703af08d82Slm66018 ldep->desc_mhdl = NULL; 27713af08d82Slm66018 continue; 27723af08d82Slm66018 } 27733af08d82Slm66018 27743af08d82Slm66018 if (minfo.status == LDC_BOUND) { 27753af08d82Slm66018 (void) ldc_mem_unbind_handle(mhdl); 27763af08d82Slm66018 } 27773af08d82Slm66018 27781ae08745Sheppo (void) ldc_mem_free_handle(mhdl); 27793af08d82Slm66018 27803af08d82Slm66018 ldep->desc_mhdl = NULL; 27811ae08745Sheppo } 27821ae08745Sheppo vdc->initialized &= ~VDC_DRING_ENTRY; 27831ae08745Sheppo } 27841ae08745Sheppo 27851ae08745Sheppo if (vdc->initialized & VDC_DRING_LOCAL) { 27863af08d82Slm66018 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 27871ae08745Sheppo kmem_free(vdc->local_dring, 2788e1ebb9ecSlm66018 vdc->dring_len * sizeof (vdc_local_desc_t)); 27891ae08745Sheppo vdc->initialized &= ~VDC_DRING_LOCAL; 27901ae08745Sheppo } 27911ae08745Sheppo 27921ae08745Sheppo if (vdc->initialized & VDC_DRING_BOUND) { 27933af08d82Slm66018 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 27948cd10891Snarayan status = ldc_mem_dring_unbind(vdc->dring_hdl); 27951ae08745Sheppo if (status == 0) { 27961ae08745Sheppo vdc->initialized &= ~VDC_DRING_BOUND; 27971ae08745Sheppo } else { 27983af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 27998cd10891Snarayan vdc->instance, status, vdc->dring_hdl); 28001ae08745Sheppo } 28013af08d82Slm66018 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 28021ae08745Sheppo } 28031ae08745Sheppo 28041ae08745Sheppo if (vdc->initialized & VDC_DRING_INIT) { 28053af08d82Slm66018 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 28068cd10891Snarayan status = ldc_mem_dring_destroy(vdc->dring_hdl); 28071ae08745Sheppo if (status == 0) { 28088cd10891Snarayan vdc->dring_hdl = NULL; 28091ae08745Sheppo bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 28101ae08745Sheppo vdc->initialized &= ~VDC_DRING_INIT; 28111ae08745Sheppo } else { 28123af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d destroying DRing 
(%lx)", 28138cd10891Snarayan vdc->instance, status, vdc->dring_hdl); 28141ae08745Sheppo } 28151ae08745Sheppo } 28161ae08745Sheppo } 28171ae08745Sheppo 28181ae08745Sheppo /* 28193af08d82Slm66018 * Function: 282090e2f9dcSlm66018 * vdc_map_to_shared_dring() 28211ae08745Sheppo * 28221ae08745Sheppo * Description: 28233af08d82Slm66018 * Copy contents of the local descriptor to the shared 28243af08d82Slm66018 * memory descriptor. 28251ae08745Sheppo * 28263af08d82Slm66018 * Arguments: 28273af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 28283af08d82Slm66018 * idx - descriptor ring index 28293af08d82Slm66018 * 28303af08d82Slm66018 * Return Code: 28313af08d82Slm66018 * None 28321ae08745Sheppo */ 28331ae08745Sheppo static int 28343af08d82Slm66018 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 28351ae08745Sheppo { 28363af08d82Slm66018 vdc_local_desc_t *ldep; 28373af08d82Slm66018 vd_dring_entry_t *dep; 28383af08d82Slm66018 int rv; 28391ae08745Sheppo 28403af08d82Slm66018 ldep = &(vdcp->local_dring[idx]); 28411ae08745Sheppo 28423af08d82Slm66018 /* for now leave in the old pop_mem_hdl stuff */ 28433af08d82Slm66018 if (ldep->nbytes > 0) { 28443af08d82Slm66018 rv = vdc_populate_mem_hdl(vdcp, ldep); 28453af08d82Slm66018 if (rv) { 28463af08d82Slm66018 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 28473af08d82Slm66018 vdcp->instance); 28483af08d82Slm66018 return (rv); 28493af08d82Slm66018 } 28503af08d82Slm66018 } 28511ae08745Sheppo 28523af08d82Slm66018 /* 28533af08d82Slm66018 * fill in the data details into the DRing 28543af08d82Slm66018 */ 2855d10e4ef2Snarayan dep = ldep->dep; 28561ae08745Sheppo ASSERT(dep != NULL); 28571ae08745Sheppo 28583af08d82Slm66018 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 28593af08d82Slm66018 dep->payload.operation = ldep->operation; 28603af08d82Slm66018 dep->payload.addr = ldep->offset; 28613af08d82Slm66018 dep->payload.nbytes = ldep->nbytes; 2862055d7c80Scarlsonj dep->payload.status = (uint32_t)-1; /* vds will 
set valid value */ 28633af08d82Slm66018 dep->payload.slice = ldep->slice; 28643af08d82Slm66018 dep->hdr.dstate = VIO_DESC_READY; 28653af08d82Slm66018 dep->hdr.ack = 1; /* request an ACK for every message */ 28661ae08745Sheppo 28673af08d82Slm66018 return (0); 28681ae08745Sheppo } 28691ae08745Sheppo 28701ae08745Sheppo /* 28711ae08745Sheppo * Function: 28723af08d82Slm66018 * vdc_send_request 28733af08d82Slm66018 * 28743af08d82Slm66018 * Description: 28753af08d82Slm66018 * This routine writes the data to be transmitted to vds into the 28763af08d82Slm66018 * descriptor, notifies vds that the ring has been updated and 28773af08d82Slm66018 * then waits for the request to be processed. 28783af08d82Slm66018 * 28793af08d82Slm66018 * Arguments: 28803af08d82Slm66018 * vdcp - the soft state pointer 28813af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 28823af08d82Slm66018 * addr - address of data buf to be read/written. 28833af08d82Slm66018 * nbytes - number of bytes to read/write 28843af08d82Slm66018 * slice - the disk slice this request is for 28853af08d82Slm66018 * offset - relative disk offset 28863af08d82Slm66018 * cb_type - type of call - STRATEGY or SYNC 28873af08d82Slm66018 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 28883af08d82Slm66018 * . mode for ioctl(9e) 28893af08d82Slm66018 * . 
LP64 diskaddr_t (block I/O) 28903af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 28913af08d82Slm66018 * 28923af08d82Slm66018 * Return Codes: 28933af08d82Slm66018 * 0 28943af08d82Slm66018 * ENXIO 28953af08d82Slm66018 */ 28963af08d82Slm66018 static int 28973af08d82Slm66018 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 28983af08d82Slm66018 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 28993af08d82Slm66018 void *cb_arg, vio_desc_direction_t dir) 29003af08d82Slm66018 { 2901366a92acSlm66018 int rv = 0; 2902366a92acSlm66018 29033af08d82Slm66018 ASSERT(vdcp != NULL); 290487a7269eSachartre ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 29053af08d82Slm66018 29063af08d82Slm66018 mutex_enter(&vdcp->lock); 29073af08d82Slm66018 2908366a92acSlm66018 /* 2909366a92acSlm66018 * If this is a block read/write operation we update the I/O statistics 2910366a92acSlm66018 * to indicate that the request is being put on the waitq to be 2911366a92acSlm66018 * serviced. 2912366a92acSlm66018 * 2913366a92acSlm66018 * We do it here (a common routine for both synchronous and strategy 2914366a92acSlm66018 * calls) for performance reasons - we are already holding vdc->lock 2915366a92acSlm66018 * so there is no extra locking overhead. We would have to explicitly 2916366a92acSlm66018 * grab the 'lock' mutex to update the stats if we were to do this 2917366a92acSlm66018 * higher up the stack in vdc_strategy() et. al. 
2918366a92acSlm66018 */ 2919366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2920366a92acSlm66018 DTRACE_IO1(start, buf_t *, cb_arg); 292190e2f9dcSlm66018 VD_KSTAT_WAITQ_ENTER(vdcp); 2922366a92acSlm66018 } 2923366a92acSlm66018 29243af08d82Slm66018 do { 29253c96341aSnarayan while (vdcp->state != VDC_STATE_RUNNING) { 29263af08d82Slm66018 29273c96341aSnarayan /* return error if detaching */ 29283c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 2929366a92acSlm66018 rv = ENXIO; 2930366a92acSlm66018 goto done; 29313c96341aSnarayan } 2932655fd6a9Sachartre 2933655fd6a9Sachartre /* fail request if connection timeout is reached */ 2934655fd6a9Sachartre if (vdcp->ctimeout_reached) { 2935366a92acSlm66018 rv = EIO; 2936366a92acSlm66018 goto done; 2937655fd6a9Sachartre } 2938655fd6a9Sachartre 29392f5224aeSachartre /* 29402f5224aeSachartre * If we are panicking and the disk is not ready then 29412f5224aeSachartre * we can't send any request because we can't complete 29422f5224aeSachartre * the handshake now. 29432f5224aeSachartre */ 29442f5224aeSachartre if (ddi_in_panic()) { 2945366a92acSlm66018 rv = EIO; 2946366a92acSlm66018 goto done; 29472f5224aeSachartre } 29482f5224aeSachartre 2949655fd6a9Sachartre cv_wait(&vdcp->running_cv, &vdcp->lock); 29503c96341aSnarayan } 29513c96341aSnarayan 29523af08d82Slm66018 } while (vdc_populate_descriptor(vdcp, operation, addr, 29533af08d82Slm66018 nbytes, slice, offset, cb_type, cb_arg, dir)); 29543af08d82Slm66018 2955366a92acSlm66018 done: 2956366a92acSlm66018 /* 2957366a92acSlm66018 * If this is a block read/write we update the I/O statistics kstat 2958366a92acSlm66018 * to indicate that this request has been placed on the queue for 2959366a92acSlm66018 * processing (i.e sent to the vDisk server) - iostat(1M) will 2960366a92acSlm66018 * report the time waiting for the vDisk server under the %b column 2961366a92acSlm66018 * In the case of an error we simply take it off the wait queue. 
2962366a92acSlm66018 */ 2963366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2964366a92acSlm66018 if (rv == 0) { 296590e2f9dcSlm66018 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 2966366a92acSlm66018 DTRACE_PROBE1(send, buf_t *, cb_arg); 2967366a92acSlm66018 } else { 2968366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 296990e2f9dcSlm66018 VD_KSTAT_WAITQ_EXIT(vdcp); 2970366a92acSlm66018 DTRACE_IO1(done, buf_t *, cb_arg); 2971366a92acSlm66018 } 2972366a92acSlm66018 } 2973366a92acSlm66018 29743af08d82Slm66018 mutex_exit(&vdcp->lock); 2975366a92acSlm66018 2976366a92acSlm66018 return (rv); 29773af08d82Slm66018 } 29783af08d82Slm66018 29793af08d82Slm66018 29803af08d82Slm66018 /* 29813af08d82Slm66018 * Function: 29821ae08745Sheppo * vdc_populate_descriptor 29831ae08745Sheppo * 29841ae08745Sheppo * Description: 29851ae08745Sheppo * This routine writes the data to be transmitted to vds into the 29861ae08745Sheppo * descriptor, notifies vds that the ring has been updated and 29871ae08745Sheppo * then waits for the request to be processed. 29881ae08745Sheppo * 29891ae08745Sheppo * Arguments: 29903af08d82Slm66018 * vdcp - the soft state pointer 29911ae08745Sheppo * operation - operation we want vds to perform (VD_OP_XXX) 29923af08d82Slm66018 * addr - address of data buf to be read/written. 29933af08d82Slm66018 * nbytes - number of bytes to read/write 29943af08d82Slm66018 * slice - the disk slice this request is for 29953af08d82Slm66018 * offset - relative disk offset 29963af08d82Slm66018 * cb_type - type of call - STRATEGY or SYNC 29973af08d82Slm66018 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 29981ae08745Sheppo * . mode for ioctl(9e) 29991ae08745Sheppo * . 
LP64 diskaddr_t (block I/O) 30003af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 30011ae08745Sheppo * 30021ae08745Sheppo * Return Codes: 30031ae08745Sheppo * 0 30041ae08745Sheppo * EAGAIN 300517cadca8Slm66018 * ECONNRESET 30061ae08745Sheppo * ENXIO 30071ae08745Sheppo */ 30081ae08745Sheppo static int 30093af08d82Slm66018 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 30103af08d82Slm66018 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 30113af08d82Slm66018 void *cb_arg, vio_desc_direction_t dir) 30121ae08745Sheppo { 30133af08d82Slm66018 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 30143af08d82Slm66018 int idx; /* Index of DRing entry used */ 30153af08d82Slm66018 int next_idx; 30161ae08745Sheppo vio_dring_msg_t dmsg; 30173af08d82Slm66018 size_t msglen; 30188e6a2a04Slm66018 int rv; 30191ae08745Sheppo 30203af08d82Slm66018 ASSERT(MUTEX_HELD(&vdcp->lock)); 30213af08d82Slm66018 vdcp->threads_pending++; 30223af08d82Slm66018 loop: 30233af08d82Slm66018 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 30241ae08745Sheppo 30253af08d82Slm66018 /* Get next available D-Ring entry */ 30263af08d82Slm66018 idx = vdcp->dring_curr_idx; 30273af08d82Slm66018 local_dep = &(vdcp->local_dring[idx]); 30281ae08745Sheppo 30293af08d82Slm66018 if (!local_dep->is_free) { 30303af08d82Slm66018 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 30313af08d82Slm66018 vdcp->instance); 30323af08d82Slm66018 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 30333af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 30343af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 30353af08d82Slm66018 goto loop; 30363af08d82Slm66018 } 30373af08d82Slm66018 vdcp->threads_pending--; 30383af08d82Slm66018 return (ECONNRESET); 30391ae08745Sheppo } 30401ae08745Sheppo 30413af08d82Slm66018 next_idx = idx + 1; 30423af08d82Slm66018 if (next_idx >= vdcp->dring_len) 30433af08d82Slm66018 next_idx = 0; 30443af08d82Slm66018 vdcp->dring_curr_idx = 
next_idx; 30451ae08745Sheppo 30463af08d82Slm66018 ASSERT(local_dep->is_free); 30471ae08745Sheppo 30483af08d82Slm66018 local_dep->operation = operation; 3049d10e4ef2Snarayan local_dep->addr = addr; 30503af08d82Slm66018 local_dep->nbytes = nbytes; 30513af08d82Slm66018 local_dep->slice = slice; 30523af08d82Slm66018 local_dep->offset = offset; 30533af08d82Slm66018 local_dep->cb_type = cb_type; 30543af08d82Slm66018 local_dep->cb_arg = cb_arg; 30553af08d82Slm66018 local_dep->dir = dir; 30563af08d82Slm66018 30573af08d82Slm66018 local_dep->is_free = B_FALSE; 30583af08d82Slm66018 30593af08d82Slm66018 rv = vdc_map_to_shared_dring(vdcp, idx); 30603af08d82Slm66018 if (rv) { 30613af08d82Slm66018 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 30623af08d82Slm66018 vdcp->instance); 30633af08d82Slm66018 /* free the descriptor */ 30643af08d82Slm66018 local_dep->is_free = B_TRUE; 30653af08d82Slm66018 vdcp->dring_curr_idx = idx; 30663af08d82Slm66018 cv_wait(&vdcp->membind_cv, &vdcp->lock); 30673af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 30683af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 30693af08d82Slm66018 goto loop; 30701ae08745Sheppo } 30713af08d82Slm66018 vdcp->threads_pending--; 30723af08d82Slm66018 return (ECONNRESET); 30731ae08745Sheppo } 30741ae08745Sheppo 30751ae08745Sheppo /* 30761ae08745Sheppo * Send a msg with the DRing details to vds 30771ae08745Sheppo */ 30781ae08745Sheppo VIO_INIT_DRING_DATA_TAG(dmsg); 30793af08d82Slm66018 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 30803af08d82Slm66018 dmsg.dring_ident = vdcp->dring_ident; 30811ae08745Sheppo dmsg.start_idx = idx; 30821ae08745Sheppo dmsg.end_idx = idx; 30833af08d82Slm66018 vdcp->seq_num++; 30841ae08745Sheppo 3085366a92acSlm66018 DTRACE_PROBE2(populate, int, vdcp->instance, 3086366a92acSlm66018 vdc_local_desc_t *, local_dep); 30873af08d82Slm66018 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 30883af08d82Slm66018 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 
30891ae08745Sheppo 30903af08d82Slm66018 /* 30913af08d82Slm66018 * note we're still holding the lock here to 30923af08d82Slm66018 * make sure the message goes out in order !!!... 30933af08d82Slm66018 */ 30943af08d82Slm66018 msglen = sizeof (dmsg); 30953af08d82Slm66018 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 30963af08d82Slm66018 switch (rv) { 30973af08d82Slm66018 case ECONNRESET: 30983af08d82Slm66018 /* 30993af08d82Slm66018 * vdc_send initiates the reset on failure. 31003af08d82Slm66018 * Since the transaction has already been put 31013af08d82Slm66018 * on the local dring, it will automatically get 31023af08d82Slm66018 * retried when the channel is reset. Given that, 31033af08d82Slm66018 * it is ok to just return success even though the 31043af08d82Slm66018 * send failed. 31053af08d82Slm66018 */ 31063af08d82Slm66018 rv = 0; 31073af08d82Slm66018 break; 3108d10e4ef2Snarayan 31093af08d82Slm66018 case 0: /* EOK */ 31103af08d82Slm66018 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 31113af08d82Slm66018 break; 3112d10e4ef2Snarayan 31133af08d82Slm66018 default: 31143af08d82Slm66018 goto cleanup_and_exit; 31153af08d82Slm66018 } 3116e1ebb9ecSlm66018 31173af08d82Slm66018 vdcp->threads_pending--; 31183af08d82Slm66018 return (rv); 31193af08d82Slm66018 31203af08d82Slm66018 cleanup_and_exit: 31213af08d82Slm66018 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 31223af08d82Slm66018 return (ENXIO); 31231ae08745Sheppo } 31241ae08745Sheppo 31251ae08745Sheppo /* 31263af08d82Slm66018 * Function: 31273af08d82Slm66018 * vdc_do_sync_op 31283af08d82Slm66018 * 31293af08d82Slm66018 * Description: 31303af08d82Slm66018 * Wrapper around vdc_populate_descriptor that blocks until the 31313af08d82Slm66018 * response to the message is available. 
 *
 * Arguments:
 *	vdcp	  - the soft state pointer
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	addr	  - address of data buf to be read/written.
 *	nbytes	  - number of bytes to read/write
 *	slice	  - the disk slice this request is for
 *	offset	  - relative disk offset
 *	cb_type	  - type of call - STRATEGY or SYNC
 *	cb_arg	  - parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			. LP64 diskaddr_t (block I/O)
 *	dir	  - direction of operation (READ/WRITE/BOTH)
 *	rconflict - check for reservation conflict in case of failure
 *
 * rconflict should be set to B_TRUE by most callers. Callers invoking the
 * VD_OP_SCSICMD operation can set rconflict to B_FALSE if they check the
 * result of a successful operation with vd_scsi_status().
31503af08d82Slm66018 * 31513af08d82Slm66018 * Return Codes: 31523af08d82Slm66018 * 0 31533af08d82Slm66018 * EAGAIN 31543af08d82Slm66018 * EFAULT 31553af08d82Slm66018 * ENXIO 31563af08d82Slm66018 * EIO 31570a55fbb7Slm66018 */ 31583af08d82Slm66018 static int 31593af08d82Slm66018 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 31603af08d82Slm66018 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 31612f5224aeSachartre vio_desc_direction_t dir, boolean_t rconflict) 31623af08d82Slm66018 { 31633af08d82Slm66018 int status; 31642f5224aeSachartre vdc_io_t *vio; 31652f5224aeSachartre boolean_t check_resv_conflict = B_FALSE; 31663af08d82Slm66018 31673af08d82Slm66018 ASSERT(cb_type == CB_SYNC); 31681ae08745Sheppo 31691ae08745Sheppo /* 31703af08d82Slm66018 * Grab the lock, if blocked wait until the server 31713af08d82Slm66018 * response causes us to wake up again. 31723af08d82Slm66018 */ 31733af08d82Slm66018 mutex_enter(&vdcp->lock); 31743af08d82Slm66018 vdcp->sync_op_cnt++; 317511f54b6eSAlexandre Chartre while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) { 317611f54b6eSAlexandre Chartre if (ddi_in_panic()) { 317711f54b6eSAlexandre Chartre /* don't block if we are panicking */ 317811f54b6eSAlexandre Chartre vdcp->sync_op_cnt--; 317911f54b6eSAlexandre Chartre mutex_exit(&vdcp->lock); 318011f54b6eSAlexandre Chartre return (EIO); 318111f54b6eSAlexandre Chartre } else { 31823af08d82Slm66018 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 318311f54b6eSAlexandre Chartre } 318411f54b6eSAlexandre Chartre } 31853af08d82Slm66018 31863af08d82Slm66018 if (vdcp->state == VDC_STATE_DETACH) { 31873af08d82Slm66018 cv_broadcast(&vdcp->sync_blocked_cv); 31883af08d82Slm66018 vdcp->sync_op_cnt--; 31893af08d82Slm66018 mutex_exit(&vdcp->lock); 31903af08d82Slm66018 return (ENXIO); 31913af08d82Slm66018 } 31923af08d82Slm66018 31933af08d82Slm66018 /* now block anyone other thread entering after us */ 31943af08d82Slm66018 vdcp->sync_op_blocked = B_TRUE; 
31953af08d82Slm66018 vdcp->sync_op_pending = B_TRUE; 31963af08d82Slm66018 mutex_exit(&vdcp->lock); 31973af08d82Slm66018 3198655fd6a9Sachartre status = vdc_send_request(vdcp, operation, addr, 31993af08d82Slm66018 nbytes, slice, offset, cb_type, cb_arg, dir); 32003af08d82Slm66018 3201655fd6a9Sachartre mutex_enter(&vdcp->lock); 3202655fd6a9Sachartre 3203655fd6a9Sachartre if (status != 0) { 3204655fd6a9Sachartre vdcp->sync_op_pending = B_FALSE; 320511f54b6eSAlexandre Chartre } else if (ddi_in_panic()) { 320611f54b6eSAlexandre Chartre if (vdc_drain_response(vdcp, CB_SYNC, NULL) == 0) { 320711f54b6eSAlexandre Chartre status = vdcp->sync_op_status; 320811f54b6eSAlexandre Chartre } else { 320911f54b6eSAlexandre Chartre vdcp->sync_op_pending = B_FALSE; 321011f54b6eSAlexandre Chartre status = EIO; 321111f54b6eSAlexandre Chartre } 3212655fd6a9Sachartre } else { 32133af08d82Slm66018 /* 32143af08d82Slm66018 * block until our transaction completes. 32153af08d82Slm66018 * Also anyone else waiting also gets to go next. 32163af08d82Slm66018 */ 32173af08d82Slm66018 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 32183af08d82Slm66018 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 32193af08d82Slm66018 3220655fd6a9Sachartre DMSG(vdcp, 2, ": operation returned %d\n", 3221655fd6a9Sachartre vdcp->sync_op_status); 32223c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 32233c96341aSnarayan vdcp->sync_op_pending = B_FALSE; 32243af08d82Slm66018 status = ENXIO; 32253c96341aSnarayan } else { 32263af08d82Slm66018 status = vdcp->sync_op_status; 32272f5224aeSachartre if (status != 0 && vdcp->failfast_interval != 0) { 32282f5224aeSachartre /* 32292f5224aeSachartre * Operation has failed and failfast is enabled. 32302f5224aeSachartre * We need to check if the failure is due to a 32312f5224aeSachartre * reservation conflict if this was requested. 
32322f5224aeSachartre */ 32332f5224aeSachartre check_resv_conflict = rconflict; 32342f5224aeSachartre } 32352f5224aeSachartre 32363c96341aSnarayan } 3237655fd6a9Sachartre } 32383c96341aSnarayan 32393af08d82Slm66018 vdcp->sync_op_status = 0; 32403af08d82Slm66018 vdcp->sync_op_blocked = B_FALSE; 32413af08d82Slm66018 vdcp->sync_op_cnt--; 32423af08d82Slm66018 32433af08d82Slm66018 /* signal the next waiting thread */ 32443af08d82Slm66018 cv_signal(&vdcp->sync_blocked_cv); 32452f5224aeSachartre 32462f5224aeSachartre /* 32472f5224aeSachartre * We have to check for reservation conflict after unblocking sync 32482f5224aeSachartre * operations because some sync operations will be used to do this 32492f5224aeSachartre * check. 32502f5224aeSachartre */ 32512f5224aeSachartre if (check_resv_conflict) { 32522f5224aeSachartre vio = vdc_failfast_io_queue(vdcp, NULL); 32532f5224aeSachartre while (vio->vio_qtime != 0) 32542f5224aeSachartre cv_wait(&vdcp->failfast_io_cv, &vdcp->lock); 32552f5224aeSachartre kmem_free(vio, sizeof (vdc_io_t)); 32562f5224aeSachartre } 32572f5224aeSachartre 32583af08d82Slm66018 mutex_exit(&vdcp->lock); 32593af08d82Slm66018 32603af08d82Slm66018 return (status); 32613af08d82Slm66018 } 32623af08d82Slm66018 32633af08d82Slm66018 32643af08d82Slm66018 /* 32653af08d82Slm66018 * Function: 32663af08d82Slm66018 * vdc_drain_response() 32673af08d82Slm66018 * 32683af08d82Slm66018 * Description: 32691ae08745Sheppo * When a guest is panicking, the completion of requests needs to be 32701ae08745Sheppo * handled differently because interrupts are disabled and vdc 32711ae08745Sheppo * will not get messages. We have to poll for the messages instead. 32723af08d82Slm66018 * 32733c2ebf09Sachartre * Note: since we are panicking we don't implement the io:::done 32743c2ebf09Sachartre * DTrace probe or update the I/O statistics kstats. 3275366a92acSlm66018 * 32763af08d82Slm66018 * Arguments: 32773af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
 *	cb_type	- the type of request we want to drain. If type is CB_SYNC
 *		  then we drain all responses until we find a CB_SYNC request.
 *		  If the type is CB_STRATEGY then the behavior depends on the
 *		  value of the buf argument.
 *	buf	- if the cb_type argument is CB_SYNC then the buf argument
 *		  must be NULL. If the cb_type argument is CB_STRATEGY and
 *		  if buf is NULL then we drain all responses, otherwise we
 *		  poll until we receive an ACK/NACK for the specific I/O
 *		  described by buf.
 *
 * Return Code:
 *	0	- Success. If we were expecting a response to a particular
 *		  CB_SYNC or CB_STRATEGY request then this means that a
 *		  response has been received.
32921ae08745Sheppo */ 32933af08d82Slm66018 static int 329411f54b6eSAlexandre Chartre vdc_drain_response(vdc_t *vdc, vio_cb_type_t cb_type, struct buf *buf) 32953af08d82Slm66018 { 32963af08d82Slm66018 int rv, idx, retries; 32973af08d82Slm66018 size_t msglen; 32983af08d82Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 32993af08d82Slm66018 vio_dring_msg_t dmsg; 33003c2ebf09Sachartre struct buf *mbuf; 330111f54b6eSAlexandre Chartre boolean_t ack; 330211f54b6eSAlexandre Chartre 330311f54b6eSAlexandre Chartre ASSERT(cb_type == CB_STRATEGY || cb_type == CB_SYNC); 33043af08d82Slm66018 33053af08d82Slm66018 mutex_enter(&vdc->lock); 33063af08d82Slm66018 33071ae08745Sheppo retries = 0; 33081ae08745Sheppo for (;;) { 33091ae08745Sheppo msglen = sizeof (dmsg); 33108cd10891Snarayan rv = ldc_read(vdc->curr_server->ldc_handle, (caddr_t)&dmsg, 33118cd10891Snarayan &msglen); 33128e6a2a04Slm66018 if (rv) { 33138e6a2a04Slm66018 rv = EINVAL; 33141ae08745Sheppo break; 33151ae08745Sheppo } 33161ae08745Sheppo 33171ae08745Sheppo /* 33181ae08745Sheppo * if there are no packets wait and check again 33191ae08745Sheppo */ 33208e6a2a04Slm66018 if ((rv == 0) && (msglen == 0)) { 33211ae08745Sheppo if (retries++ > vdc_dump_retries) { 33228e6a2a04Slm66018 rv = EAGAIN; 33231ae08745Sheppo break; 33241ae08745Sheppo } 33251ae08745Sheppo 3326d10e4ef2Snarayan drv_usecwait(vdc_usec_timeout_dump); 33271ae08745Sheppo continue; 33281ae08745Sheppo } 33291ae08745Sheppo 33301ae08745Sheppo /* 33311ae08745Sheppo * Ignore all messages that are not ACKs/NACKs to 33321ae08745Sheppo * DRing requests. 
33331ae08745Sheppo */ 33341ae08745Sheppo if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 33351ae08745Sheppo (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 33363af08d82Slm66018 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 33371ae08745Sheppo dmsg.tag.vio_msgtype, 33381ae08745Sheppo dmsg.tag.vio_subtype, 33391ae08745Sheppo dmsg.tag.vio_subtype_env); 33401ae08745Sheppo continue; 33411ae08745Sheppo } 33421ae08745Sheppo 33431ae08745Sheppo /* 334411f54b6eSAlexandre Chartre * Record if the packet was ACK'ed or not. If the packet was not 334511f54b6eSAlexandre Chartre * ACK'ed then we will just mark the request as failed; we don't 334611f54b6eSAlexandre Chartre * want to reset the connection at this point. 33471ae08745Sheppo */ 33481ae08745Sheppo switch (dmsg.tag.vio_subtype) { 33491ae08745Sheppo case VIO_SUBTYPE_ACK: 335011f54b6eSAlexandre Chartre ack = B_TRUE; 33511ae08745Sheppo break; 33521ae08745Sheppo case VIO_SUBTYPE_NACK: 335311f54b6eSAlexandre Chartre ack = B_FALSE; 33541ae08745Sheppo break; 33551ae08745Sheppo default: 33561ae08745Sheppo continue; 33571ae08745Sheppo } 33581ae08745Sheppo 33593af08d82Slm66018 idx = dmsg.start_idx; 33603af08d82Slm66018 if (idx >= vdc->dring_len) { 33613af08d82Slm66018 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 3362e1ebb9ecSlm66018 vdc->instance, idx); 33633af08d82Slm66018 continue; 33641ae08745Sheppo } 33653af08d82Slm66018 ldep = &vdc->local_dring[idx]; 33663af08d82Slm66018 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 33673af08d82Slm66018 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 33683af08d82Slm66018 vdc->instance, idx, ldep->dep->hdr.dstate); 33691ae08745Sheppo continue; 33701ae08745Sheppo } 33711ae08745Sheppo 337211f54b6eSAlexandre Chartre switch (ldep->cb_type) { 337311f54b6eSAlexandre Chartre 337411f54b6eSAlexandre Chartre case CB_STRATEGY: 33753c2ebf09Sachartre mbuf = ldep->cb_arg; 337611f54b6eSAlexandre Chartre if (mbuf != NULL) { 33773c2ebf09Sachartre mbuf->b_resid = mbuf->b_bcount - 33783c2ebf09Sachartre 
ldep->dep->payload.nbytes; 337911f54b6eSAlexandre Chartre bioerror(mbuf, 338011f54b6eSAlexandre Chartre ack ? ldep->dep->payload.status : EIO); 33813c2ebf09Sachartre biodone(mbuf); 33823c2ebf09Sachartre } 33833af08d82Slm66018 rv = vdc_depopulate_descriptor(vdc, idx); 33843c2ebf09Sachartre if (buf != NULL && buf == mbuf) { 33853c2ebf09Sachartre rv = 0; 338611f54b6eSAlexandre Chartre goto done; 338711f54b6eSAlexandre Chartre } 338811f54b6eSAlexandre Chartre break; 338911f54b6eSAlexandre Chartre 339011f54b6eSAlexandre Chartre case CB_SYNC: 339111f54b6eSAlexandre Chartre rv = vdc_depopulate_descriptor(vdc, idx); 339211f54b6eSAlexandre Chartre vdc->sync_op_status = ack ? rv : EIO; 339311f54b6eSAlexandre Chartre vdc->sync_op_pending = B_FALSE; 339411f54b6eSAlexandre Chartre cv_signal(&vdc->sync_pending_cv); 339511f54b6eSAlexandre Chartre if (cb_type == CB_SYNC) { 339611f54b6eSAlexandre Chartre rv = 0; 339711f54b6eSAlexandre Chartre goto done; 339811f54b6eSAlexandre Chartre } 33993af08d82Slm66018 break; 34003af08d82Slm66018 } 34013af08d82Slm66018 34023c2ebf09Sachartre /* if this is the last descriptor - break out of loop */ 34033c2ebf09Sachartre if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) { 34043c2ebf09Sachartre /* 340511f54b6eSAlexandre Chartre * If we were expecting a response for a particular 340611f54b6eSAlexandre Chartre * request then we return with an error otherwise we 340711f54b6eSAlexandre Chartre * have successfully completed the drain. 34083c2ebf09Sachartre */ 340911f54b6eSAlexandre Chartre rv = (buf != NULL || cb_type == CB_SYNC)? 
ESRCH: 0; 34103c2ebf09Sachartre break; 34113c2ebf09Sachartre } 34123c2ebf09Sachartre } 34133c2ebf09Sachartre 341411f54b6eSAlexandre Chartre done: 34153af08d82Slm66018 mutex_exit(&vdc->lock); 34163af08d82Slm66018 DMSG(vdc, 0, "End idx=%d\n", idx); 34173af08d82Slm66018 34183af08d82Slm66018 return (rv); 34191ae08745Sheppo } 34201ae08745Sheppo 34211ae08745Sheppo 34220a55fbb7Slm66018 /* 34230a55fbb7Slm66018 * Function: 34240a55fbb7Slm66018 * vdc_depopulate_descriptor() 34250a55fbb7Slm66018 * 34260a55fbb7Slm66018 * Description: 34270a55fbb7Slm66018 * 34280a55fbb7Slm66018 * Arguments: 34290a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 34300a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 34310a55fbb7Slm66018 * 34320a55fbb7Slm66018 * Return Code: 34330a55fbb7Slm66018 * 0 - Success 34340a55fbb7Slm66018 */ 34351ae08745Sheppo static int 34361ae08745Sheppo vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 34371ae08745Sheppo { 34381ae08745Sheppo vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 34391ae08745Sheppo vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 34401ae08745Sheppo int status = ENXIO; 34418e6a2a04Slm66018 int rv = 0; 34421ae08745Sheppo 34431ae08745Sheppo ASSERT(vdc != NULL); 3444e1ebb9ecSlm66018 ASSERT(idx < vdc->dring_len); 34451ae08745Sheppo ldep = &vdc->local_dring[idx]; 34461ae08745Sheppo ASSERT(ldep != NULL); 34473af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 34483af08d82Slm66018 3449366a92acSlm66018 DTRACE_PROBE2(depopulate, int, vdc->instance, vdc_local_desc_t *, ldep); 34503af08d82Slm66018 DMSG(vdc, 2, ": idx = %d\n", idx); 3451366a92acSlm66018 34521ae08745Sheppo dep = ldep->dep; 34531ae08745Sheppo ASSERT(dep != NULL); 3454e1ebb9ecSlm66018 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 3455e1ebb9ecSlm66018 (dep->payload.status == ECANCELED)); 34561ae08745Sheppo 3457e1ebb9ecSlm66018 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 34583af08d82Slm66018 34593af08d82Slm66018 ldep->is_free = 
B_TRUE; 34601ae08745Sheppo status = dep->payload.status; 3461205eeb1aSlm66018 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 34621ae08745Sheppo 3463eff7243fSlm66018 /* 3464eff7243fSlm66018 * If no buffers were used to transfer information to the server when 3465eff7243fSlm66018 * populating the descriptor then no memory handles need to be unbound 3466eff7243fSlm66018 * and we can return now. 3467eff7243fSlm66018 */ 3468eff7243fSlm66018 if (ldep->nbytes == 0) { 3469eff7243fSlm66018 cv_signal(&vdc->dring_free_cv); 34708e6a2a04Slm66018 return (status); 3471eff7243fSlm66018 } 34728e6a2a04Slm66018 34731ae08745Sheppo /* 34741ae08745Sheppo * If the upper layer passed in a misaligned address we copied the 34751ae08745Sheppo * data into an aligned buffer before sending it to LDC - we now 34761ae08745Sheppo * copy it back to the original buffer. 34771ae08745Sheppo */ 34781ae08745Sheppo if (ldep->align_addr) { 34791ae08745Sheppo ASSERT(ldep->addr != NULL); 34801ae08745Sheppo 34813c96341aSnarayan if (dep->payload.nbytes > 0) 34823c96341aSnarayan bcopy(ldep->align_addr, ldep->addr, 34833c96341aSnarayan dep->payload.nbytes); 34841ae08745Sheppo kmem_free(ldep->align_addr, 34853c96341aSnarayan sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 34861ae08745Sheppo ldep->align_addr = NULL; 34871ae08745Sheppo } 34881ae08745Sheppo 34898e6a2a04Slm66018 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 34908e6a2a04Slm66018 if (rv != 0) { 34913af08d82Slm66018 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 34928e6a2a04Slm66018 vdc->instance, ldep->desc_mhdl, idx, rv); 34938e6a2a04Slm66018 /* 34948e6a2a04Slm66018 * The error returned by the vDisk server is more informative 34958e6a2a04Slm66018 * and thus has a higher priority but if it isn't set we ensure 34968e6a2a04Slm66018 * that this function returns an error. 
34978e6a2a04Slm66018 */ 34988e6a2a04Slm66018 if (status == 0) 34998e6a2a04Slm66018 status = EINVAL; 35001ae08745Sheppo } 35011ae08745Sheppo 35023af08d82Slm66018 cv_signal(&vdc->membind_cv); 35033af08d82Slm66018 cv_signal(&vdc->dring_free_cv); 35043af08d82Slm66018 35051ae08745Sheppo return (status); 35061ae08745Sheppo } 35071ae08745Sheppo 35080a55fbb7Slm66018 /* 35090a55fbb7Slm66018 * Function: 35100a55fbb7Slm66018 * vdc_populate_mem_hdl() 35110a55fbb7Slm66018 * 35120a55fbb7Slm66018 * Description: 35130a55fbb7Slm66018 * 35140a55fbb7Slm66018 * Arguments: 35150a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 35160a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 35170a55fbb7Slm66018 * addr - virtual address being mapped in 35180a55fbb7Slm66018 * nybtes - number of bytes in 'addr' 35190a55fbb7Slm66018 * operation - the vDisk operation being performed (VD_OP_xxx) 35200a55fbb7Slm66018 * 35210a55fbb7Slm66018 * Return Code: 35220a55fbb7Slm66018 * 0 - Success 35230a55fbb7Slm66018 */ 35241ae08745Sheppo static int 35253af08d82Slm66018 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 35261ae08745Sheppo { 35271ae08745Sheppo vd_dring_entry_t *dep = NULL; 35281ae08745Sheppo ldc_mem_handle_t mhdl; 35291ae08745Sheppo caddr_t vaddr; 35303af08d82Slm66018 size_t nbytes; 35314bac2208Snarayan uint8_t perm = LDC_MEM_RW; 35324bac2208Snarayan uint8_t maptype; 35331ae08745Sheppo int rv = 0; 35341ae08745Sheppo int i; 35351ae08745Sheppo 35363af08d82Slm66018 ASSERT(vdcp != NULL); 35371ae08745Sheppo 35383af08d82Slm66018 dep = ldep->dep; 35391ae08745Sheppo mhdl = ldep->desc_mhdl; 35401ae08745Sheppo 35413af08d82Slm66018 switch (ldep->dir) { 35423af08d82Slm66018 case VIO_read_dir: 35431ae08745Sheppo perm = LDC_MEM_W; 35441ae08745Sheppo break; 35451ae08745Sheppo 35463af08d82Slm66018 case VIO_write_dir: 35471ae08745Sheppo perm = LDC_MEM_R; 35481ae08745Sheppo break; 35491ae08745Sheppo 35503af08d82Slm66018 case VIO_both_dir: 
35511ae08745Sheppo perm = LDC_MEM_RW; 35521ae08745Sheppo break; 35531ae08745Sheppo 35541ae08745Sheppo default: 35551ae08745Sheppo ASSERT(0); /* catch bad programming in vdc */ 35561ae08745Sheppo } 35571ae08745Sheppo 35581ae08745Sheppo /* 35591ae08745Sheppo * LDC expects any addresses passed in to be 8-byte aligned. We need 35601ae08745Sheppo * to copy the contents of any misaligned buffers to a newly allocated 35611ae08745Sheppo * buffer and bind it instead (and copy the the contents back to the 35621ae08745Sheppo * original buffer passed in when depopulating the descriptor) 35631ae08745Sheppo */ 35643af08d82Slm66018 vaddr = ldep->addr; 35653af08d82Slm66018 nbytes = ldep->nbytes; 35663af08d82Slm66018 if (((uint64_t)vaddr & 0x7) != 0) { 3567d10e4ef2Snarayan ASSERT(ldep->align_addr == NULL); 35681ae08745Sheppo ldep->align_addr = 35693af08d82Slm66018 kmem_alloc(sizeof (caddr_t) * 35703af08d82Slm66018 P2ROUNDUP(nbytes, 8), KM_SLEEP); 35713af08d82Slm66018 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 35723af08d82Slm66018 "(buf=%p nb=%ld op=%d)\n", 35733af08d82Slm66018 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 35743af08d82Slm66018 nbytes, ldep->operation); 35753af08d82Slm66018 if (perm != LDC_MEM_W) 35763af08d82Slm66018 bcopy(vaddr, ldep->align_addr, nbytes); 35771ae08745Sheppo vaddr = ldep->align_addr; 35781ae08745Sheppo } 35791ae08745Sheppo 35804bac2208Snarayan maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 35811ae08745Sheppo rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 358287a7269eSachartre maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 35833af08d82Slm66018 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 35843af08d82Slm66018 vdcp->instance, dep->payload.ncookies); 35851ae08745Sheppo if (rv != 0) { 35863af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 35873af08d82Slm66018 "(mhdl=%p, buf=%p, err=%d)\n", 35883af08d82Slm66018 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 
35891ae08745Sheppo if (ldep->align_addr) { 35901ae08745Sheppo kmem_free(ldep->align_addr, 3591d10e4ef2Snarayan sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 35921ae08745Sheppo ldep->align_addr = NULL; 35931ae08745Sheppo } 35941ae08745Sheppo return (EAGAIN); 35951ae08745Sheppo } 35961ae08745Sheppo 35971ae08745Sheppo /* 35981ae08745Sheppo * Get the other cookies (if any). 35991ae08745Sheppo */ 36001ae08745Sheppo for (i = 1; i < dep->payload.ncookies; i++) { 36011ae08745Sheppo rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 36021ae08745Sheppo if (rv != 0) { 36031ae08745Sheppo (void) ldc_mem_unbind_handle(mhdl); 36043af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3605e1ebb9ecSlm66018 "(mhdl=%lx cnum=%d), err=%d", 36063af08d82Slm66018 vdcp->instance, mhdl, i, rv); 36071ae08745Sheppo if (ldep->align_addr) { 36081ae08745Sheppo kmem_free(ldep->align_addr, 36093c96341aSnarayan sizeof (caddr_t) * ldep->nbytes); 36101ae08745Sheppo ldep->align_addr = NULL; 36111ae08745Sheppo } 36121ae08745Sheppo return (EAGAIN); 36131ae08745Sheppo } 36141ae08745Sheppo } 36151ae08745Sheppo 36161ae08745Sheppo return (rv); 36171ae08745Sheppo } 36181ae08745Sheppo 36191ae08745Sheppo /* 36201ae08745Sheppo * Interrupt handlers for messages from LDC 36211ae08745Sheppo */ 36221ae08745Sheppo 36230a55fbb7Slm66018 /* 36240a55fbb7Slm66018 * Function: 36250a55fbb7Slm66018 * vdc_handle_cb() 36260a55fbb7Slm66018 * 36270a55fbb7Slm66018 * Description: 36280a55fbb7Slm66018 * 36290a55fbb7Slm66018 * Arguments: 36300a55fbb7Slm66018 * event - Type of event (LDC_EVT_xxx) that triggered the callback 36310a55fbb7Slm66018 * arg - soft state pointer for this instance of the device driver. 
36320a55fbb7Slm66018 * 36330a55fbb7Slm66018 * Return Code: 36340a55fbb7Slm66018 * 0 - Success 36350a55fbb7Slm66018 */ 36361ae08745Sheppo static uint_t 36371ae08745Sheppo vdc_handle_cb(uint64_t event, caddr_t arg) 36381ae08745Sheppo { 36391ae08745Sheppo ldc_status_t ldc_state; 36401ae08745Sheppo int rv = 0; 36418cd10891Snarayan vdc_server_t *srvr = (vdc_server_t *)(void *)arg; 36428cd10891Snarayan vdc_t *vdc = srvr->vdcp; 36431ae08745Sheppo 36441ae08745Sheppo ASSERT(vdc != NULL); 36451ae08745Sheppo 36463af08d82Slm66018 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 36471ae08745Sheppo 36488cd10891Snarayan /* If callback is not for the current server, ignore it */ 36498cd10891Snarayan mutex_enter(&vdc->lock); 36508cd10891Snarayan 36518cd10891Snarayan if (vdc->curr_server != srvr) { 36528cd10891Snarayan DMSG(vdc, 0, "[%d] Ignoring event 0x%lx for port@%ld\n", 36538cd10891Snarayan vdc->instance, event, srvr->id); 36548cd10891Snarayan mutex_exit(&vdc->lock); 36558cd10891Snarayan return (LDC_SUCCESS); 36568cd10891Snarayan } 36578cd10891Snarayan 36581ae08745Sheppo /* 36591ae08745Sheppo * Depending on the type of event that triggered this callback, 36603af08d82Slm66018 * we modify the handshake state or read the data. 36611ae08745Sheppo * 36621ae08745Sheppo * NOTE: not done as a switch() as event could be triggered by 36631ae08745Sheppo * a state change and a read request. Also the ordering of the 36641ae08745Sheppo * check for the event types is deliberate. 
36651ae08745Sheppo */ 36661ae08745Sheppo if (event & LDC_EVT_UP) { 36673af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 36683af08d82Slm66018 36691ae08745Sheppo /* get LDC state */ 36708cd10891Snarayan rv = ldc_status(srvr->ldc_handle, &ldc_state); 36711ae08745Sheppo if (rv != 0) { 36723af08d82Slm66018 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 36731ae08745Sheppo vdc->instance, rv); 36748cd10891Snarayan mutex_exit(&vdc->lock); 36751ae08745Sheppo return (LDC_SUCCESS); 36761ae08745Sheppo } 36778cd10891Snarayan if (srvr->ldc_state != LDC_UP && 36788cd10891Snarayan ldc_state == LDC_UP) { 36791ae08745Sheppo /* 36803af08d82Slm66018 * Reset the transaction sequence numbers when 36813af08d82Slm66018 * LDC comes up. We then kick off the handshake 36823af08d82Slm66018 * negotiation with the vDisk server. 36831ae08745Sheppo */ 36840a55fbb7Slm66018 vdc->seq_num = 1; 36851ae08745Sheppo vdc->seq_num_reply = 0; 36868cd10891Snarayan srvr->ldc_state = ldc_state; 36873af08d82Slm66018 cv_signal(&vdc->initwait_cv); 36883af08d82Slm66018 } 36891ae08745Sheppo } 36901ae08745Sheppo 36911ae08745Sheppo if (event & LDC_EVT_READ) { 369217cadca8Slm66018 DMSG(vdc, 1, "[%d] Received LDC_EVT_READ\n", vdc->instance); 36933af08d82Slm66018 mutex_enter(&vdc->read_lock); 36943af08d82Slm66018 cv_signal(&vdc->read_cv); 36953af08d82Slm66018 vdc->read_state = VDC_READ_PENDING; 36963af08d82Slm66018 mutex_exit(&vdc->read_lock); 36978cd10891Snarayan mutex_exit(&vdc->lock); 36981ae08745Sheppo 36991ae08745Sheppo /* that's all we have to do - no need to handle DOWN/RESET */ 37001ae08745Sheppo return (LDC_SUCCESS); 37011ae08745Sheppo } 37021ae08745Sheppo 37033af08d82Slm66018 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 37040a55fbb7Slm66018 37053af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 37063af08d82Slm66018 37073af08d82Slm66018 /* 37083af08d82Slm66018 * Need to wake up any readers so they will 37093af08d82Slm66018 * detect that a reset has 
occurred. 37103af08d82Slm66018 */ 37113af08d82Slm66018 mutex_enter(&vdc->read_lock); 37123af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 37133af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 37143af08d82Slm66018 cv_signal(&vdc->read_cv); 37153af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 37163af08d82Slm66018 mutex_exit(&vdc->read_lock); 37170a55fbb7Slm66018 37183af08d82Slm66018 /* wake up any threads waiting for connection to come up */ 37193af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 37203af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 37213af08d82Slm66018 cv_signal(&vdc->initwait_cv); 37221ae08745Sheppo } 37231ae08745Sheppo 37241ae08745Sheppo } 37251ae08745Sheppo 37268cd10891Snarayan mutex_exit(&vdc->lock); 37278cd10891Snarayan 37281ae08745Sheppo if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 37293af08d82Slm66018 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 37301ae08745Sheppo vdc->instance, event); 37311ae08745Sheppo 37321ae08745Sheppo return (LDC_SUCCESS); 37331ae08745Sheppo } 37341ae08745Sheppo 37353af08d82Slm66018 /* 37363af08d82Slm66018 * Function: 37373af08d82Slm66018 * vdc_wait_for_response() 37383af08d82Slm66018 * 37393af08d82Slm66018 * Description: 37403af08d82Slm66018 * Block waiting for a response from the server. If there is 37413af08d82Slm66018 * no data the thread block on the read_cv that is signalled 37423af08d82Slm66018 * by the callback when an EVT_READ occurs. 37433af08d82Slm66018 * 37443af08d82Slm66018 * Arguments: 37453af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
37463af08d82Slm66018 * 37473af08d82Slm66018 * Return Code: 37483af08d82Slm66018 * 0 - Success 37493af08d82Slm66018 */ 37503af08d82Slm66018 static int 37513af08d82Slm66018 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 37523af08d82Slm66018 { 37533af08d82Slm66018 size_t nbytes = sizeof (*msgp); 37543af08d82Slm66018 int status; 37553af08d82Slm66018 37563af08d82Slm66018 ASSERT(vdcp != NULL); 37573af08d82Slm66018 37583af08d82Slm66018 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 37593af08d82Slm66018 37603af08d82Slm66018 status = vdc_recv(vdcp, msgp, &nbytes); 37613af08d82Slm66018 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 37623af08d82Slm66018 status, (int)nbytes); 37633af08d82Slm66018 if (status) { 37643af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 37653af08d82Slm66018 vdcp->instance, status); 37663af08d82Slm66018 return (status); 37673af08d82Slm66018 } 37683af08d82Slm66018 37693af08d82Slm66018 if (nbytes < sizeof (vio_msg_tag_t)) { 37703af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 37713af08d82Slm66018 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 37723af08d82Slm66018 return (ENOMSG); 37733af08d82Slm66018 } 37743af08d82Slm66018 37753af08d82Slm66018 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 37763af08d82Slm66018 msgp->tag.vio_msgtype, 37773af08d82Slm66018 msgp->tag.vio_subtype, 37783af08d82Slm66018 msgp->tag.vio_subtype_env); 37793af08d82Slm66018 37803af08d82Slm66018 /* 37813af08d82Slm66018 * Verify the Session ID of the message 37823af08d82Slm66018 * 37833af08d82Slm66018 * Every message after the Version has been negotiated should 37843af08d82Slm66018 * have the correct session ID set. 
37853af08d82Slm66018 */ 37863af08d82Slm66018 if ((msgp->tag.vio_sid != vdcp->session_id) && 37873af08d82Slm66018 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 37883af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 37893af08d82Slm66018 "expected 0x%lx [seq num %lx @ %d]", 37903af08d82Slm66018 vdcp->instance, msgp->tag.vio_sid, 37913af08d82Slm66018 vdcp->session_id, 37923af08d82Slm66018 ((vio_dring_msg_t *)msgp)->seq_num, 37933af08d82Slm66018 ((vio_dring_msg_t *)msgp)->start_idx); 37943af08d82Slm66018 return (ENOMSG); 37953af08d82Slm66018 } 37963af08d82Slm66018 return (0); 37973af08d82Slm66018 } 37983af08d82Slm66018 37993af08d82Slm66018 38003af08d82Slm66018 /* 38013af08d82Slm66018 * Function: 38023af08d82Slm66018 * vdc_resubmit_backup_dring() 38033af08d82Slm66018 * 38043af08d82Slm66018 * Description: 38053af08d82Slm66018 * Resubmit each descriptor in the backed up dring to 38063af08d82Slm66018 * vDisk server. The Dring was backed up during connection 38073af08d82Slm66018 * reset. 38083af08d82Slm66018 * 38093af08d82Slm66018 * Arguments: 38103af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
38113af08d82Slm66018 * 38123af08d82Slm66018 * Return Code: 38133af08d82Slm66018 * 0 - Success 38143af08d82Slm66018 */ 38153af08d82Slm66018 static int 38163af08d82Slm66018 vdc_resubmit_backup_dring(vdc_t *vdcp) 38173af08d82Slm66018 { 381890e2f9dcSlm66018 int processed = 0; 38193af08d82Slm66018 int count; 38203af08d82Slm66018 int b_idx; 382190e2f9dcSlm66018 int rv = 0; 38223af08d82Slm66018 int dring_size; 382390e2f9dcSlm66018 int op; 38243af08d82Slm66018 vio_msg_t vio_msg; 38253af08d82Slm66018 vdc_local_desc_t *curr_ldep; 38263af08d82Slm66018 38273af08d82Slm66018 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 38283af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 38293af08d82Slm66018 3830655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3831655fd6a9Sachartre /* the pending requests have already been processed */ 3832655fd6a9Sachartre return (0); 3833655fd6a9Sachartre } 3834655fd6a9Sachartre 38353af08d82Slm66018 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 38363af08d82Slm66018 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 38373af08d82Slm66018 38383af08d82Slm66018 /* 38393af08d82Slm66018 * Walk the backup copy of the local descriptor ring and 38403af08d82Slm66018 * resubmit all the outstanding transactions. 
38413af08d82Slm66018 */ 38423af08d82Slm66018 b_idx = vdcp->local_dring_backup_tail; 38433af08d82Slm66018 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 38443af08d82Slm66018 38453af08d82Slm66018 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 38463af08d82Slm66018 3847eff7243fSlm66018 /* only resubmit outstanding transactions */ 38483af08d82Slm66018 if (!curr_ldep->is_free) { 384990e2f9dcSlm66018 /* 385090e2f9dcSlm66018 * If we are retrying a block read/write operation we 385190e2f9dcSlm66018 * need to update the I/O statistics to indicate that 385290e2f9dcSlm66018 * the request is being put back on the waitq to be 385390e2f9dcSlm66018 * serviced (it will have been taken off after the 385490e2f9dcSlm66018 * error was reported). 385590e2f9dcSlm66018 */ 385690e2f9dcSlm66018 mutex_enter(&vdcp->lock); 385790e2f9dcSlm66018 op = curr_ldep->operation; 385890e2f9dcSlm66018 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 385990e2f9dcSlm66018 DTRACE_IO1(start, buf_t *, curr_ldep->cb_arg); 386090e2f9dcSlm66018 VD_KSTAT_WAITQ_ENTER(vdcp); 386190e2f9dcSlm66018 } 38623af08d82Slm66018 38633af08d82Slm66018 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 386490e2f9dcSlm66018 rv = vdc_populate_descriptor(vdcp, op, 38653af08d82Slm66018 curr_ldep->addr, curr_ldep->nbytes, 38663af08d82Slm66018 curr_ldep->slice, curr_ldep->offset, 38673af08d82Slm66018 curr_ldep->cb_type, curr_ldep->cb_arg, 38683af08d82Slm66018 curr_ldep->dir); 386990e2f9dcSlm66018 38703af08d82Slm66018 if (rv) { 387190e2f9dcSlm66018 if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 387290e2f9dcSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 387390e2f9dcSlm66018 VD_KSTAT_WAITQ_EXIT(vdcp); 387490e2f9dcSlm66018 DTRACE_IO1(done, buf_t *, 387590e2f9dcSlm66018 curr_ldep->cb_arg); 387690e2f9dcSlm66018 } 38773af08d82Slm66018 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 38783af08d82Slm66018 vdcp->instance, b_idx); 387990e2f9dcSlm66018 mutex_exit(&vdcp->lock); 388090e2f9dcSlm66018 goto done; 
38813af08d82Slm66018 } 38823af08d82Slm66018 388390e2f9dcSlm66018 /* 388490e2f9dcSlm66018 * If this is a block read/write we update the I/O 388590e2f9dcSlm66018 * statistics kstat to indicate that the request 388690e2f9dcSlm66018 * has been sent back to the vDisk server and should 388790e2f9dcSlm66018 * now be put on the run queue. 388890e2f9dcSlm66018 */ 388990e2f9dcSlm66018 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 389090e2f9dcSlm66018 DTRACE_PROBE1(send, buf_t *, curr_ldep->cb_arg); 389190e2f9dcSlm66018 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 389290e2f9dcSlm66018 } 389390e2f9dcSlm66018 mutex_exit(&vdcp->lock); 389490e2f9dcSlm66018 38953af08d82Slm66018 /* Wait for the response message. */ 38963af08d82Slm66018 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 38973af08d82Slm66018 b_idx); 389890e2f9dcSlm66018 rv = vdc_wait_for_response(vdcp, &vio_msg); 389990e2f9dcSlm66018 if (rv) { 390090e2f9dcSlm66018 /* 390190e2f9dcSlm66018 * If this is a block read/write we update 390290e2f9dcSlm66018 * the I/O statistics kstat to take it 390390e2f9dcSlm66018 * off the run queue. 
390490e2f9dcSlm66018 */ 390590e2f9dcSlm66018 mutex_enter(&vdcp->lock); 390690e2f9dcSlm66018 if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 390790e2f9dcSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 390890e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 390990e2f9dcSlm66018 DTRACE_IO1(done, buf_t *, 391090e2f9dcSlm66018 curr_ldep->cb_arg); 391190e2f9dcSlm66018 } 39123af08d82Slm66018 DMSG(vdcp, 1, "[%d] wait_for_response " 39133af08d82Slm66018 "returned err=%d\n", vdcp->instance, 391490e2f9dcSlm66018 rv); 391590e2f9dcSlm66018 mutex_exit(&vdcp->lock); 391690e2f9dcSlm66018 goto done; 39173af08d82Slm66018 } 39183af08d82Slm66018 39193af08d82Slm66018 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 392090e2f9dcSlm66018 rv = vdc_process_data_msg(vdcp, &vio_msg); 392190e2f9dcSlm66018 if (rv) { 39223af08d82Slm66018 DMSG(vdcp, 1, "[%d] process_data_msg " 39233af08d82Slm66018 "returned err=%d\n", vdcp->instance, 392490e2f9dcSlm66018 rv); 392590e2f9dcSlm66018 goto done; 39263af08d82Slm66018 } 3927630f014dSrameshc /* 3928630f014dSrameshc * Mark this entry as free so that we will not resubmit 3929630f014dSrameshc * this "done" request again, if we were to use the same 3930630f014dSrameshc * backup_dring again in future. This could happen when 3931630f014dSrameshc * a reset happens while processing the backup_dring. 
3932630f014dSrameshc */ 3933630f014dSrameshc curr_ldep->is_free = B_TRUE; 393490e2f9dcSlm66018 processed++; 39353af08d82Slm66018 } 39363af08d82Slm66018 39373af08d82Slm66018 /* get the next element to submit */ 39383af08d82Slm66018 if (++b_idx >= vdcp->local_dring_backup_len) 39393af08d82Slm66018 b_idx = 0; 39403af08d82Slm66018 } 39413af08d82Slm66018 39423af08d82Slm66018 /* all done - now clear up pending dring copy */ 39433af08d82Slm66018 dring_size = vdcp->local_dring_backup_len * 39443af08d82Slm66018 sizeof (vdcp->local_dring_backup[0]); 39453af08d82Slm66018 39463af08d82Slm66018 (void) kmem_free(vdcp->local_dring_backup, dring_size); 39473af08d82Slm66018 39483af08d82Slm66018 vdcp->local_dring_backup = NULL; 39493af08d82Slm66018 395090e2f9dcSlm66018 done: 395190e2f9dcSlm66018 DTRACE_PROBE2(processed, int, processed, vdc_t *, vdcp); 395290e2f9dcSlm66018 395390e2f9dcSlm66018 return (rv); 39543af08d82Slm66018 } 39553af08d82Slm66018 39563af08d82Slm66018 /* 39573af08d82Slm66018 * Function: 3958655fd6a9Sachartre * vdc_cancel_backup_dring 3959655fd6a9Sachartre * 3960655fd6a9Sachartre * Description: 3961655fd6a9Sachartre * Cancel each descriptor in the backed up dring to vDisk server. 3962655fd6a9Sachartre * The Dring was backed up during connection reset. 3963655fd6a9Sachartre * 3964655fd6a9Sachartre * Arguments: 3965655fd6a9Sachartre * vdcp - soft state pointer for this instance of the device driver. 
3966655fd6a9Sachartre * 3967655fd6a9Sachartre * Return Code: 3968655fd6a9Sachartre * None 3969655fd6a9Sachartre */ 3970655fd6a9Sachartre void 397190e2f9dcSlm66018 vdc_cancel_backup_dring(vdc_t *vdcp) 3972655fd6a9Sachartre { 3973655fd6a9Sachartre vdc_local_desc_t *ldep; 3974655fd6a9Sachartre struct buf *bufp; 3975655fd6a9Sachartre int count; 3976655fd6a9Sachartre int b_idx; 3977655fd6a9Sachartre int dring_size; 397890e2f9dcSlm66018 int cancelled = 0; 3979655fd6a9Sachartre 3980655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 3981655fd6a9Sachartre ASSERT(vdcp->state == VDC_STATE_INIT || 3982655fd6a9Sachartre vdcp->state == VDC_STATE_INIT_WAITING || 3983655fd6a9Sachartre vdcp->state == VDC_STATE_NEGOTIATE || 3984655fd6a9Sachartre vdcp->state == VDC_STATE_RESETTING); 3985655fd6a9Sachartre 3986655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3987655fd6a9Sachartre /* the pending requests have already been processed */ 3988655fd6a9Sachartre return; 3989655fd6a9Sachartre } 3990655fd6a9Sachartre 3991655fd6a9Sachartre DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3992655fd6a9Sachartre vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3993655fd6a9Sachartre 3994655fd6a9Sachartre /* 3995655fd6a9Sachartre * Walk the backup copy of the local descriptor ring and 3996655fd6a9Sachartre * cancel all the outstanding transactions. 
3997655fd6a9Sachartre */ 3998655fd6a9Sachartre b_idx = vdcp->local_dring_backup_tail; 3999655fd6a9Sachartre for (count = 0; count < vdcp->local_dring_backup_len; count++) { 4000655fd6a9Sachartre 4001655fd6a9Sachartre ldep = &(vdcp->local_dring_backup[b_idx]); 4002655fd6a9Sachartre 4003655fd6a9Sachartre /* only cancel outstanding transactions */ 4004655fd6a9Sachartre if (!ldep->is_free) { 4005655fd6a9Sachartre 4006655fd6a9Sachartre DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 400790e2f9dcSlm66018 cancelled++; 4008655fd6a9Sachartre 4009655fd6a9Sachartre /* 4010655fd6a9Sachartre * All requests have already been cleared from the 4011655fd6a9Sachartre * local descriptor ring and the LDC channel has been 4012655fd6a9Sachartre * reset so we will never get any reply for these 4013655fd6a9Sachartre * requests. Now we just have to notify threads waiting 4014655fd6a9Sachartre * for replies that the request has failed. 4015655fd6a9Sachartre */ 4016655fd6a9Sachartre switch (ldep->cb_type) { 4017655fd6a9Sachartre case CB_SYNC: 4018655fd6a9Sachartre ASSERT(vdcp->sync_op_pending); 4019655fd6a9Sachartre vdcp->sync_op_status = EIO; 4020655fd6a9Sachartre vdcp->sync_op_pending = B_FALSE; 4021655fd6a9Sachartre cv_signal(&vdcp->sync_pending_cv); 4022655fd6a9Sachartre break; 4023655fd6a9Sachartre 4024655fd6a9Sachartre case CB_STRATEGY: 4025655fd6a9Sachartre bufp = ldep->cb_arg; 4026655fd6a9Sachartre ASSERT(bufp != NULL); 4027655fd6a9Sachartre bufp->b_resid = bufp->b_bcount; 4028366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 402990e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 4030366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 4031655fd6a9Sachartre bioerror(bufp, EIO); 4032655fd6a9Sachartre biodone(bufp); 4033655fd6a9Sachartre break; 4034655fd6a9Sachartre 4035655fd6a9Sachartre default: 4036655fd6a9Sachartre ASSERT(0); 4037655fd6a9Sachartre } 4038655fd6a9Sachartre 4039655fd6a9Sachartre } 4040655fd6a9Sachartre 4041655fd6a9Sachartre /* get the next element to cancel */ 
4042655fd6a9Sachartre if (++b_idx >= vdcp->local_dring_backup_len) 4043655fd6a9Sachartre b_idx = 0; 4044655fd6a9Sachartre } 4045655fd6a9Sachartre 4046655fd6a9Sachartre /* all done - now clear up pending dring copy */ 4047655fd6a9Sachartre dring_size = vdcp->local_dring_backup_len * 4048655fd6a9Sachartre sizeof (vdcp->local_dring_backup[0]); 4049655fd6a9Sachartre 4050655fd6a9Sachartre (void) kmem_free(vdcp->local_dring_backup, dring_size); 4051655fd6a9Sachartre 4052655fd6a9Sachartre vdcp->local_dring_backup = NULL; 4053655fd6a9Sachartre 405490e2f9dcSlm66018 DTRACE_PROBE2(cancelled, int, cancelled, vdc_t *, vdcp); 4055655fd6a9Sachartre } 4056655fd6a9Sachartre 4057655fd6a9Sachartre /* 4058655fd6a9Sachartre * Function: 4059655fd6a9Sachartre * vdc_connection_timeout 4060655fd6a9Sachartre * 4061655fd6a9Sachartre * Description: 4062655fd6a9Sachartre * This function is invoked if the timeout set to establish the connection 4063655fd6a9Sachartre * with vds expires. This will happen if we spend too much time in the 4064655fd6a9Sachartre * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states. Then we will 4065655fd6a9Sachartre * cancel any pending request and mark them as failed. 4066655fd6a9Sachartre * 4067655fd6a9Sachartre * If the timeout does not expire, it will be cancelled when we reach the 4068655fd6a9Sachartre * VDC_STATE_HANDLE_PENDING or VDC_STATE_RESETTING state. This function can 4069655fd6a9Sachartre * be invoked while we are in the VDC_STATE_HANDLE_PENDING or 4070655fd6a9Sachartre * VDC_STATE_RESETTING state in which case we do nothing because the 4071655fd6a9Sachartre * timeout is being cancelled. 4072655fd6a9Sachartre * 4073655fd6a9Sachartre * Arguments: 4074655fd6a9Sachartre * arg - argument of the timeout function actually a soft state 4075655fd6a9Sachartre * pointer for the instance of the device driver. 
4076655fd6a9Sachartre * 4077655fd6a9Sachartre * Return Code: 4078655fd6a9Sachartre * None 4079655fd6a9Sachartre */ 4080655fd6a9Sachartre void 4081655fd6a9Sachartre vdc_connection_timeout(void *arg) 4082655fd6a9Sachartre { 4083655fd6a9Sachartre vdc_t *vdcp = (vdc_t *)arg; 4084655fd6a9Sachartre 4085655fd6a9Sachartre mutex_enter(&vdcp->lock); 4086655fd6a9Sachartre 4087655fd6a9Sachartre if (vdcp->state == VDC_STATE_HANDLE_PENDING || 4088655fd6a9Sachartre vdcp->state == VDC_STATE_DETACH) { 4089655fd6a9Sachartre /* 4090655fd6a9Sachartre * The connection has just been re-established or 4091655fd6a9Sachartre * we are detaching. 4092655fd6a9Sachartre */ 4093655fd6a9Sachartre vdcp->ctimeout_reached = B_FALSE; 4094655fd6a9Sachartre mutex_exit(&vdcp->lock); 4095655fd6a9Sachartre return; 4096655fd6a9Sachartre } 4097655fd6a9Sachartre 4098655fd6a9Sachartre vdcp->ctimeout_reached = B_TRUE; 4099655fd6a9Sachartre 4100655fd6a9Sachartre /* notify requests waiting for sending */ 4101655fd6a9Sachartre cv_broadcast(&vdcp->running_cv); 4102655fd6a9Sachartre 4103655fd6a9Sachartre /* cancel requests waiting for a result */ 410490e2f9dcSlm66018 vdc_cancel_backup_dring(vdcp); 4105655fd6a9Sachartre 4106655fd6a9Sachartre mutex_exit(&vdcp->lock); 4107655fd6a9Sachartre 4108655fd6a9Sachartre cmn_err(CE_NOTE, "[%d] connection to service domain timeout", 4109655fd6a9Sachartre vdcp->instance); 4110655fd6a9Sachartre } 4111655fd6a9Sachartre 4112655fd6a9Sachartre /* 4113655fd6a9Sachartre * Function: 41143af08d82Slm66018 * vdc_backup_local_dring() 41153af08d82Slm66018 * 41163af08d82Slm66018 * Description: 41173af08d82Slm66018 * Backup the current dring in the event of a reset. The Dring 41183af08d82Slm66018 * transactions will be resubmitted to the server when the 41193af08d82Slm66018 * connection is restored. 41203af08d82Slm66018 * 41213af08d82Slm66018 * Arguments: 41223af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
41233af08d82Slm66018 * 41243af08d82Slm66018 * Return Code: 41253af08d82Slm66018 * NONE 41263af08d82Slm66018 */ 41273af08d82Slm66018 static void 41283af08d82Slm66018 vdc_backup_local_dring(vdc_t *vdcp) 41293af08d82Slm66018 { 41303af08d82Slm66018 int dring_size; 41313af08d82Slm66018 4132655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 41333af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_RESETTING); 41343af08d82Slm66018 41353af08d82Slm66018 /* 41363af08d82Slm66018 * If the backup dring is stil around, it means 41373af08d82Slm66018 * that the last restore did not complete. However, 41383af08d82Slm66018 * since we never got back into the running state, 41393af08d82Slm66018 * the backup copy we have is still valid. 41403af08d82Slm66018 */ 41413af08d82Slm66018 if (vdcp->local_dring_backup != NULL) { 41423af08d82Slm66018 DMSG(vdcp, 1, "reusing local descriptor ring backup " 41433af08d82Slm66018 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 41443af08d82Slm66018 vdcp->local_dring_backup_tail); 41453af08d82Slm66018 return; 41463af08d82Slm66018 } 41473af08d82Slm66018 4148655fd6a9Sachartre /* 4149655fd6a9Sachartre * The backup dring can be NULL and the local dring may not be 4150655fd6a9Sachartre * initialized. This can happen if we had a reset while establishing 4151655fd6a9Sachartre * a new connection but after the connection has timed out. In that 4152655fd6a9Sachartre * case the backup dring is NULL because the requests have been 4153655fd6a9Sachartre * cancelled and the request occured before the local dring is 4154655fd6a9Sachartre * initialized. 
4155655fd6a9Sachartre */ 4156655fd6a9Sachartre if (!(vdcp->initialized & VDC_DRING_LOCAL)) 4157655fd6a9Sachartre return; 4158655fd6a9Sachartre 41593af08d82Slm66018 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 41603af08d82Slm66018 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 41613af08d82Slm66018 41623af08d82Slm66018 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 41633af08d82Slm66018 41643af08d82Slm66018 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 41653af08d82Slm66018 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 41663af08d82Slm66018 41673af08d82Slm66018 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 41683af08d82Slm66018 vdcp->local_dring_backup_len = vdcp->dring_len; 41693af08d82Slm66018 } 41703af08d82Slm66018 41718cd10891Snarayan static void 41728cd10891Snarayan vdc_switch_server(vdc_t *vdcp) 41738cd10891Snarayan { 41748cd10891Snarayan int rv; 41758cd10891Snarayan vdc_server_t *curr_server, *new_server; 41768cd10891Snarayan 41778cd10891Snarayan ASSERT(MUTEX_HELD(&vdcp->lock)); 41788cd10891Snarayan 41798cd10891Snarayan /* if there is only one server return back */ 41808cd10891Snarayan if (vdcp->num_servers == 1) { 41818cd10891Snarayan return; 41828cd10891Snarayan } 41838cd10891Snarayan 41848cd10891Snarayan /* Get current and next server */ 41858cd10891Snarayan curr_server = vdcp->curr_server; 41868cd10891Snarayan new_server = 41878cd10891Snarayan (curr_server->next) ? 
curr_server->next : vdcp->server_list; 41888cd10891Snarayan ASSERT(curr_server != new_server); 41898cd10891Snarayan 41908cd10891Snarayan /* bring current server's channel down */ 41918cd10891Snarayan rv = ldc_down(curr_server->ldc_handle); 41928cd10891Snarayan if (rv) { 41938cd10891Snarayan DMSG(vdcp, 0, "[%d] Cannot bring channel down, port %ld\n", 41948cd10891Snarayan vdcp->instance, curr_server->id); 41958cd10891Snarayan return; 41968cd10891Snarayan } 41978cd10891Snarayan 41988cd10891Snarayan /* switch the server */ 41998cd10891Snarayan vdcp->curr_server = new_server; 42008cd10891Snarayan 42018cd10891Snarayan DMSG(vdcp, 0, "[%d] Switched to next vdisk server, port@%ld, ldc@%ld\n", 42028cd10891Snarayan vdcp->instance, vdcp->curr_server->id, vdcp->curr_server->ldc_id); 42038cd10891Snarayan } 42048cd10891Snarayan 42051ae08745Sheppo /* -------------------------------------------------------------------------- */ 42061ae08745Sheppo 42071ae08745Sheppo /* 42081ae08745Sheppo * The following functions process the incoming messages from vds 42091ae08745Sheppo */ 42101ae08745Sheppo 42110a55fbb7Slm66018 /* 42120a55fbb7Slm66018 * Function: 42130a55fbb7Slm66018 * vdc_process_msg_thread() 42140a55fbb7Slm66018 * 42150a55fbb7Slm66018 * Description: 42160a55fbb7Slm66018 * 42173af08d82Slm66018 * Main VDC message processing thread. Each vDisk instance 42183af08d82Slm66018 * consists of a copy of this thread. This thread triggers 42193af08d82Slm66018 * all the handshakes and data exchange with the server. It 42203af08d82Slm66018 * also handles all channel resets 42213af08d82Slm66018 * 42220a55fbb7Slm66018 * Arguments: 42230a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
42240a55fbb7Slm66018 * 42250a55fbb7Slm66018 * Return Code: 42260a55fbb7Slm66018 * None 42270a55fbb7Slm66018 */ 42281ae08745Sheppo static void 42293af08d82Slm66018 vdc_process_msg_thread(vdc_t *vdcp) 42301ae08745Sheppo { 42311ae08745Sheppo int status; 4232655fd6a9Sachartre int ctimeout; 4233655fd6a9Sachartre timeout_id_t tmid = 0; 42348cd10891Snarayan clock_t ldcup_timeout = 0; 42351ae08745Sheppo 42363af08d82Slm66018 mutex_enter(&vdcp->lock); 42371ae08745Sheppo 42381ae08745Sheppo for (;;) { 42391ae08745Sheppo 42403af08d82Slm66018 #define Q(_s) (vdcp->state == _s) ? #_s : 42413af08d82Slm66018 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 42423af08d82Slm66018 Q(VDC_STATE_INIT) 42433af08d82Slm66018 Q(VDC_STATE_INIT_WAITING) 42443af08d82Slm66018 Q(VDC_STATE_NEGOTIATE) 42453af08d82Slm66018 Q(VDC_STATE_HANDLE_PENDING) 42463af08d82Slm66018 Q(VDC_STATE_RUNNING) 42473af08d82Slm66018 Q(VDC_STATE_RESETTING) 42483af08d82Slm66018 Q(VDC_STATE_DETACH) 42493af08d82Slm66018 "UNKNOWN"); 42501ae08745Sheppo 42513af08d82Slm66018 switch (vdcp->state) { 42523af08d82Slm66018 case VDC_STATE_INIT: 42533af08d82Slm66018 4254655fd6a9Sachartre /* 4255655fd6a9Sachartre * If requested, start a timeout to check if the 4256655fd6a9Sachartre * connection with vds is established in the 4257655fd6a9Sachartre * specified delay. If the timeout expires, we 4258655fd6a9Sachartre * will cancel any pending request. 4259655fd6a9Sachartre * 4260655fd6a9Sachartre * If some reset have occurred while establishing 4261655fd6a9Sachartre * the connection, we already have a timeout armed 4262655fd6a9Sachartre * and in that case we don't need to arm a new one. 42638cd10891Snarayan * 42648cd10891Snarayan * The same rule applies when there are multiple vds'. 42658cd10891Snarayan * If either a connection cannot be established or 42668cd10891Snarayan * the handshake times out, the connection thread will 42678cd10891Snarayan * try another server. 
The 'ctimeout' will report 42688cd10891Snarayan * back an error after it expires irrespective of 42698cd10891Snarayan * whether the vdisk is trying to connect to just 42708cd10891Snarayan * one or multiple servers. 4271655fd6a9Sachartre */ 4272655fd6a9Sachartre ctimeout = (vdc_timeout != 0)? 42738cd10891Snarayan vdc_timeout : vdcp->curr_server->ctimeout; 4274655fd6a9Sachartre 4275655fd6a9Sachartre if (ctimeout != 0 && tmid == 0) { 4276655fd6a9Sachartre tmid = timeout(vdc_connection_timeout, vdcp, 42778cd10891Snarayan ctimeout * drv_usectohz(MICROSEC)); 4278655fd6a9Sachartre } 4279655fd6a9Sachartre 42808cd10891Snarayan /* Check if we are re-initializing repeatedly */ 42818cd10891Snarayan if (vdcp->hshake_cnt > vdc_hshake_retries && 4282655fd6a9Sachartre vdcp->lifecycle != VDC_LC_ONLINE) { 42838cd10891Snarayan 42848cd10891Snarayan DMSG(vdcp, 0, "[%d] too many handshakes,cnt=%d", 42858cd10891Snarayan vdcp->instance, vdcp->hshake_cnt); 42863c96341aSnarayan cmn_err(CE_NOTE, "[%d] disk access failed.\n", 42873c96341aSnarayan vdcp->instance); 42883af08d82Slm66018 vdcp->state = VDC_STATE_DETACH; 42893af08d82Slm66018 break; 42903af08d82Slm66018 } 42913af08d82Slm66018 42928cd10891Snarayan /* Switch to STATE_DETACH if drv is detaching */ 42938cd10891Snarayan if (vdcp->lifecycle == VDC_LC_DETACHING) { 42948cd10891Snarayan vdcp->state = VDC_STATE_DETACH; 42958cd10891Snarayan break; 42968cd10891Snarayan } 42978cd10891Snarayan 42988cd10891Snarayan /* Switch server */ 42998cd10891Snarayan if (vdcp->hshake_cnt > 0) 43008cd10891Snarayan vdc_switch_server(vdcp); 43018cd10891Snarayan vdcp->hshake_cnt++; 43028cd10891Snarayan 43033af08d82Slm66018 /* Bring up connection with vds via LDC */ 43043af08d82Slm66018 status = vdc_start_ldc_connection(vdcp); 43058cd10891Snarayan if (status != EINVAL) { 43063af08d82Slm66018 vdcp->state = VDC_STATE_INIT_WAITING; 43073af08d82Slm66018 } 43083af08d82Slm66018 break; 43093af08d82Slm66018 43103af08d82Slm66018 case VDC_STATE_INIT_WAITING: 
43113af08d82Slm66018 43128cd10891Snarayan /* if channel is UP, start negotiation */ 43138cd10891Snarayan if (vdcp->curr_server->ldc_state == LDC_UP) { 43148cd10891Snarayan vdcp->state = VDC_STATE_NEGOTIATE; 43158cd10891Snarayan break; 43168cd10891Snarayan } 43178cd10891Snarayan 43188cd10891Snarayan /* check if only one server exists */ 43198cd10891Snarayan if (vdcp->num_servers == 1) { 43203af08d82Slm66018 cv_wait(&vdcp->initwait_cv, &vdcp->lock); 43218cd10891Snarayan } else { 43228cd10891Snarayan /* 43238cd10891Snarayan * wait for LDC_UP, if it times out, switch 43248cd10891Snarayan * to another server. 43258cd10891Snarayan */ 43268cd10891Snarayan ldcup_timeout = ddi_get_lbolt() + 43278cd10891Snarayan (vdc_ldcup_timeout * 43288cd10891Snarayan drv_usectohz(MICROSEC)); 43298cd10891Snarayan status = cv_timedwait(&vdcp->initwait_cv, 43308cd10891Snarayan &vdcp->lock, ldcup_timeout); 43318cd10891Snarayan if (status == -1 && 43328cd10891Snarayan vdcp->state == VDC_STATE_INIT_WAITING && 43338cd10891Snarayan vdcp->curr_server->ldc_state != LDC_UP) { 43348cd10891Snarayan /* timed out & still waiting */ 43358cd10891Snarayan vdcp->state = VDC_STATE_INIT; 43368cd10891Snarayan break; 43378cd10891Snarayan } 43388cd10891Snarayan } 43398cd10891Snarayan 43403af08d82Slm66018 if (vdcp->state != VDC_STATE_INIT_WAITING) { 43413af08d82Slm66018 DMSG(vdcp, 0, 43423af08d82Slm66018 "state moved to %d out from under us...\n", 43433af08d82Slm66018 vdcp->state); 43443af08d82Slm66018 } 43453af08d82Slm66018 break; 43463af08d82Slm66018 43473af08d82Slm66018 case VDC_STATE_NEGOTIATE: 43483af08d82Slm66018 switch (status = vdc_ver_negotiation(vdcp)) { 43493af08d82Slm66018 case 0: 43503af08d82Slm66018 break; 43513af08d82Slm66018 default: 43523af08d82Slm66018 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 43533af08d82Slm66018 status); 43543af08d82Slm66018 goto reset; 43553af08d82Slm66018 } 43563af08d82Slm66018 43573af08d82Slm66018 switch (status = vdc_attr_negotiation(vdcp)) { 43583af08d82Slm66018 case 
0: 43593af08d82Slm66018 break; 43603af08d82Slm66018 default: 43613af08d82Slm66018 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 43623af08d82Slm66018 status); 43633af08d82Slm66018 goto reset; 43643af08d82Slm66018 } 43653af08d82Slm66018 43663af08d82Slm66018 switch (status = vdc_dring_negotiation(vdcp)) { 43673af08d82Slm66018 case 0: 43683af08d82Slm66018 break; 43693af08d82Slm66018 default: 43703af08d82Slm66018 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 43713af08d82Slm66018 status); 43723af08d82Slm66018 goto reset; 43733af08d82Slm66018 } 43743af08d82Slm66018 43753af08d82Slm66018 switch (status = vdc_rdx_exchange(vdcp)) { 43763af08d82Slm66018 case 0: 43773af08d82Slm66018 vdcp->state = VDC_STATE_HANDLE_PENDING; 43783af08d82Slm66018 goto done; 43793af08d82Slm66018 default: 43803af08d82Slm66018 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 43813af08d82Slm66018 status); 43823af08d82Slm66018 goto reset; 43833af08d82Slm66018 } 43843af08d82Slm66018 reset: 43853af08d82Slm66018 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 43863af08d82Slm66018 status); 43873af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4388655fd6a9Sachartre vdcp->self_reset = B_TRUE; 43893af08d82Slm66018 done: 43903af08d82Slm66018 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 43913af08d82Slm66018 vdcp->state); 43923af08d82Slm66018 break; 43933af08d82Slm66018 43943af08d82Slm66018 case VDC_STATE_HANDLE_PENDING: 43953af08d82Slm66018 4396655fd6a9Sachartre if (vdcp->ctimeout_reached) { 4397655fd6a9Sachartre /* 4398655fd6a9Sachartre * The connection timeout had been reached so 4399655fd6a9Sachartre * pending requests have been cancelled. Now 4400655fd6a9Sachartre * that the connection is back we can reset 4401655fd6a9Sachartre * the timeout. 
4402655fd6a9Sachartre */ 4403655fd6a9Sachartre ASSERT(vdcp->local_dring_backup == NULL); 4404655fd6a9Sachartre ASSERT(tmid != 0); 4405655fd6a9Sachartre tmid = 0; 4406655fd6a9Sachartre vdcp->ctimeout_reached = B_FALSE; 4407655fd6a9Sachartre vdcp->state = VDC_STATE_RUNNING; 4408655fd6a9Sachartre DMSG(vdcp, 0, "[%d] connection to service " 4409655fd6a9Sachartre "domain is up", vdcp->instance); 4410655fd6a9Sachartre break; 4411655fd6a9Sachartre } 4412655fd6a9Sachartre 44133af08d82Slm66018 mutex_exit(&vdcp->lock); 4414655fd6a9Sachartre if (tmid != 0) { 4415655fd6a9Sachartre (void) untimeout(tmid); 4416655fd6a9Sachartre tmid = 0; 4417655fd6a9Sachartre } 44183af08d82Slm66018 status = vdc_resubmit_backup_dring(vdcp); 44193af08d82Slm66018 mutex_enter(&vdcp->lock); 44203af08d82Slm66018 44213af08d82Slm66018 if (status) 44223af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 44233af08d82Slm66018 else 44243af08d82Slm66018 vdcp->state = VDC_STATE_RUNNING; 44253af08d82Slm66018 44263af08d82Slm66018 break; 44273af08d82Slm66018 44283af08d82Slm66018 /* enter running state */ 44293af08d82Slm66018 case VDC_STATE_RUNNING: 44303af08d82Slm66018 /* 44313af08d82Slm66018 * Signal anyone waiting for the connection 44323af08d82Slm66018 * to come on line. 
44333af08d82Slm66018 */ 44343af08d82Slm66018 vdcp->hshake_cnt = 0; 44353af08d82Slm66018 cv_broadcast(&vdcp->running_cv); 44362f5224aeSachartre 44372f5224aeSachartre /* failfast has to been checked after reset */ 44382f5224aeSachartre cv_signal(&vdcp->failfast_cv); 44392f5224aeSachartre 44402f5224aeSachartre /* ownership is lost during reset */ 44412f5224aeSachartre if (vdcp->ownership & VDC_OWNERSHIP_WANTED) 44422f5224aeSachartre vdcp->ownership |= VDC_OWNERSHIP_RESET; 44432f5224aeSachartre cv_signal(&vdcp->ownership_cv); 44442f5224aeSachartre 4445d7400d00Sachartre cmn_err(CE_CONT, "?vdisk@%d is online using " 4446d7400d00Sachartre "ldc@%ld,%ld\n", vdcp->instance, 4447d7400d00Sachartre vdcp->curr_server->ldc_id, vdcp->curr_server->id); 4448d7400d00Sachartre 44493af08d82Slm66018 mutex_exit(&vdcp->lock); 44503af08d82Slm66018 44513af08d82Slm66018 for (;;) { 44523af08d82Slm66018 vio_msg_t msg; 44533af08d82Slm66018 status = vdc_wait_for_response(vdcp, &msg); 44543af08d82Slm66018 if (status) break; 44553af08d82Slm66018 44563af08d82Slm66018 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 44573af08d82Slm66018 vdcp->instance); 44583af08d82Slm66018 status = vdc_process_data_msg(vdcp, &msg); 44591ae08745Sheppo if (status) { 44603af08d82Slm66018 DMSG(vdcp, 1, "[%d] process_data_msg " 44613af08d82Slm66018 "returned err=%d\n", vdcp->instance, 44623af08d82Slm66018 status); 44631ae08745Sheppo break; 44641ae08745Sheppo } 44651ae08745Sheppo 44663af08d82Slm66018 } 4467e1ebb9ecSlm66018 44683af08d82Slm66018 mutex_enter(&vdcp->lock); 44693af08d82Slm66018 4470d7400d00Sachartre cmn_err(CE_CONT, "?vdisk@%d is offline\n", 4471d7400d00Sachartre vdcp->instance); 4472d7400d00Sachartre 44733af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4474690555a1Sachartre vdcp->self_reset = B_TRUE; 44753af08d82Slm66018 break; 44763af08d82Slm66018 44773af08d82Slm66018 case VDC_STATE_RESETTING: 4478655fd6a9Sachartre /* 4479655fd6a9Sachartre * When we reach this state, we either come from the 
4480655fd6a9Sachartre * VDC_STATE_RUNNING state and we can have pending 4481655fd6a9Sachartre * request but no timeout is armed; or we come from 4482655fd6a9Sachartre * the VDC_STATE_INIT_WAITING, VDC_NEGOTIATE or 4483655fd6a9Sachartre * VDC_HANDLE_PENDING state and there is no pending 4484655fd6a9Sachartre * request or pending requests have already been copied 4485655fd6a9Sachartre * into the backup dring. So we can safely keep the 4486655fd6a9Sachartre * connection timeout armed while we are in this state. 4487655fd6a9Sachartre */ 4488655fd6a9Sachartre 44893af08d82Slm66018 DMSG(vdcp, 0, "Initiating channel reset " 44903af08d82Slm66018 "(pending = %d)\n", (int)vdcp->threads_pending); 44913af08d82Slm66018 44923af08d82Slm66018 if (vdcp->self_reset) { 44933af08d82Slm66018 DMSG(vdcp, 0, 44943af08d82Slm66018 "[%d] calling stop_ldc_connection.\n", 44953af08d82Slm66018 vdcp->instance); 44963af08d82Slm66018 status = vdc_stop_ldc_connection(vdcp); 44973af08d82Slm66018 vdcp->self_reset = B_FALSE; 44981ae08745Sheppo } 44991ae08745Sheppo 45001ae08745Sheppo /* 45013af08d82Slm66018 * Wait for all threads currently waiting 45023af08d82Slm66018 * for a free dring entry to use. 
45031ae08745Sheppo */ 45043af08d82Slm66018 while (vdcp->threads_pending) { 45053af08d82Slm66018 cv_broadcast(&vdcp->membind_cv); 45063af08d82Slm66018 cv_broadcast(&vdcp->dring_free_cv); 45073af08d82Slm66018 mutex_exit(&vdcp->lock); 4508205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4509205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 45103af08d82Slm66018 mutex_enter(&vdcp->lock); 45111ae08745Sheppo } 45121ae08745Sheppo 45133af08d82Slm66018 ASSERT(vdcp->threads_pending == 0); 45141ae08745Sheppo 45153af08d82Slm66018 /* Sanity check that no thread is receiving */ 45163af08d82Slm66018 ASSERT(vdcp->read_state != VDC_READ_WAITING); 45170a55fbb7Slm66018 45183af08d82Slm66018 vdcp->read_state = VDC_READ_IDLE; 45193af08d82Slm66018 45203af08d82Slm66018 vdc_backup_local_dring(vdcp); 45213af08d82Slm66018 45223af08d82Slm66018 /* cleanup the old d-ring */ 45233af08d82Slm66018 vdc_destroy_descriptor_ring(vdcp); 45243af08d82Slm66018 45253af08d82Slm66018 /* go and start again */ 45263af08d82Slm66018 vdcp->state = VDC_STATE_INIT; 45273af08d82Slm66018 45280a55fbb7Slm66018 break; 45290a55fbb7Slm66018 45303af08d82Slm66018 case VDC_STATE_DETACH: 45313af08d82Slm66018 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 45323af08d82Slm66018 vdcp->instance); 45333af08d82Slm66018 4534655fd6a9Sachartre /* cancel any pending timeout */ 4535655fd6a9Sachartre mutex_exit(&vdcp->lock); 4536655fd6a9Sachartre if (tmid != 0) { 4537655fd6a9Sachartre (void) untimeout(tmid); 4538655fd6a9Sachartre tmid = 0; 4539655fd6a9Sachartre } 4540655fd6a9Sachartre mutex_enter(&vdcp->lock); 4541655fd6a9Sachartre 45423c96341aSnarayan /* 45433c96341aSnarayan * Signal anyone waiting for connection 45443c96341aSnarayan * to come online 45453c96341aSnarayan */ 45463c96341aSnarayan cv_broadcast(&vdcp->running_cv); 45473c96341aSnarayan 45483af08d82Slm66018 while (vdcp->sync_op_pending) { 45493af08d82Slm66018 cv_signal(&vdcp->sync_pending_cv); 45503af08d82Slm66018 cv_signal(&vdcp->sync_blocked_cv); 
45513af08d82Slm66018 mutex_exit(&vdcp->lock); 4552205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4553205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 45543af08d82Slm66018 mutex_enter(&vdcp->lock); 45550a55fbb7Slm66018 } 45561ae08745Sheppo 45573af08d82Slm66018 mutex_exit(&vdcp->lock); 45583af08d82Slm66018 45593af08d82Slm66018 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 45603af08d82Slm66018 vdcp->instance); 45613af08d82Slm66018 thread_exit(); 45623af08d82Slm66018 break; 45633af08d82Slm66018 } 45643af08d82Slm66018 } 45650a55fbb7Slm66018 } 45660a55fbb7Slm66018 45670a55fbb7Slm66018 45680a55fbb7Slm66018 /* 45690a55fbb7Slm66018 * Function: 45700a55fbb7Slm66018 * vdc_process_data_msg() 45710a55fbb7Slm66018 * 45720a55fbb7Slm66018 * Description: 45730a55fbb7Slm66018 * This function is called by the message processing thread each time 45740a55fbb7Slm66018 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 45750a55fbb7Slm66018 * be an ACK or NACK from vds[1] which vdc handles as follows. 45760a55fbb7Slm66018 * ACK - wake up the waiting thread 45770a55fbb7Slm66018 * NACK - resend any messages necessary 45780a55fbb7Slm66018 * 45790a55fbb7Slm66018 * [1] Although the message format allows it, vds should not send a 45800a55fbb7Slm66018 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 45810a55fbb7Slm66018 * some bizarre reason it does, vdc will reset the connection. 45820a55fbb7Slm66018 * 45830a55fbb7Slm66018 * Arguments: 45840a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 45850a55fbb7Slm66018 * msg - the LDC message sent by vds 45860a55fbb7Slm66018 * 45870a55fbb7Slm66018 * Return Code: 45880a55fbb7Slm66018 * 0 - Success. 
45890a55fbb7Slm66018 * > 0 - error value returned by LDC 45900a55fbb7Slm66018 */ 45910a55fbb7Slm66018 static int 45923af08d82Slm66018 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 45930a55fbb7Slm66018 { 45940a55fbb7Slm66018 int status = 0; 45953af08d82Slm66018 vio_dring_msg_t *dring_msg; 4596d10e4ef2Snarayan vdc_local_desc_t *ldep = NULL; 45973af08d82Slm66018 int start, end; 45983af08d82Slm66018 int idx; 459990e2f9dcSlm66018 int op; 46000a55fbb7Slm66018 46013af08d82Slm66018 dring_msg = (vio_dring_msg_t *)msg; 46020a55fbb7Slm66018 46033af08d82Slm66018 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 46043af08d82Slm66018 ASSERT(vdcp != NULL); 46053af08d82Slm66018 46063af08d82Slm66018 mutex_enter(&vdcp->lock); 46070a55fbb7Slm66018 46080a55fbb7Slm66018 /* 46090a55fbb7Slm66018 * Check to see if the message has bogus data 46100a55fbb7Slm66018 */ 4611e1ebb9ecSlm66018 idx = start = dring_msg->start_idx; 46120a55fbb7Slm66018 end = dring_msg->end_idx; 46133af08d82Slm66018 if ((start >= vdcp->dring_len) || 46143af08d82Slm66018 (end >= vdcp->dring_len) || (end < -1)) { 461590e2f9dcSlm66018 /* 461690e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 461790e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 461890e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 461990e2f9dcSlm66018 */ 462090e2f9dcSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 46213af08d82Slm66018 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 46223af08d82Slm66018 vdcp->instance, start, end); 46233af08d82Slm66018 mutex_exit(&vdcp->lock); 4624e1ebb9ecSlm66018 return (EINVAL); 46250a55fbb7Slm66018 } 46260a55fbb7Slm66018 46270a55fbb7Slm66018 /* 46280a55fbb7Slm66018 * Verify that the sequence number is what vdc expects. 
46290a55fbb7Slm66018 */ 46303af08d82Slm66018 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 4631e1ebb9ecSlm66018 case VDC_SEQ_NUM_TODO: 4632e1ebb9ecSlm66018 break; /* keep processing this message */ 4633e1ebb9ecSlm66018 case VDC_SEQ_NUM_SKIP: 46343af08d82Slm66018 mutex_exit(&vdcp->lock); 4635e1ebb9ecSlm66018 return (0); 4636e1ebb9ecSlm66018 case VDC_SEQ_NUM_INVALID: 463790e2f9dcSlm66018 /* 463890e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 463990e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 464090e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 464190e2f9dcSlm66018 */ 4642366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 464390e2f9dcSlm66018 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 4644366a92acSlm66018 mutex_exit(&vdcp->lock); 46450a55fbb7Slm66018 return (ENXIO); 46460a55fbb7Slm66018 } 46470a55fbb7Slm66018 46483af08d82Slm66018 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 464990e2f9dcSlm66018 /* 465090e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 465190e2f9dcSlm66018 * 465290e2f9dcSlm66018 * We need to update the run queue if a read or write request 465390e2f9dcSlm66018 * is being NACKed - otherwise there will appear to be an 465490e2f9dcSlm66018 * indefinite outstanding request and statistics reported by 465590e2f9dcSlm66018 * iostat(1M) will be incorrect. The transaction will be 465690e2f9dcSlm66018 * resubmitted from the backup DRing following the reset 465790e2f9dcSlm66018 * and the wait/run queues will be entered again. 
465890e2f9dcSlm66018 */ 465990e2f9dcSlm66018 ldep = &vdcp->local_dring[idx]; 466090e2f9dcSlm66018 op = ldep->operation; 466190e2f9dcSlm66018 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 466290e2f9dcSlm66018 DTRACE_IO1(done, buf_t *, ldep->cb_arg); 466390e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 466490e2f9dcSlm66018 } 4665366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 466690e2f9dcSlm66018 VDC_DUMP_DRING_MSG(dring_msg); 466790e2f9dcSlm66018 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 46683af08d82Slm66018 mutex_exit(&vdcp->lock); 4669e1ebb9ecSlm66018 return (EIO); 46700a55fbb7Slm66018 46713af08d82Slm66018 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 467290e2f9dcSlm66018 /* 467390e2f9dcSlm66018 * Update the I/O statistics to indicate that an error occurred. 467490e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 467590e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 467690e2f9dcSlm66018 */ 4677366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_protoerrs); 46783af08d82Slm66018 mutex_exit(&vdcp->lock); 4679e1ebb9ecSlm66018 return (EPROTO); 4680e1ebb9ecSlm66018 } 4681e1ebb9ecSlm66018 46823af08d82Slm66018 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 46833af08d82Slm66018 ASSERT(start == end); 46843af08d82Slm66018 46853af08d82Slm66018 ldep = &vdcp->local_dring[idx]; 46863af08d82Slm66018 46873af08d82Slm66018 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 46883af08d82Slm66018 ldep->dep->hdr.dstate, ldep->cb_type); 46893af08d82Slm66018 4690e1ebb9ecSlm66018 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 46913af08d82Slm66018 struct buf *bufp; 4692e1ebb9ecSlm66018 46933af08d82Slm66018 switch (ldep->cb_type) { 46943af08d82Slm66018 case CB_SYNC: 46953af08d82Slm66018 ASSERT(vdcp->sync_op_pending); 4696d10e4ef2Snarayan 46973af08d82Slm66018 status = vdc_depopulate_descriptor(vdcp, idx); 46983af08d82Slm66018 vdcp->sync_op_status = status; 46993af08d82Slm66018 vdcp->sync_op_pending = B_FALSE; 
47003af08d82Slm66018 cv_signal(&vdcp->sync_pending_cv); 47013af08d82Slm66018 break; 47024bac2208Snarayan 47033af08d82Slm66018 case CB_STRATEGY: 47043af08d82Slm66018 bufp = ldep->cb_arg; 47053af08d82Slm66018 ASSERT(bufp != NULL); 47063c96341aSnarayan bufp->b_resid = 47073c96341aSnarayan bufp->b_bcount - ldep->dep->payload.nbytes; 47083af08d82Slm66018 status = ldep->dep->payload.status; /* Future:ntoh */ 47093af08d82Slm66018 if (status != 0) { 47103af08d82Slm66018 DMSG(vdcp, 1, "strategy status=%d\n", status); 4711366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 47123af08d82Slm66018 bioerror(bufp, status); 4713d10e4ef2Snarayan } 47142f5224aeSachartre 47152f5224aeSachartre (void) vdc_depopulate_descriptor(vdcp, idx); 47163c96341aSnarayan 47173c96341aSnarayan DMSG(vdcp, 1, 47183c96341aSnarayan "strategy complete req=%ld bytes resp=%ld bytes\n", 47193c96341aSnarayan bufp->b_bcount, ldep->dep->payload.nbytes); 47202f5224aeSachartre 47212f5224aeSachartre if (status != 0 && vdcp->failfast_interval != 0) { 47222f5224aeSachartre /* 47232f5224aeSachartre * The I/O has failed and failfast is enabled. 47242f5224aeSachartre * We need the failfast thread to check if the 47252f5224aeSachartre * failure is due to a reservation conflict. 47262f5224aeSachartre */ 47272f5224aeSachartre (void) vdc_failfast_io_queue(vdcp, bufp); 47282f5224aeSachartre } else { 4729366a92acSlm66018 if (status == 0) { 473090e2f9dcSlm66018 op = (bufp->b_flags & B_READ) ? 
4731366a92acSlm66018 VD_OP_BREAD : VD_OP_BWRITE; 4732366a92acSlm66018 VD_UPDATE_IO_STATS(vdcp, op, 4733366a92acSlm66018 ldep->dep->payload.nbytes); 4734366a92acSlm66018 } 473590e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 4736366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 47372f5224aeSachartre biodone(bufp); 47382f5224aeSachartre } 47393af08d82Slm66018 break; 47403af08d82Slm66018 47413af08d82Slm66018 default: 47423af08d82Slm66018 ASSERT(0); 47430a55fbb7Slm66018 } 47443af08d82Slm66018 } 47453af08d82Slm66018 47463af08d82Slm66018 /* let the arrival signal propogate */ 47473af08d82Slm66018 mutex_exit(&vdcp->lock); 47480a55fbb7Slm66018 4749e1ebb9ecSlm66018 /* probe gives the count of how many entries were processed */ 4750366a92acSlm66018 DTRACE_PROBE2(processed, int, 1, vdc_t *, vdcp); 47510a55fbb7Slm66018 47523af08d82Slm66018 return (0); 47530a55fbb7Slm66018 } 47540a55fbb7Slm66018 47550a55fbb7Slm66018 47560a55fbb7Slm66018 /* 47570a55fbb7Slm66018 * Function: 47580a55fbb7Slm66018 * vdc_handle_ver_msg() 47590a55fbb7Slm66018 * 47600a55fbb7Slm66018 * Description: 47610a55fbb7Slm66018 * 47620a55fbb7Slm66018 * Arguments: 47630a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
47640a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 47650a55fbb7Slm66018 * 47660a55fbb7Slm66018 * Return Code: 47670a55fbb7Slm66018 * 0 - Success 47680a55fbb7Slm66018 */ 47690a55fbb7Slm66018 static int 47700a55fbb7Slm66018 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 47710a55fbb7Slm66018 { 47720a55fbb7Slm66018 int status = 0; 47730a55fbb7Slm66018 47740a55fbb7Slm66018 ASSERT(vdc != NULL); 47750a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 47760a55fbb7Slm66018 47770a55fbb7Slm66018 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 47780a55fbb7Slm66018 return (EPROTO); 47790a55fbb7Slm66018 } 47800a55fbb7Slm66018 47810a55fbb7Slm66018 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 47820a55fbb7Slm66018 return (EINVAL); 47830a55fbb7Slm66018 } 47840a55fbb7Slm66018 47850a55fbb7Slm66018 switch (ver_msg->tag.vio_subtype) { 47860a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 47870a55fbb7Slm66018 /* 47880a55fbb7Slm66018 * We check to see if the version returned is indeed supported 47890a55fbb7Slm66018 * (The server may have also adjusted the minor number downwards 47900a55fbb7Slm66018 * and if so 'ver_msg' will contain the actual version agreed) 47910a55fbb7Slm66018 */ 47920a55fbb7Slm66018 if (vdc_is_supported_version(ver_msg)) { 47930a55fbb7Slm66018 vdc->ver.major = ver_msg->ver_major; 47940a55fbb7Slm66018 vdc->ver.minor = ver_msg->ver_minor; 47950a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 47960a55fbb7Slm66018 } else { 47970a55fbb7Slm66018 status = EPROTO; 47980a55fbb7Slm66018 } 47990a55fbb7Slm66018 break; 48000a55fbb7Slm66018 48010a55fbb7Slm66018 case VIO_SUBTYPE_NACK: 48020a55fbb7Slm66018 /* 48030a55fbb7Slm66018 * call vdc_is_supported_version() which will return the next 48040a55fbb7Slm66018 * supported version (if any) in 'ver_msg' 48050a55fbb7Slm66018 */ 48060a55fbb7Slm66018 (void) vdc_is_supported_version(ver_msg); 48070a55fbb7Slm66018 if (ver_msg->ver_major > 0) { 48080a55fbb7Slm66018 size_t len = sizeof (*ver_msg); 48090a55fbb7Slm66018 
48100a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 48110a55fbb7Slm66018 48120a55fbb7Slm66018 /* reset the necessary fields and resend */ 48130a55fbb7Slm66018 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 48140a55fbb7Slm66018 ver_msg->dev_class = VDEV_DISK; 48150a55fbb7Slm66018 48160a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 48173af08d82Slm66018 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 48180a55fbb7Slm66018 vdc->instance, status); 48190a55fbb7Slm66018 if (len != sizeof (*ver_msg)) 48200a55fbb7Slm66018 status = EBADMSG; 48210a55fbb7Slm66018 } else { 482287a7269eSachartre DMSG(vdc, 0, "[%d] No common version with vDisk server", 482387a7269eSachartre vdc->instance); 48240a55fbb7Slm66018 status = ENOTSUP; 48250a55fbb7Slm66018 } 48260a55fbb7Slm66018 48270a55fbb7Slm66018 break; 48281ae08745Sheppo case VIO_SUBTYPE_INFO: 48291ae08745Sheppo /* 48301ae08745Sheppo * Handle the case where vds starts handshake 4831eff7243fSlm66018 * (for now only vdc is the instigator) 48321ae08745Sheppo */ 48331ae08745Sheppo status = ENOTSUP; 48341ae08745Sheppo break; 48351ae08745Sheppo 48361ae08745Sheppo default: 48370a55fbb7Slm66018 status = EINVAL; 48381ae08745Sheppo break; 48391ae08745Sheppo } 48401ae08745Sheppo 48410a55fbb7Slm66018 return (status); 48420a55fbb7Slm66018 } 48430a55fbb7Slm66018 48440a55fbb7Slm66018 /* 48450a55fbb7Slm66018 * Function: 48460a55fbb7Slm66018 * vdc_handle_attr_msg() 48470a55fbb7Slm66018 * 48480a55fbb7Slm66018 * Description: 48490a55fbb7Slm66018 * 48500a55fbb7Slm66018 * Arguments: 48510a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
48520a55fbb7Slm66018 * attr_msg - LDC message sent by vDisk server 48530a55fbb7Slm66018 * 48540a55fbb7Slm66018 * Return Code: 48550a55fbb7Slm66018 * 0 - Success 48560a55fbb7Slm66018 */ 48570a55fbb7Slm66018 static int 48580a55fbb7Slm66018 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 48590a55fbb7Slm66018 { 48600a55fbb7Slm66018 int status = 0; 48610a55fbb7Slm66018 48620a55fbb7Slm66018 ASSERT(vdc != NULL); 48630a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 48640a55fbb7Slm66018 48650a55fbb7Slm66018 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 48660a55fbb7Slm66018 return (EPROTO); 48670a55fbb7Slm66018 } 48680a55fbb7Slm66018 48690a55fbb7Slm66018 switch (attr_msg->tag.vio_subtype) { 48701ae08745Sheppo case VIO_SUBTYPE_ACK: 48711ae08745Sheppo /* 48721ae08745Sheppo * We now verify the attributes sent by vds. 48731ae08745Sheppo */ 487478fcd0a1Sachartre if (attr_msg->vdisk_size == 0) { 487578fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid disk size from vds", 487678fcd0a1Sachartre vdc->instance); 487778fcd0a1Sachartre status = EINVAL; 487878fcd0a1Sachartre break; 487978fcd0a1Sachartre } 488078fcd0a1Sachartre 488178fcd0a1Sachartre if (attr_msg->max_xfer_sz == 0) { 488278fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid transfer size from vds", 488378fcd0a1Sachartre vdc->instance); 488478fcd0a1Sachartre status = EINVAL; 488578fcd0a1Sachartre break; 488678fcd0a1Sachartre } 488778fcd0a1Sachartre 48882f5224aeSachartre if (attr_msg->vdisk_size == VD_SIZE_UNKNOWN) { 48892f5224aeSachartre DMSG(vdc, 0, "[%d] Unknown disk size from vds", 48902f5224aeSachartre vdc->instance); 48912f5224aeSachartre attr_msg->vdisk_size = 0; 48922f5224aeSachartre } 4893*65908c77Syu, larry liu - Sun Microsystems - Beijing China 4894*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* update the VIO block size */ 4895*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (attr_msg->vdisk_block_size > 0 && 4896*65908c77Syu, larry liu - Sun Microsystems - Beijing China 
vdc_update_vio_bsize(vdc, 4897*65908c77Syu, larry liu - Sun Microsystems - Beijing China attr_msg->vdisk_block_size) != 0) { 4898*65908c77Syu, larry liu - Sun Microsystems - Beijing China DMSG(vdc, 0, "[%d] Invalid block size (%u) from vds", 4899*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->instance, attr_msg->vdisk_block_size); 4900*65908c77Syu, larry liu - Sun Microsystems - Beijing China status = EINVAL; 4901*65908c77Syu, larry liu - Sun Microsystems - Beijing China break; 4902*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 4903*65908c77Syu, larry liu - Sun Microsystems - Beijing China 4904de3a5331SRamesh Chitrothu /* update disk, block and transfer sizes */ 4905de3a5331SRamesh Chitrothu vdc_update_size(vdc, attr_msg->vdisk_size, 4906de3a5331SRamesh Chitrothu attr_msg->vdisk_block_size, attr_msg->max_xfer_sz); 49071ae08745Sheppo vdc->vdisk_type = attr_msg->vdisk_type; 490817cadca8Slm66018 vdc->operations = attr_msg->operations; 490917cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) 491017cadca8Slm66018 vdc->vdisk_media = attr_msg->vdisk_media; 491117cadca8Slm66018 else 491217cadca8Slm66018 vdc->vdisk_media = 0; 49131ae08745Sheppo 49143af08d82Slm66018 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 4915e1ebb9ecSlm66018 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 49163af08d82Slm66018 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 4917*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->instance, vdc->vdisk_bsize, 4918e1ebb9ecSlm66018 attr_msg->vdisk_block_size); 4919e1ebb9ecSlm66018 4920f0ca1d9aSsb155480 if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) || 49211ae08745Sheppo (attr_msg->vdisk_size > INT64_MAX) || 492217cadca8Slm66018 (attr_msg->operations == 0) || 49231ae08745Sheppo (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 49243af08d82Slm66018 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 4925e1ebb9ecSlm66018 vdc->instance); 49261ae08745Sheppo status = EINVAL; 
49271ae08745Sheppo break; 49281ae08745Sheppo } 49291ae08745Sheppo 493078fcd0a1Sachartre /* 493178fcd0a1Sachartre * Now that we have received all attributes we can create a 493278fcd0a1Sachartre * fake geometry for the disk. 493378fcd0a1Sachartre */ 493478fcd0a1Sachartre vdc_create_fake_geometry(vdc); 49351ae08745Sheppo break; 49361ae08745Sheppo 49371ae08745Sheppo case VIO_SUBTYPE_NACK: 49381ae08745Sheppo /* 49391ae08745Sheppo * vds could not handle the attributes we sent so we 49401ae08745Sheppo * stop negotiating. 49411ae08745Sheppo */ 49421ae08745Sheppo status = EPROTO; 49431ae08745Sheppo break; 49441ae08745Sheppo 49451ae08745Sheppo case VIO_SUBTYPE_INFO: 49461ae08745Sheppo /* 49471ae08745Sheppo * Handle the case where vds starts the handshake 49481ae08745Sheppo * (for now; vdc is the only supported instigatior) 49491ae08745Sheppo */ 49501ae08745Sheppo status = ENOTSUP; 49511ae08745Sheppo break; 49521ae08745Sheppo 49531ae08745Sheppo default: 49541ae08745Sheppo status = ENOTSUP; 49551ae08745Sheppo break; 49561ae08745Sheppo } 49571ae08745Sheppo 49580a55fbb7Slm66018 return (status); 49591ae08745Sheppo } 49601ae08745Sheppo 49610a55fbb7Slm66018 /* 49620a55fbb7Slm66018 * Function: 49630a55fbb7Slm66018 * vdc_handle_dring_reg_msg() 49640a55fbb7Slm66018 * 49650a55fbb7Slm66018 * Description: 49660a55fbb7Slm66018 * 49670a55fbb7Slm66018 * Arguments: 49680a55fbb7Slm66018 * vdc - soft state pointer for this instance of the driver. 
49690a55fbb7Slm66018 * dring_msg - LDC message sent by vDisk server 49700a55fbb7Slm66018 * 49710a55fbb7Slm66018 * Return Code: 49720a55fbb7Slm66018 * 0 - Success 49730a55fbb7Slm66018 */ 49740a55fbb7Slm66018 static int 49750a55fbb7Slm66018 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 49760a55fbb7Slm66018 { 49770a55fbb7Slm66018 int status = 0; 49781ae08745Sheppo 49790a55fbb7Slm66018 ASSERT(vdc != NULL); 49800a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 49810a55fbb7Slm66018 49820a55fbb7Slm66018 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 49830a55fbb7Slm66018 return (EPROTO); 49840a55fbb7Slm66018 } 49850a55fbb7Slm66018 49860a55fbb7Slm66018 switch (dring_msg->tag.vio_subtype) { 49870a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 49881ae08745Sheppo /* save the received dring_ident */ 49891ae08745Sheppo vdc->dring_ident = dring_msg->dring_ident; 49903af08d82Slm66018 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 4991e1ebb9ecSlm66018 vdc->instance, vdc->dring_ident); 49921ae08745Sheppo break; 49931ae08745Sheppo 49941ae08745Sheppo case VIO_SUBTYPE_NACK: 49951ae08745Sheppo /* 49961ae08745Sheppo * vds could not handle the DRing info we sent so we 49971ae08745Sheppo * stop negotiating. 
49981ae08745Sheppo */ 49993af08d82Slm66018 DMSG(vdc, 0, "[%d] server could not register DRing\n", 50003af08d82Slm66018 vdc->instance); 50011ae08745Sheppo status = EPROTO; 50021ae08745Sheppo break; 50031ae08745Sheppo 50041ae08745Sheppo case VIO_SUBTYPE_INFO: 50051ae08745Sheppo /* 50061ae08745Sheppo * Handle the case where vds starts handshake 50071ae08745Sheppo * (for now only vdc is the instigatior) 50081ae08745Sheppo */ 50091ae08745Sheppo status = ENOTSUP; 50101ae08745Sheppo break; 50111ae08745Sheppo default: 50121ae08745Sheppo status = ENOTSUP; 50131ae08745Sheppo } 50141ae08745Sheppo 50151ae08745Sheppo return (status); 50161ae08745Sheppo } 50171ae08745Sheppo 50181ae08745Sheppo /* 50191ae08745Sheppo * Function: 50201ae08745Sheppo * vdc_verify_seq_num() 50211ae08745Sheppo * 50221ae08745Sheppo * Description: 5023e1ebb9ecSlm66018 * This functions verifies that the sequence number sent back by the vDisk 5024e1ebb9ecSlm66018 * server with the latest message is what is expected (i.e. it is greater 5025e1ebb9ecSlm66018 * than the last seq num sent by the vDisk server and less than or equal 5026e1ebb9ecSlm66018 * to the last seq num generated by vdc). 5027e1ebb9ecSlm66018 * 5028e1ebb9ecSlm66018 * It then checks the request ID to see if any requests need processing 5029e1ebb9ecSlm66018 * in the DRing. 50301ae08745Sheppo * 50311ae08745Sheppo * Arguments: 50321ae08745Sheppo * vdc - soft state pointer for this instance of the driver. 
50331ae08745Sheppo * dring_msg - pointer to the LDC message sent by vds 50341ae08745Sheppo * 50351ae08745Sheppo * Return Code: 5036e1ebb9ecSlm66018 * VDC_SEQ_NUM_TODO - Message needs to be processed 5037e1ebb9ecSlm66018 * VDC_SEQ_NUM_SKIP - Message has already been processed 5038e1ebb9ecSlm66018 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 5039e1ebb9ecSlm66018 * vdc cannot deal with them 50401ae08745Sheppo */ 5041e1ebb9ecSlm66018 static int 5042e1ebb9ecSlm66018 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 50431ae08745Sheppo { 50441ae08745Sheppo ASSERT(vdc != NULL); 50451ae08745Sheppo ASSERT(dring_msg != NULL); 5046d10e4ef2Snarayan ASSERT(mutex_owned(&vdc->lock)); 50471ae08745Sheppo 50481ae08745Sheppo /* 50491ae08745Sheppo * Check to see if the messages were responded to in the correct 5050e1ebb9ecSlm66018 * order by vds. 50511ae08745Sheppo */ 5052e1ebb9ecSlm66018 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 5053e1ebb9ecSlm66018 (dring_msg->seq_num > vdc->seq_num)) { 50543af08d82Slm66018 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 5055e1ebb9ecSlm66018 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 5056e1ebb9ecSlm66018 vdc->instance, dring_msg->seq_num, 5057e1ebb9ecSlm66018 vdc->seq_num_reply, vdc->seq_num, 5058e1ebb9ecSlm66018 vdc->req_id_proc, vdc->req_id); 5059e1ebb9ecSlm66018 return (VDC_SEQ_NUM_INVALID); 50601ae08745Sheppo } 5061e1ebb9ecSlm66018 vdc->seq_num_reply = dring_msg->seq_num; 50621ae08745Sheppo 5063e1ebb9ecSlm66018 if (vdc->req_id_proc < vdc->req_id) 5064e1ebb9ecSlm66018 return (VDC_SEQ_NUM_TODO); 5065e1ebb9ecSlm66018 else 5066e1ebb9ecSlm66018 return (VDC_SEQ_NUM_SKIP); 50671ae08745Sheppo } 50681ae08745Sheppo 50690a55fbb7Slm66018 50700a55fbb7Slm66018 /* 50710a55fbb7Slm66018 * Function: 50720a55fbb7Slm66018 * vdc_is_supported_version() 50730a55fbb7Slm66018 * 50740a55fbb7Slm66018 * Description: 50750a55fbb7Slm66018 * This routine checks if the major/minor version numbers specified in 50760a55fbb7Slm66018 * 
'ver_msg' are supported. If not it finds the next version that is 50770a55fbb7Slm66018 * in the supported version list 'vdc_version[]' and sets the fields in 50780a55fbb7Slm66018 * 'ver_msg' to those values 50790a55fbb7Slm66018 * 50800a55fbb7Slm66018 * Arguments: 50810a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 50820a55fbb7Slm66018 * 50830a55fbb7Slm66018 * Return Code: 50840a55fbb7Slm66018 * B_TRUE - Success 50850a55fbb7Slm66018 * B_FALSE - Version not supported 50860a55fbb7Slm66018 */ 50870a55fbb7Slm66018 static boolean_t 50880a55fbb7Slm66018 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 50890a55fbb7Slm66018 { 50900a55fbb7Slm66018 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 50910a55fbb7Slm66018 50920a55fbb7Slm66018 for (int i = 0; i < vdc_num_versions; i++) { 50930a55fbb7Slm66018 ASSERT(vdc_version[i].major > 0); 50940a55fbb7Slm66018 ASSERT((i == 0) || 50950a55fbb7Slm66018 (vdc_version[i].major < vdc_version[i-1].major)); 50960a55fbb7Slm66018 50970a55fbb7Slm66018 /* 50980a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 50990a55fbb7Slm66018 * necessary, down to the highest value supported by this 51000a55fbb7Slm66018 * client. 
The server should support all minor versions lower 51010a55fbb7Slm66018 * than the value it sent 51020a55fbb7Slm66018 */ 51030a55fbb7Slm66018 if (ver_msg->ver_major == vdc_version[i].major) { 51040a55fbb7Slm66018 if (ver_msg->ver_minor > vdc_version[i].minor) { 51053af08d82Slm66018 DMSGX(0, 51063af08d82Slm66018 "Adjusting minor version from %u to %u", 51070a55fbb7Slm66018 ver_msg->ver_minor, vdc_version[i].minor); 51080a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 51090a55fbb7Slm66018 } 51100a55fbb7Slm66018 return (B_TRUE); 51110a55fbb7Slm66018 } 51120a55fbb7Slm66018 51130a55fbb7Slm66018 /* 51140a55fbb7Slm66018 * If the message contains a higher major version number, set 51150a55fbb7Slm66018 * the message's major/minor versions to the current values 51160a55fbb7Slm66018 * and return false, so this message will get resent with 51170a55fbb7Slm66018 * these values, and the server will potentially try again 51180a55fbb7Slm66018 * with the same or a lower version 51190a55fbb7Slm66018 */ 51200a55fbb7Slm66018 if (ver_msg->ver_major > vdc_version[i].major) { 51210a55fbb7Slm66018 ver_msg->ver_major = vdc_version[i].major; 51220a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 51233af08d82Slm66018 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 51240a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 51250a55fbb7Slm66018 51260a55fbb7Slm66018 return (B_FALSE); 51270a55fbb7Slm66018 } 51280a55fbb7Slm66018 51290a55fbb7Slm66018 /* 51300a55fbb7Slm66018 * Otherwise, the message's major version is less than the 51310a55fbb7Slm66018 * current major version, so continue the loop to the next 51320a55fbb7Slm66018 * (lower) supported version 51330a55fbb7Slm66018 */ 51340a55fbb7Slm66018 } 51350a55fbb7Slm66018 51360a55fbb7Slm66018 /* 51370a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 51380a55fbb7Slm66018 * message to terminate negotiation 51390a55fbb7Slm66018 */ 51400a55fbb7Slm66018 ver_msg->ver_major = 0; 51410a55fbb7Slm66018 
ver_msg->ver_minor = 0; 51420a55fbb7Slm66018 51430a55fbb7Slm66018 return (B_FALSE); 51440a55fbb7Slm66018 } 51451ae08745Sheppo /* -------------------------------------------------------------------------- */ 51461ae08745Sheppo 51471ae08745Sheppo /* 51481ae08745Sheppo * DKIO(7) support 51491ae08745Sheppo */ 51501ae08745Sheppo 51511ae08745Sheppo typedef struct vdc_dk_arg { 51521ae08745Sheppo struct dk_callback dkc; 51531ae08745Sheppo int mode; 51541ae08745Sheppo dev_t dev; 51551ae08745Sheppo vdc_t *vdc; 51561ae08745Sheppo } vdc_dk_arg_t; 51571ae08745Sheppo 51581ae08745Sheppo /* 51591ae08745Sheppo * Function: 51601ae08745Sheppo * vdc_dkio_flush_cb() 51611ae08745Sheppo * 51621ae08745Sheppo * Description: 51631ae08745Sheppo * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 51641ae08745Sheppo * by kernel code. 51651ae08745Sheppo * 51661ae08745Sheppo * Arguments: 51671ae08745Sheppo * arg - a pointer to a vdc_dk_arg_t structure. 51681ae08745Sheppo */ 51691ae08745Sheppo void 51701ae08745Sheppo vdc_dkio_flush_cb(void *arg) 51711ae08745Sheppo { 51721ae08745Sheppo struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 51731ae08745Sheppo struct dk_callback *dkc = NULL; 51741ae08745Sheppo vdc_t *vdc = NULL; 51751ae08745Sheppo int rv; 51761ae08745Sheppo 51771ae08745Sheppo if (dk_arg == NULL) { 51783af08d82Slm66018 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 51791ae08745Sheppo return; 51801ae08745Sheppo } 51811ae08745Sheppo dkc = &dk_arg->dkc; 51821ae08745Sheppo vdc = dk_arg->vdc; 51831ae08745Sheppo ASSERT(vdc != NULL); 51841ae08745Sheppo 51853af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 51862f5224aeSachartre VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir, B_TRUE); 51871ae08745Sheppo if (rv != 0) { 51883af08d82Slm66018 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 5189e1ebb9ecSlm66018 vdc->instance, rv, 51901ae08745Sheppo ddi_model_convert_from(dk_arg->mode & FMODELS)); 51911ae08745Sheppo } 
51921ae08745Sheppo 51931ae08745Sheppo /* 51941ae08745Sheppo * Trigger the call back to notify the caller the the ioctl call has 51951ae08745Sheppo * been completed. 51961ae08745Sheppo */ 51971ae08745Sheppo if ((dk_arg->mode & FKIOCTL) && 51981ae08745Sheppo (dkc != NULL) && 51991ae08745Sheppo (dkc->dkc_callback != NULL)) { 52001ae08745Sheppo ASSERT(dkc->dkc_cookie != NULL); 52018e6a2a04Slm66018 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 52021ae08745Sheppo } 52031ae08745Sheppo 52041ae08745Sheppo /* Indicate that one less DKIO write flush is outstanding */ 52051ae08745Sheppo mutex_enter(&vdc->lock); 52061ae08745Sheppo vdc->dkio_flush_pending--; 52071ae08745Sheppo ASSERT(vdc->dkio_flush_pending >= 0); 52081ae08745Sheppo mutex_exit(&vdc->lock); 52098e6a2a04Slm66018 52108e6a2a04Slm66018 /* free the mem that was allocated when the callback was dispatched */ 52118e6a2a04Slm66018 kmem_free(arg, sizeof (vdc_dk_arg_t)); 52121ae08745Sheppo } 52131ae08745Sheppo 52141ae08745Sheppo /* 521587a7269eSachartre * Function: 52169642afceSachartre * vdc_dkio_gapart() 521787a7269eSachartre * 521887a7269eSachartre * Description: 521987a7269eSachartre * This function implements the DKIOCGAPART ioctl. 
522087a7269eSachartre * 522187a7269eSachartre * Arguments: 522278fcd0a1Sachartre * vdc - soft state pointer 522387a7269eSachartre * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 522487a7269eSachartre * flag - ioctl flags 522587a7269eSachartre */ 522687a7269eSachartre static int 52279642afceSachartre vdc_dkio_gapart(vdc_t *vdc, caddr_t arg, int flag) 522887a7269eSachartre { 522978fcd0a1Sachartre struct dk_geom *geom; 5230342440ecSPrasad Singamsetty struct extvtoc *vtoc; 523187a7269eSachartre union { 523287a7269eSachartre struct dk_map map[NDKMAP]; 523387a7269eSachartre struct dk_map32 map32[NDKMAP]; 523487a7269eSachartre } data; 523587a7269eSachartre int i, rv, size; 523687a7269eSachartre 523778fcd0a1Sachartre mutex_enter(&vdc->lock); 523887a7269eSachartre 523978fcd0a1Sachartre if ((rv = vdc_validate_geometry(vdc)) != 0) { 524078fcd0a1Sachartre mutex_exit(&vdc->lock); 524187a7269eSachartre return (rv); 524278fcd0a1Sachartre } 524387a7269eSachartre 5244342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) { 5245342440ecSPrasad Singamsetty mutex_exit(&vdc->lock); 5246342440ecSPrasad Singamsetty return (EOVERFLOW); 5247342440ecSPrasad Singamsetty } 5248342440ecSPrasad Singamsetty 524978fcd0a1Sachartre vtoc = vdc->vtoc; 525078fcd0a1Sachartre geom = vdc->geom; 525187a7269eSachartre 525287a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 525387a7269eSachartre 525478fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 525578fcd0a1Sachartre data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 525678fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 525778fcd0a1Sachartre data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 525887a7269eSachartre } 525987a7269eSachartre size = NDKMAP * sizeof (struct dk_map32); 526087a7269eSachartre 526187a7269eSachartre } else { 526287a7269eSachartre 526378fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 526478fcd0a1Sachartre data.map[i].dkl_cylno = vtoc->v_part[i].p_start 
/ 526578fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 526678fcd0a1Sachartre data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 526787a7269eSachartre } 526887a7269eSachartre size = NDKMAP * sizeof (struct dk_map); 526987a7269eSachartre 527087a7269eSachartre } 527187a7269eSachartre 527278fcd0a1Sachartre mutex_exit(&vdc->lock); 527378fcd0a1Sachartre 527487a7269eSachartre if (ddi_copyout(&data, arg, size, flag) != 0) 527587a7269eSachartre return (EFAULT); 527687a7269eSachartre 527787a7269eSachartre return (0); 527887a7269eSachartre } 527987a7269eSachartre 528087a7269eSachartre /* 528187a7269eSachartre * Function: 52829642afceSachartre * vdc_dkio_partition() 52839642afceSachartre * 52849642afceSachartre * Description: 52859642afceSachartre * This function implements the DKIOCPARTITION ioctl. 52869642afceSachartre * 52879642afceSachartre * Arguments: 52889642afceSachartre * vdc - soft state pointer 52899642afceSachartre * arg - a pointer to a struct partition64 structure 52909642afceSachartre * flag - ioctl flags 52919642afceSachartre */ 52929642afceSachartre static int 52939642afceSachartre vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag) 52949642afceSachartre { 52959642afceSachartre struct partition64 p64; 52969642afceSachartre efi_gpt_t *gpt; 52979642afceSachartre efi_gpe_t *gpe; 52989642afceSachartre vd_efi_dev_t edev; 52999642afceSachartre uint_t partno; 53009642afceSachartre int rv; 53019642afceSachartre 53029642afceSachartre if (ddi_copyin(arg, &p64, sizeof (struct partition64), flag)) { 53039642afceSachartre return (EFAULT); 53049642afceSachartre } 53059642afceSachartre 5306*65908c77Syu, larry liu - Sun Microsystems - Beijing China VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 53079642afceSachartre 53089642afceSachartre if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) { 53099642afceSachartre return (rv); 53109642afceSachartre } 53119642afceSachartre 53129642afceSachartre partno = p64.p_partno; 53139642afceSachartre 53149642afceSachartre if 
(partno >= gpt->efi_gpt_NumberOfPartitionEntries) { 53159642afceSachartre vd_efi_free(&edev, gpt, gpe); 53169642afceSachartre return (ESRCH); 53179642afceSachartre } 53189642afceSachartre 53199642afceSachartre bcopy(&gpe[partno].efi_gpe_PartitionTypeGUID, &p64.p_type, 53209642afceSachartre sizeof (struct uuid)); 53219642afceSachartre p64.p_start = gpe[partno].efi_gpe_StartingLBA; 53229642afceSachartre p64.p_size = gpe[partno].efi_gpe_EndingLBA - p64.p_start + 1; 53239642afceSachartre 53249642afceSachartre if (ddi_copyout(&p64, arg, sizeof (struct partition64), flag)) { 53259642afceSachartre vd_efi_free(&edev, gpt, gpe); 53269642afceSachartre return (EFAULT); 53279642afceSachartre } 53289642afceSachartre 53299642afceSachartre vd_efi_free(&edev, gpt, gpe); 53309642afceSachartre return (0); 53319642afceSachartre } 53329642afceSachartre 53339642afceSachartre /* 53349642afceSachartre * Function: 533587a7269eSachartre * vdc_dioctl_rwcmd() 533687a7269eSachartre * 533787a7269eSachartre * Description: 533887a7269eSachartre * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 533987a7269eSachartre * for DKC_DIRECT disks to read or write at an absolute disk offset. 
534087a7269eSachartre * 534187a7269eSachartre * Arguments: 534287a7269eSachartre * dev - device 534387a7269eSachartre * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 534487a7269eSachartre * flag - ioctl flags 534587a7269eSachartre */ 534687a7269eSachartre static int 5347*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc_dioctl_rwcmd(vdc_t *vdc, caddr_t arg, int flag) 534887a7269eSachartre { 534987a7269eSachartre struct dadkio_rwcmd32 rwcmd32; 535087a7269eSachartre struct dadkio_rwcmd rwcmd; 535187a7269eSachartre struct iovec aiov; 535287a7269eSachartre struct uio auio; 535387a7269eSachartre int rw, status; 535487a7269eSachartre struct buf *buf; 535587a7269eSachartre 535687a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 535787a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 535887a7269eSachartre sizeof (struct dadkio_rwcmd32), flag)) { 535987a7269eSachartre return (EFAULT); 536087a7269eSachartre } 536187a7269eSachartre rwcmd.cmd = rwcmd32.cmd; 536287a7269eSachartre rwcmd.flags = rwcmd32.flags; 536387a7269eSachartre rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 536487a7269eSachartre rwcmd.buflen = rwcmd32.buflen; 536587a7269eSachartre rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 536687a7269eSachartre } else { 536787a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 536887a7269eSachartre sizeof (struct dadkio_rwcmd), flag)) { 536987a7269eSachartre return (EFAULT); 537087a7269eSachartre } 537187a7269eSachartre } 537287a7269eSachartre 537387a7269eSachartre switch (rwcmd.cmd) { 537487a7269eSachartre case DADKIO_RWCMD_READ: 537587a7269eSachartre rw = B_READ; 537687a7269eSachartre break; 537787a7269eSachartre case DADKIO_RWCMD_WRITE: 537887a7269eSachartre rw = B_WRITE; 537987a7269eSachartre break; 538087a7269eSachartre default: 538187a7269eSachartre return (EINVAL); 538287a7269eSachartre } 538387a7269eSachartre 538487a7269eSachartre bzero((caddr_t)&aiov, sizeof (struct iovec)); 
538587a7269eSachartre aiov.iov_base = rwcmd.bufaddr; 538687a7269eSachartre aiov.iov_len = rwcmd.buflen; 538787a7269eSachartre 538887a7269eSachartre bzero((caddr_t)&auio, sizeof (struct uio)); 538987a7269eSachartre auio.uio_iov = &aiov; 539087a7269eSachartre auio.uio_iovcnt = 1; 5391*65908c77Syu, larry liu - Sun Microsystems - Beijing China auio.uio_loffset = rwcmd.blkaddr * vdc->vdisk_bsize; 539287a7269eSachartre auio.uio_resid = rwcmd.buflen; 539387a7269eSachartre auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 539487a7269eSachartre 539587a7269eSachartre buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 539687a7269eSachartre bioinit(buf); 539787a7269eSachartre /* 539887a7269eSachartre * We use the private field of buf to specify that this is an 539987a7269eSachartre * I/O using an absolute offset. 540087a7269eSachartre */ 540187a7269eSachartre buf->b_private = (void *)VD_SLICE_NONE; 540287a7269eSachartre 5403*65908c77Syu, larry liu - Sun Microsystems - Beijing China status = physio(vdc_strategy, buf, VD_MAKE_DEV(vdc->instance, 0), 5404*65908c77Syu, larry liu - Sun Microsystems - Beijing China rw, vdc_min, &auio); 540587a7269eSachartre 540687a7269eSachartre biofini(buf); 540787a7269eSachartre kmem_free(buf, sizeof (buf_t)); 540887a7269eSachartre 540987a7269eSachartre return (status); 541087a7269eSachartre } 541187a7269eSachartre 541287a7269eSachartre /* 54132f5224aeSachartre * Allocate a buffer for a VD_OP_SCSICMD operation. The size of the allocated 54142f5224aeSachartre * buffer is returned in alloc_len. 
54152f5224aeSachartre */ 54162f5224aeSachartre static vd_scsi_t * 54172f5224aeSachartre vdc_scsi_alloc(int cdb_len, int sense_len, int datain_len, int dataout_len, 54182f5224aeSachartre int *alloc_len) 54192f5224aeSachartre { 54202f5224aeSachartre vd_scsi_t *vd_scsi; 54212f5224aeSachartre int vd_scsi_len = VD_SCSI_SIZE; 54222f5224aeSachartre 54232f5224aeSachartre vd_scsi_len += P2ROUNDUP(cdb_len, sizeof (uint64_t)); 54242f5224aeSachartre vd_scsi_len += P2ROUNDUP(sense_len, sizeof (uint64_t)); 54252f5224aeSachartre vd_scsi_len += P2ROUNDUP(datain_len, sizeof (uint64_t)); 54262f5224aeSachartre vd_scsi_len += P2ROUNDUP(dataout_len, sizeof (uint64_t)); 54272f5224aeSachartre 54282f5224aeSachartre ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); 54292f5224aeSachartre 54302f5224aeSachartre vd_scsi = kmem_zalloc(vd_scsi_len, KM_SLEEP); 54312f5224aeSachartre 54322f5224aeSachartre vd_scsi->cdb_len = cdb_len; 54332f5224aeSachartre vd_scsi->sense_len = sense_len; 54342f5224aeSachartre vd_scsi->datain_len = datain_len; 54352f5224aeSachartre vd_scsi->dataout_len = dataout_len; 54362f5224aeSachartre 54372f5224aeSachartre *alloc_len = vd_scsi_len; 54382f5224aeSachartre 54392f5224aeSachartre return (vd_scsi); 54402f5224aeSachartre } 54412f5224aeSachartre 54422f5224aeSachartre /* 54432f5224aeSachartre * Convert the status of a SCSI command to a Solaris return code. 54442f5224aeSachartre * 54452f5224aeSachartre * Arguments: 54462f5224aeSachartre * vd_scsi - The SCSI operation buffer. 54472f5224aeSachartre * log_error - indicate if an error message should be logged. 54482f5224aeSachartre * 54492f5224aeSachartre * Note that our SCSI error messages are rather primitive for the moment 54502f5224aeSachartre * and could be improved by decoding some data like the SCSI command and 54512f5224aeSachartre * the sense key. 54522f5224aeSachartre * 54532f5224aeSachartre * Return value: 54542f5224aeSachartre * 0 - Status is good. 54552f5224aeSachartre * EACCES - Status reports a reservation conflict. 
54562f5224aeSachartre * ENOTSUP - Status reports a check condition and sense key 54572f5224aeSachartre * reports an illegal request. 54582f5224aeSachartre * EIO - Any other status. 54592f5224aeSachartre */ 54602f5224aeSachartre static int 54612f5224aeSachartre vdc_scsi_status(vdc_t *vdc, vd_scsi_t *vd_scsi, boolean_t log_error) 54622f5224aeSachartre { 54632f5224aeSachartre int rv; 54642f5224aeSachartre char path_str[MAXPATHLEN]; 54652f5224aeSachartre char panic_str[VDC_RESV_CONFLICT_FMT_LEN + MAXPATHLEN]; 54662f5224aeSachartre union scsi_cdb *cdb; 54672f5224aeSachartre struct scsi_extended_sense *sense; 54682f5224aeSachartre 54692f5224aeSachartre if (vd_scsi->cmd_status == STATUS_GOOD) 54702f5224aeSachartre /* no error */ 54712f5224aeSachartre return (0); 54722f5224aeSachartre 54732f5224aeSachartre /* when the tunable vdc_scsi_log_error is true we log all errors */ 54742f5224aeSachartre if (vdc_scsi_log_error) 54752f5224aeSachartre log_error = B_TRUE; 54762f5224aeSachartre 54772f5224aeSachartre if (log_error) { 54782f5224aeSachartre cmn_err(CE_WARN, "%s (vdc%d):\tError for Command: 0x%x)\n", 54792f5224aeSachartre ddi_pathname(vdc->dip, path_str), vdc->instance, 54802f5224aeSachartre GETCMD(VD_SCSI_DATA_CDB(vd_scsi))); 54812f5224aeSachartre } 54822f5224aeSachartre 54832f5224aeSachartre /* default returned value */ 54842f5224aeSachartre rv = EIO; 54852f5224aeSachartre 54862f5224aeSachartre switch (vd_scsi->cmd_status) { 54872f5224aeSachartre 54882f5224aeSachartre case STATUS_CHECK: 54892f5224aeSachartre case STATUS_TERMINATED: 54902f5224aeSachartre if (log_error) 54912f5224aeSachartre cmn_err(CE_CONT, "\tCheck Condition Error\n"); 54922f5224aeSachartre 54932f5224aeSachartre /* check sense buffer */ 54942f5224aeSachartre if (vd_scsi->sense_len == 0 || 54952f5224aeSachartre vd_scsi->sense_status != STATUS_GOOD) { 54962f5224aeSachartre if (log_error) 54972f5224aeSachartre cmn_err(CE_CONT, "\tNo Sense Data Available\n"); 54982f5224aeSachartre break; 54992f5224aeSachartre 
} 55002f5224aeSachartre 55012f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 55022f5224aeSachartre 55032f5224aeSachartre if (log_error) { 55042f5224aeSachartre cmn_err(CE_CONT, "\tSense Key: 0x%x\n" 55052f5224aeSachartre "\tASC: 0x%x, ASCQ: 0x%x\n", 55062f5224aeSachartre scsi_sense_key((uint8_t *)sense), 55072f5224aeSachartre scsi_sense_asc((uint8_t *)sense), 55082f5224aeSachartre scsi_sense_ascq((uint8_t *)sense)); 55092f5224aeSachartre } 55102f5224aeSachartre 55112f5224aeSachartre if (scsi_sense_key((uint8_t *)sense) == KEY_ILLEGAL_REQUEST) 55122f5224aeSachartre rv = ENOTSUP; 55132f5224aeSachartre break; 55142f5224aeSachartre 55152f5224aeSachartre case STATUS_BUSY: 55162f5224aeSachartre if (log_error) 55172f5224aeSachartre cmn_err(CE_NOTE, "\tDevice Busy\n"); 55182f5224aeSachartre break; 55192f5224aeSachartre 55202f5224aeSachartre case STATUS_RESERVATION_CONFLICT: 55212f5224aeSachartre /* 55222f5224aeSachartre * If the command was PERSISTENT_RESERVATION_[IN|OUT] then 55232f5224aeSachartre * reservation conflict could be due to various reasons like 55242f5224aeSachartre * incorrect keys, not registered or not reserved etc. So, 55252f5224aeSachartre * we should not panic in that case. 
55262f5224aeSachartre */ 55272f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 55282f5224aeSachartre if (vdc->failfast_interval != 0 && 55292f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_IN && 55302f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_OUT) { 55312f5224aeSachartre /* failfast is enabled so we have to panic */ 55322f5224aeSachartre (void) snprintf(panic_str, sizeof (panic_str), 55332f5224aeSachartre VDC_RESV_CONFLICT_FMT_STR "%s", 55342f5224aeSachartre ddi_pathname(vdc->dip, path_str)); 55352f5224aeSachartre panic(panic_str); 55362f5224aeSachartre } 55372f5224aeSachartre if (log_error) 55382f5224aeSachartre cmn_err(CE_NOTE, "\tReservation Conflict\n"); 55392f5224aeSachartre rv = EACCES; 55402f5224aeSachartre break; 55412f5224aeSachartre 55422f5224aeSachartre case STATUS_QFULL: 55432f5224aeSachartre if (log_error) 55442f5224aeSachartre cmn_err(CE_NOTE, "\tQueue Full\n"); 55452f5224aeSachartre break; 55462f5224aeSachartre 55472f5224aeSachartre case STATUS_MET: 55482f5224aeSachartre case STATUS_INTERMEDIATE: 55492f5224aeSachartre case STATUS_SCSI2: 55502f5224aeSachartre case STATUS_INTERMEDIATE_MET: 55512f5224aeSachartre case STATUS_ACA_ACTIVE: 55522f5224aeSachartre if (log_error) 55532f5224aeSachartre cmn_err(CE_CONT, 55542f5224aeSachartre "\tUnexpected SCSI status received: 0x%x\n", 55552f5224aeSachartre vd_scsi->cmd_status); 55562f5224aeSachartre break; 55572f5224aeSachartre 55582f5224aeSachartre default: 55592f5224aeSachartre if (log_error) 55602f5224aeSachartre cmn_err(CE_CONT, 55612f5224aeSachartre "\tInvalid SCSI status received: 0x%x\n", 55622f5224aeSachartre vd_scsi->cmd_status); 55632f5224aeSachartre break; 55642f5224aeSachartre } 55652f5224aeSachartre 55662f5224aeSachartre return (rv); 55672f5224aeSachartre } 55682f5224aeSachartre 55692f5224aeSachartre /* 55702f5224aeSachartre * Implemented the USCSICMD uscsi(7I) ioctl. 
This ioctl is converted to 55712f5224aeSachartre * a VD_OP_SCSICMD operation which is sent to the vdisk server. If a SCSI 55722f5224aeSachartre * reset is requested (i.e. a flag USCSI_RESET* is set) then the ioctl is 55732f5224aeSachartre * converted to a VD_OP_RESET operation. 55742f5224aeSachartre */ 55752f5224aeSachartre static int 55762f5224aeSachartre vdc_uscsi_cmd(vdc_t *vdc, caddr_t arg, int mode) 55772f5224aeSachartre { 55782f5224aeSachartre struct uscsi_cmd uscsi; 55792f5224aeSachartre struct uscsi_cmd32 uscsi32; 55802f5224aeSachartre vd_scsi_t *vd_scsi; 55812f5224aeSachartre int vd_scsi_len; 55822f5224aeSachartre union scsi_cdb *cdb; 55832f5224aeSachartre struct scsi_extended_sense *sense; 55842f5224aeSachartre char *datain, *dataout; 55852f5224aeSachartre size_t cdb_len, datain_len, dataout_len, sense_len; 55862f5224aeSachartre int rv; 55872f5224aeSachartre 55882f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 55892f5224aeSachartre if (ddi_copyin(arg, &uscsi32, sizeof (struct uscsi_cmd32), 55902f5224aeSachartre mode) != 0) 55912f5224aeSachartre return (EFAULT); 55922f5224aeSachartre uscsi_cmd32touscsi_cmd((&uscsi32), (&uscsi)); 55932f5224aeSachartre } else { 55942f5224aeSachartre if (ddi_copyin(arg, &uscsi, sizeof (struct uscsi_cmd), 55952f5224aeSachartre mode) != 0) 55962f5224aeSachartre return (EFAULT); 55972f5224aeSachartre } 55982f5224aeSachartre 55992f5224aeSachartre /* a uscsi reset is converted to a VD_OP_RESET operation */ 56002f5224aeSachartre if (uscsi.uscsi_flags & (USCSI_RESET | USCSI_RESET_LUN | 56012f5224aeSachartre USCSI_RESET_ALL)) { 56022f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_RESET, NULL, 0, 0, 0, CB_SYNC, 56032f5224aeSachartre (void *)(uint64_t)mode, VIO_both_dir, B_TRUE); 56042f5224aeSachartre return (rv); 56052f5224aeSachartre } 56062f5224aeSachartre 56072f5224aeSachartre /* cdb buffer length */ 56082f5224aeSachartre cdb_len = uscsi.uscsi_cdblen; 56092f5224aeSachartre 56102f5224aeSachartre /* 
data in and out buffers length */ 56112f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 56122f5224aeSachartre datain_len = uscsi.uscsi_buflen; 56132f5224aeSachartre dataout_len = 0; 56142f5224aeSachartre } else { 56152f5224aeSachartre datain_len = 0; 56162f5224aeSachartre dataout_len = uscsi.uscsi_buflen; 56172f5224aeSachartre } 56182f5224aeSachartre 56192f5224aeSachartre /* sense buffer length */ 56202f5224aeSachartre if (uscsi.uscsi_flags & USCSI_RQENABLE) 56212f5224aeSachartre sense_len = uscsi.uscsi_rqlen; 56222f5224aeSachartre else 56232f5224aeSachartre sense_len = 0; 56242f5224aeSachartre 56252f5224aeSachartre /* allocate buffer for the VD_SCSICMD_OP operation */ 56262f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 56272f5224aeSachartre &vd_scsi_len); 56282f5224aeSachartre 56292f5224aeSachartre /* 56302f5224aeSachartre * The documentation of USCSI_ISOLATE and USCSI_DIAGNOSE is very vague, 56312f5224aeSachartre * but basically they prevent a SCSI command from being retried in case 56322f5224aeSachartre * of an error. 
56332f5224aeSachartre */ 56342f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_ISOLATE) || 56352f5224aeSachartre (uscsi.uscsi_flags & USCSI_DIAGNOSE)) 56362f5224aeSachartre vd_scsi->options |= VD_SCSI_OPT_NORETRY; 56372f5224aeSachartre 56382f5224aeSachartre /* set task attribute */ 56392f5224aeSachartre if (uscsi.uscsi_flags & USCSI_NOTAG) { 56402f5224aeSachartre vd_scsi->task_attribute = 0; 56412f5224aeSachartre } else { 56422f5224aeSachartre if (uscsi.uscsi_flags & USCSI_HEAD) 56432f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 56442f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_HTAG) 56452f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_HQUEUE; 56462f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_OTAG) 56472f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ORDERED; 56482f5224aeSachartre else 56492f5224aeSachartre vd_scsi->task_attribute = 0; 56502f5224aeSachartre } 56512f5224aeSachartre 56522f5224aeSachartre /* set timeout */ 56532f5224aeSachartre vd_scsi->timeout = uscsi.uscsi_timeout; 56542f5224aeSachartre 56552f5224aeSachartre /* copy-in cdb data */ 56562f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 56572f5224aeSachartre if (ddi_copyin(uscsi.uscsi_cdb, cdb, cdb_len, mode) != 0) { 56582f5224aeSachartre rv = EFAULT; 56592f5224aeSachartre goto done; 56602f5224aeSachartre } 56612f5224aeSachartre 56622f5224aeSachartre /* keep a pointer to the sense buffer */ 56632f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 56642f5224aeSachartre 56652f5224aeSachartre /* keep a pointer to the data-in buffer */ 56662f5224aeSachartre datain = (char *)VD_SCSI_DATA_IN(vd_scsi); 56672f5224aeSachartre 56682f5224aeSachartre /* copy-in request data to the data-out buffer */ 56692f5224aeSachartre dataout = (char *)VD_SCSI_DATA_OUT(vd_scsi); 56702f5224aeSachartre if (!(uscsi.uscsi_flags & USCSI_READ)) { 56712f5224aeSachartre if (ddi_copyin(uscsi.uscsi_bufaddr, dataout, dataout_len, 56722f5224aeSachartre mode)) { 56732f5224aeSachartre rv = 
EFAULT; 56742f5224aeSachartre goto done; 56752f5224aeSachartre } 56762f5224aeSachartre } 56772f5224aeSachartre 56782f5224aeSachartre /* submit the request */ 56792f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 56802f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 56812f5224aeSachartre 56822f5224aeSachartre if (rv != 0) 56832f5224aeSachartre goto done; 56842f5224aeSachartre 56852f5224aeSachartre /* update scsi status */ 56862f5224aeSachartre uscsi.uscsi_status = vd_scsi->cmd_status; 56872f5224aeSachartre 56882f5224aeSachartre /* update sense data */ 56892f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_RQENABLE) && 56902f5224aeSachartre (uscsi.uscsi_status == STATUS_CHECK || 56912f5224aeSachartre uscsi.uscsi_status == STATUS_TERMINATED)) { 56922f5224aeSachartre 56932f5224aeSachartre uscsi.uscsi_rqstatus = vd_scsi->sense_status; 56942f5224aeSachartre 56952f5224aeSachartre if (uscsi.uscsi_rqstatus == STATUS_GOOD) { 56962f5224aeSachartre uscsi.uscsi_rqresid = uscsi.uscsi_rqlen - 56972f5224aeSachartre vd_scsi->sense_len; 56982f5224aeSachartre if (ddi_copyout(sense, uscsi.uscsi_rqbuf, 56992f5224aeSachartre vd_scsi->sense_len, mode) != 0) { 57002f5224aeSachartre rv = EFAULT; 57012f5224aeSachartre goto done; 57022f5224aeSachartre } 57032f5224aeSachartre } 57042f5224aeSachartre } 57052f5224aeSachartre 57062f5224aeSachartre /* update request data */ 57072f5224aeSachartre if (uscsi.uscsi_status == STATUS_GOOD) { 57082f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 57092f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 57102f5224aeSachartre vd_scsi->datain_len; 57112f5224aeSachartre if (ddi_copyout(datain, uscsi.uscsi_bufaddr, 57122f5224aeSachartre vd_scsi->datain_len, mode) != 0) { 57132f5224aeSachartre rv = EFAULT; 57142f5224aeSachartre goto done; 57152f5224aeSachartre } 57162f5224aeSachartre } else { 57172f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 57182f5224aeSachartre 
vd_scsi->dataout_len; 57192f5224aeSachartre } 57202f5224aeSachartre } 57212f5224aeSachartre 57222f5224aeSachartre /* copy-out result */ 57232f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 57242f5224aeSachartre uscsi_cmdtouscsi_cmd32((&uscsi), (&uscsi32)); 57252f5224aeSachartre if (ddi_copyout(&uscsi32, arg, sizeof (struct uscsi_cmd32), 57262f5224aeSachartre mode) != 0) { 57272f5224aeSachartre rv = EFAULT; 57282f5224aeSachartre goto done; 57292f5224aeSachartre } 57302f5224aeSachartre } else { 57312f5224aeSachartre if (ddi_copyout(&uscsi, arg, sizeof (struct uscsi_cmd), 57322f5224aeSachartre mode) != 0) { 57332f5224aeSachartre rv = EFAULT; 57342f5224aeSachartre goto done; 57352f5224aeSachartre } 57362f5224aeSachartre } 57372f5224aeSachartre 57382f5224aeSachartre /* get the return code from the SCSI command status */ 57392f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, 57402f5224aeSachartre !(uscsi.uscsi_flags & USCSI_SILENT)); 57412f5224aeSachartre 57422f5224aeSachartre done: 57432f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 57442f5224aeSachartre return (rv); 57452f5224aeSachartre } 57462f5224aeSachartre 57472f5224aeSachartre /* 57482f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT IN command. 57492f5224aeSachartre * 57502f5224aeSachartre * Arguments: 57512f5224aeSachartre * cmd - SCSI PERSISTENT IN command 57522f5224aeSachartre * len - length of the SCSI input buffer 57532f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 57542f5224aeSachartre * 57552f5224aeSachartre * Returned Value: 57562f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 
57572f5224aeSachartre */ 57582f5224aeSachartre static vd_scsi_t * 57592f5224aeSachartre vdc_scsi_alloc_persistent_in(uchar_t cmd, int len, int *vd_scsi_len) 57602f5224aeSachartre { 57612f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 57622f5224aeSachartre vd_scsi_t *vd_scsi; 57632f5224aeSachartre union scsi_cdb *cdb; 57642f5224aeSachartre 57652f5224aeSachartre cdb_len = CDB_GROUP1; 57662f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 57672f5224aeSachartre datain_len = len; 57682f5224aeSachartre dataout_len = 0; 57692f5224aeSachartre 57702f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 57712f5224aeSachartre vd_scsi_len); 57722f5224aeSachartre 57732f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 57742f5224aeSachartre 57752f5224aeSachartre /* set cdb */ 57762f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_IN; 57772f5224aeSachartre cdb->cdb_opaque[1] = cmd; 57782f5224aeSachartre FORMG1COUNT(cdb, datain_len); 57792f5224aeSachartre 57802f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 57812f5224aeSachartre 57822f5224aeSachartre return (vd_scsi); 57832f5224aeSachartre } 57842f5224aeSachartre 57852f5224aeSachartre /* 57862f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT OUT command. 57872f5224aeSachartre * 57882f5224aeSachartre * Arguments: 57892f5224aeSachartre * cmd - SCSI PERSISTENT OUT command 57902f5224aeSachartre * len - length of the SCSI output buffer 57912f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 57922f5224aeSachartre * 57932f5224aeSachartre * Returned Code: 57942f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 
57952f5224aeSachartre */ 57962f5224aeSachartre static vd_scsi_t * 57972f5224aeSachartre vdc_scsi_alloc_persistent_out(uchar_t cmd, int len, int *vd_scsi_len) 57982f5224aeSachartre { 57992f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 58002f5224aeSachartre vd_scsi_t *vd_scsi; 58012f5224aeSachartre union scsi_cdb *cdb; 58022f5224aeSachartre 58032f5224aeSachartre cdb_len = CDB_GROUP1; 58042f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 58052f5224aeSachartre datain_len = 0; 58062f5224aeSachartre dataout_len = len; 58072f5224aeSachartre 58082f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 58092f5224aeSachartre vd_scsi_len); 58102f5224aeSachartre 58112f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 58122f5224aeSachartre 58132f5224aeSachartre /* set cdb */ 58142f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_OUT; 58152f5224aeSachartre cdb->cdb_opaque[1] = cmd; 58162f5224aeSachartre FORMG1COUNT(cdb, dataout_len); 58172f5224aeSachartre 58182f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 58192f5224aeSachartre 58202f5224aeSachartre return (vd_scsi); 58212f5224aeSachartre } 58222f5224aeSachartre 58232f5224aeSachartre /* 58242f5224aeSachartre * Implement the MHIOCGRP_INKEYS mhd(7i) ioctl. The ioctl is converted 58252f5224aeSachartre * to a SCSI PERSISTENT IN READ KEYS command which is sent to the vdisk 58262f5224aeSachartre * server with a VD_OP_SCSICMD operation. 
58272f5224aeSachartre */ 58282f5224aeSachartre static int 58292f5224aeSachartre vdc_mhd_inkeys(vdc_t *vdc, caddr_t arg, int mode) 58302f5224aeSachartre { 58312f5224aeSachartre vd_scsi_t *vd_scsi; 58322f5224aeSachartre mhioc_inkeys_t inkeys; 58332f5224aeSachartre mhioc_key_list_t klist; 58342f5224aeSachartre struct mhioc_inkeys32 inkeys32; 58352f5224aeSachartre struct mhioc_key_list32 klist32; 58362f5224aeSachartre sd_prin_readkeys_t *scsi_keys; 58372f5224aeSachartre void *user_keys; 58382f5224aeSachartre int vd_scsi_len; 58392f5224aeSachartre int listsize, listlen, rv; 58402f5224aeSachartre 58412f5224aeSachartre /* copyin arguments */ 58422f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 58432f5224aeSachartre rv = ddi_copyin(arg, &inkeys32, sizeof (inkeys32), mode); 58442f5224aeSachartre if (rv != 0) 58452f5224aeSachartre return (EFAULT); 58462f5224aeSachartre 58472f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inkeys32.li, &klist32, 58482f5224aeSachartre sizeof (klist32), mode); 58492f5224aeSachartre if (rv != 0) 58502f5224aeSachartre return (EFAULT); 58512f5224aeSachartre 58522f5224aeSachartre listsize = klist32.listsize; 58532f5224aeSachartre } else { 58542f5224aeSachartre rv = ddi_copyin(arg, &inkeys, sizeof (inkeys), mode); 58552f5224aeSachartre if (rv != 0) 58562f5224aeSachartre return (EFAULT); 58572f5224aeSachartre 58582f5224aeSachartre rv = ddi_copyin(inkeys.li, &klist, sizeof (klist), mode); 58592f5224aeSachartre if (rv != 0) 58602f5224aeSachartre return (EFAULT); 58612f5224aeSachartre 58622f5224aeSachartre listsize = klist.listsize; 58632f5224aeSachartre } 58642f5224aeSachartre 58652f5224aeSachartre /* build SCSI VD_OP request */ 58662f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_KEYS, 58672f5224aeSachartre sizeof (sd_prin_readkeys_t) - sizeof (caddr_t) + 58682f5224aeSachartre (sizeof (mhioc_resv_key_t) * listsize), &vd_scsi_len); 58692f5224aeSachartre 58702f5224aeSachartre scsi_keys = 
(sd_prin_readkeys_t *)VD_SCSI_DATA_IN(vd_scsi); 58712f5224aeSachartre 58722f5224aeSachartre /* submit the request */ 58732f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 58742f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 58752f5224aeSachartre 58762f5224aeSachartre if (rv != 0) 58772f5224aeSachartre goto done; 58782f5224aeSachartre 58792f5224aeSachartre listlen = scsi_keys->len / MHIOC_RESV_KEY_SIZE; 58802f5224aeSachartre 58812f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 58822f5224aeSachartre inkeys32.generation = scsi_keys->generation; 58832f5224aeSachartre rv = ddi_copyout(&inkeys32, arg, sizeof (inkeys32), mode); 58842f5224aeSachartre if (rv != 0) { 58852f5224aeSachartre rv = EFAULT; 58862f5224aeSachartre goto done; 58872f5224aeSachartre } 58882f5224aeSachartre 58892f5224aeSachartre klist32.listlen = listlen; 58902f5224aeSachartre rv = ddi_copyout(&klist32, (caddr_t)(uintptr_t)inkeys32.li, 58912f5224aeSachartre sizeof (klist32), mode); 58922f5224aeSachartre if (rv != 0) { 58932f5224aeSachartre rv = EFAULT; 58942f5224aeSachartre goto done; 58952f5224aeSachartre } 58962f5224aeSachartre 58972f5224aeSachartre user_keys = (caddr_t)(uintptr_t)klist32.list; 58982f5224aeSachartre } else { 58992f5224aeSachartre inkeys.generation = scsi_keys->generation; 59002f5224aeSachartre rv = ddi_copyout(&inkeys, arg, sizeof (inkeys), mode); 59012f5224aeSachartre if (rv != 0) { 59022f5224aeSachartre rv = EFAULT; 59032f5224aeSachartre goto done; 59042f5224aeSachartre } 59052f5224aeSachartre 59062f5224aeSachartre klist.listlen = listlen; 59072f5224aeSachartre rv = ddi_copyout(&klist, inkeys.li, sizeof (klist), mode); 59082f5224aeSachartre if (rv != 0) { 59092f5224aeSachartre rv = EFAULT; 59102f5224aeSachartre goto done; 59112f5224aeSachartre } 59122f5224aeSachartre 59132f5224aeSachartre user_keys = klist.list; 59142f5224aeSachartre } 59152f5224aeSachartre 59162f5224aeSachartre 
/* copy out keys */ 59172f5224aeSachartre if (listlen > 0 && listsize > 0) { 59182f5224aeSachartre if (listsize < listlen) 59192f5224aeSachartre listlen = listsize; 59202f5224aeSachartre rv = ddi_copyout(&scsi_keys->keylist, user_keys, 59212f5224aeSachartre listlen * MHIOC_RESV_KEY_SIZE, mode); 59222f5224aeSachartre if (rv != 0) 59232f5224aeSachartre rv = EFAULT; 59242f5224aeSachartre } 59252f5224aeSachartre 59262f5224aeSachartre if (rv == 0) 59272f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 59282f5224aeSachartre 59292f5224aeSachartre done: 59302f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 59312f5224aeSachartre 59322f5224aeSachartre return (rv); 59332f5224aeSachartre } 59342f5224aeSachartre 59352f5224aeSachartre /* 59362f5224aeSachartre * Implement the MHIOCGRP_INRESV mhd(7i) ioctl. The ioctl is converted 59372f5224aeSachartre * to a SCSI PERSISTENT IN READ RESERVATION command which is sent to 59382f5224aeSachartre * the vdisk server with a VD_OP_SCSICMD operation. 
59392f5224aeSachartre */ 59402f5224aeSachartre static int 59412f5224aeSachartre vdc_mhd_inresv(vdc_t *vdc, caddr_t arg, int mode) 59422f5224aeSachartre { 59432f5224aeSachartre vd_scsi_t *vd_scsi; 59442f5224aeSachartre mhioc_inresvs_t inresv; 59452f5224aeSachartre mhioc_resv_desc_list_t rlist; 59462f5224aeSachartre struct mhioc_inresvs32 inresv32; 59472f5224aeSachartre struct mhioc_resv_desc_list32 rlist32; 59482f5224aeSachartre mhioc_resv_desc_t mhd_resv; 59492f5224aeSachartre sd_prin_readresv_t *scsi_resv; 59502f5224aeSachartre sd_readresv_desc_t *resv; 59512f5224aeSachartre mhioc_resv_desc_t *user_resv; 59522f5224aeSachartre int vd_scsi_len; 59532f5224aeSachartre int listsize, listlen, i, rv; 59542f5224aeSachartre 59552f5224aeSachartre /* copyin arguments */ 59562f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 59572f5224aeSachartre rv = ddi_copyin(arg, &inresv32, sizeof (inresv32), mode); 59582f5224aeSachartre if (rv != 0) 59592f5224aeSachartre return (EFAULT); 59602f5224aeSachartre 59612f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inresv32.li, &rlist32, 59622f5224aeSachartre sizeof (rlist32), mode); 59632f5224aeSachartre if (rv != 0) 59642f5224aeSachartre return (EFAULT); 59652f5224aeSachartre 59662f5224aeSachartre listsize = rlist32.listsize; 59672f5224aeSachartre } else { 59682f5224aeSachartre rv = ddi_copyin(arg, &inresv, sizeof (inresv), mode); 59692f5224aeSachartre if (rv != 0) 59702f5224aeSachartre return (EFAULT); 59712f5224aeSachartre 59722f5224aeSachartre rv = ddi_copyin(inresv.li, &rlist, sizeof (rlist), mode); 59732f5224aeSachartre if (rv != 0) 59742f5224aeSachartre return (EFAULT); 59752f5224aeSachartre 59762f5224aeSachartre listsize = rlist.listsize; 59772f5224aeSachartre } 59782f5224aeSachartre 59792f5224aeSachartre /* build SCSI VD_OP request */ 59802f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_RESV, 59812f5224aeSachartre sizeof (sd_prin_readresv_t) - sizeof (caddr_t) + 
59822f5224aeSachartre (SCSI3_RESV_DESC_LEN * listsize), &vd_scsi_len); 59832f5224aeSachartre 59842f5224aeSachartre scsi_resv = (sd_prin_readresv_t *)VD_SCSI_DATA_IN(vd_scsi); 59852f5224aeSachartre 59862f5224aeSachartre /* submit the request */ 59872f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 59882f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 59892f5224aeSachartre 59902f5224aeSachartre if (rv != 0) 59912f5224aeSachartre goto done; 59922f5224aeSachartre 59932f5224aeSachartre listlen = scsi_resv->len / SCSI3_RESV_DESC_LEN; 59942f5224aeSachartre 59952f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 59962f5224aeSachartre inresv32.generation = scsi_resv->generation; 59972f5224aeSachartre rv = ddi_copyout(&inresv32, arg, sizeof (inresv32), mode); 59982f5224aeSachartre if (rv != 0) { 59992f5224aeSachartre rv = EFAULT; 60002f5224aeSachartre goto done; 60012f5224aeSachartre } 60022f5224aeSachartre 60032f5224aeSachartre rlist32.listlen = listlen; 60042f5224aeSachartre rv = ddi_copyout(&rlist32, (caddr_t)(uintptr_t)inresv32.li, 60052f5224aeSachartre sizeof (rlist32), mode); 60062f5224aeSachartre if (rv != 0) { 60072f5224aeSachartre rv = EFAULT; 60082f5224aeSachartre goto done; 60092f5224aeSachartre } 60102f5224aeSachartre 60112f5224aeSachartre user_resv = (mhioc_resv_desc_t *)(uintptr_t)rlist32.list; 60122f5224aeSachartre } else { 60132f5224aeSachartre inresv.generation = scsi_resv->generation; 60142f5224aeSachartre rv = ddi_copyout(&inresv, arg, sizeof (inresv), mode); 60152f5224aeSachartre if (rv != 0) { 60162f5224aeSachartre rv = EFAULT; 60172f5224aeSachartre goto done; 60182f5224aeSachartre } 60192f5224aeSachartre 60202f5224aeSachartre rlist.listlen = listlen; 60212f5224aeSachartre rv = ddi_copyout(&rlist, inresv.li, sizeof (rlist), mode); 60222f5224aeSachartre if (rv != 0) { 60232f5224aeSachartre rv = EFAULT; 60242f5224aeSachartre goto done; 60252f5224aeSachartre 
} 60262f5224aeSachartre 60272f5224aeSachartre user_resv = rlist.list; 60282f5224aeSachartre } 60292f5224aeSachartre 60302f5224aeSachartre /* copy out reservations */ 60312f5224aeSachartre if (listsize > 0 && listlen > 0) { 60322f5224aeSachartre if (listsize < listlen) 60332f5224aeSachartre listlen = listsize; 60342f5224aeSachartre resv = (sd_readresv_desc_t *)&scsi_resv->readresv_desc; 60352f5224aeSachartre 60362f5224aeSachartre for (i = 0; i < listlen; i++) { 60372f5224aeSachartre mhd_resv.type = resv->type; 60382f5224aeSachartre mhd_resv.scope = resv->scope; 60392f5224aeSachartre mhd_resv.scope_specific_addr = 60402f5224aeSachartre BE_32(resv->scope_specific_addr); 60412f5224aeSachartre bcopy(&resv->resvkey, &mhd_resv.key, 60422f5224aeSachartre MHIOC_RESV_KEY_SIZE); 60432f5224aeSachartre 60442f5224aeSachartre rv = ddi_copyout(&mhd_resv, user_resv, 60452f5224aeSachartre sizeof (mhd_resv), mode); 60462f5224aeSachartre if (rv != 0) { 60472f5224aeSachartre rv = EFAULT; 60482f5224aeSachartre goto done; 60492f5224aeSachartre } 60502f5224aeSachartre resv++; 60512f5224aeSachartre user_resv++; 60522f5224aeSachartre } 60532f5224aeSachartre } 60542f5224aeSachartre 60552f5224aeSachartre if (rv == 0) 60562f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 60572f5224aeSachartre 60582f5224aeSachartre done: 60592f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 60602f5224aeSachartre return (rv); 60612f5224aeSachartre } 60622f5224aeSachartre 60632f5224aeSachartre /* 60642f5224aeSachartre * Implement the MHIOCGRP_REGISTER mhd(7i) ioctl. The ioctl is converted 60652f5224aeSachartre * to a SCSI PERSISTENT OUT REGISTER command which is sent to the vdisk 60662f5224aeSachartre * server with a VD_OP_SCSICMD operation. 
60672f5224aeSachartre */ 60682f5224aeSachartre static int 60692f5224aeSachartre vdc_mhd_register(vdc_t *vdc, caddr_t arg, int mode) 60702f5224aeSachartre { 60712f5224aeSachartre vd_scsi_t *vd_scsi; 60722f5224aeSachartre sd_prout_t *scsi_prout; 60732f5224aeSachartre mhioc_register_t mhd_reg; 60742f5224aeSachartre int vd_scsi_len, rv; 60752f5224aeSachartre 60762f5224aeSachartre /* copyin arguments */ 60772f5224aeSachartre rv = ddi_copyin(arg, &mhd_reg, sizeof (mhd_reg), mode); 60782f5224aeSachartre if (rv != 0) 60792f5224aeSachartre return (EFAULT); 60802f5224aeSachartre 60812f5224aeSachartre /* build SCSI VD_OP request */ 60822f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTER, 60832f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 60842f5224aeSachartre 60852f5224aeSachartre /* set parameters */ 60862f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 60872f5224aeSachartre bcopy(mhd_reg.oldkey.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 60882f5224aeSachartre bcopy(mhd_reg.newkey.key, scsi_prout->service_key, MHIOC_RESV_KEY_SIZE); 60892f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_reg.aptpl; 60902f5224aeSachartre 60912f5224aeSachartre /* submit the request */ 60922f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 60932f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 60942f5224aeSachartre 60952f5224aeSachartre if (rv == 0) 60962f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 60972f5224aeSachartre 60982f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 60992f5224aeSachartre return (rv); 61002f5224aeSachartre } 61012f5224aeSachartre 61022f5224aeSachartre /* 61032f5224aeSachartre * Implement the MHIOCGRP_RESERVE mhd(7i) ioctl. The ioctl is converted 61042f5224aeSachartre * to a SCSI PERSISTENT OUT RESERVE command which is sent to the vdisk 61052f5224aeSachartre * server with a VD_OP_SCSICMD operation. 
61062f5224aeSachartre */ 61072f5224aeSachartre static int 61082f5224aeSachartre vdc_mhd_reserve(vdc_t *vdc, caddr_t arg, int mode) 61092f5224aeSachartre { 61102f5224aeSachartre union scsi_cdb *cdb; 61112f5224aeSachartre vd_scsi_t *vd_scsi; 61122f5224aeSachartre sd_prout_t *scsi_prout; 61132f5224aeSachartre mhioc_resv_desc_t mhd_resv; 61142f5224aeSachartre int vd_scsi_len, rv; 61152f5224aeSachartre 61162f5224aeSachartre /* copyin arguments */ 61172f5224aeSachartre rv = ddi_copyin(arg, &mhd_resv, sizeof (mhd_resv), mode); 61182f5224aeSachartre if (rv != 0) 61192f5224aeSachartre return (EFAULT); 61202f5224aeSachartre 61212f5224aeSachartre /* build SCSI VD_OP request */ 61222f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_RESERVE, 61232f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 61242f5224aeSachartre 61252f5224aeSachartre /* set parameters */ 61262f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 61272f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 61282f5224aeSachartre bcopy(mhd_resv.key.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 61292f5224aeSachartre scsi_prout->scope_address = mhd_resv.scope_specific_addr; 61302f5224aeSachartre cdb->cdb_opaque[2] = mhd_resv.type; 61312f5224aeSachartre 61322f5224aeSachartre /* submit the request */ 61332f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 61342f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 61352f5224aeSachartre 61362f5224aeSachartre if (rv == 0) 61372f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 61382f5224aeSachartre 61392f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 61402f5224aeSachartre return (rv); 61412f5224aeSachartre } 61422f5224aeSachartre 61432f5224aeSachartre /* 61442f5224aeSachartre * Implement the MHIOCGRP_PREEMPTANDABORT mhd(7i) ioctl. 
The ioctl is 61452f5224aeSachartre * converted to a SCSI PERSISTENT OUT PREEMPT AND ABORT command which 61462f5224aeSachartre * is sent to the vdisk server with a VD_OP_SCSICMD operation. 61472f5224aeSachartre */ 61482f5224aeSachartre static int 61492f5224aeSachartre vdc_mhd_preemptabort(vdc_t *vdc, caddr_t arg, int mode) 61502f5224aeSachartre { 61512f5224aeSachartre union scsi_cdb *cdb; 61522f5224aeSachartre vd_scsi_t *vd_scsi; 61532f5224aeSachartre sd_prout_t *scsi_prout; 61542f5224aeSachartre mhioc_preemptandabort_t mhd_preempt; 61552f5224aeSachartre int vd_scsi_len, rv; 61562f5224aeSachartre 61572f5224aeSachartre /* copyin arguments */ 61582f5224aeSachartre rv = ddi_copyin(arg, &mhd_preempt, sizeof (mhd_preempt), mode); 61592f5224aeSachartre if (rv != 0) 61602f5224aeSachartre return (EFAULT); 61612f5224aeSachartre 61622f5224aeSachartre /* build SCSI VD_OP request */ 61632f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_PREEMPTANDABORT, 61642f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 61652f5224aeSachartre 61662f5224aeSachartre /* set parameters */ 61672f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 61682f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 61692f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 61702f5224aeSachartre bcopy(mhd_preempt.resvdesc.key.key, scsi_prout->res_key, 61712f5224aeSachartre MHIOC_RESV_KEY_SIZE); 61722f5224aeSachartre bcopy(mhd_preempt.victim_key.key, scsi_prout->service_key, 61732f5224aeSachartre MHIOC_RESV_KEY_SIZE); 61742f5224aeSachartre scsi_prout->scope_address = mhd_preempt.resvdesc.scope_specific_addr; 61752f5224aeSachartre cdb->cdb_opaque[2] = mhd_preempt.resvdesc.type; 61762f5224aeSachartre 61772f5224aeSachartre /* submit the request */ 61782f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 61792f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 61802f5224aeSachartre 61812f5224aeSachartre if (rv 
== 0) 61822f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 61832f5224aeSachartre 61842f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 61852f5224aeSachartre return (rv); 61862f5224aeSachartre } 61872f5224aeSachartre 61882f5224aeSachartre /* 61892f5224aeSachartre * Implement the MHIOCGRP_REGISTERANDIGNOREKEY mhd(7i) ioctl. The ioctl 61902f5224aeSachartre * is converted to a SCSI PERSISTENT OUT REGISTER AND IGNORE EXISTING KEY 61912f5224aeSachartre * command which is sent to the vdisk server with a VD_OP_SCSICMD operation. 61922f5224aeSachartre */ 61932f5224aeSachartre static int 61942f5224aeSachartre vdc_mhd_registerignore(vdc_t *vdc, caddr_t arg, int mode) 61952f5224aeSachartre { 61962f5224aeSachartre vd_scsi_t *vd_scsi; 61972f5224aeSachartre sd_prout_t *scsi_prout; 61982f5224aeSachartre mhioc_registerandignorekey_t mhd_regi; 61992f5224aeSachartre int vd_scsi_len, rv; 62002f5224aeSachartre 62012f5224aeSachartre /* copyin arguments */ 62022f5224aeSachartre rv = ddi_copyin(arg, &mhd_regi, sizeof (mhd_regi), mode); 62032f5224aeSachartre if (rv != 0) 62042f5224aeSachartre return (EFAULT); 62052f5224aeSachartre 62062f5224aeSachartre /* build SCSI VD_OP request */ 62072f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTERANDIGNOREKEY, 62082f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 62092f5224aeSachartre 62102f5224aeSachartre /* set parameters */ 62112f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 62122f5224aeSachartre bcopy(mhd_regi.newkey.key, scsi_prout->service_key, 62132f5224aeSachartre MHIOC_RESV_KEY_SIZE); 62142f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_regi.aptpl; 62152f5224aeSachartre 62162f5224aeSachartre /* submit the request */ 62172f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 62182f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 62192f5224aeSachartre 62202f5224aeSachartre if (rv == 0) 62212f5224aeSachartre rv = 
vdc_scsi_status(vdc, vd_scsi, B_FALSE); 62222f5224aeSachartre 62232f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 62242f5224aeSachartre return (rv); 62252f5224aeSachartre } 62262f5224aeSachartre 62272f5224aeSachartre /* 62282f5224aeSachartre * This function is used by the failfast mechanism to send a SCSI command 62292f5224aeSachartre * to check for reservation conflict. 62302f5224aeSachartre */ 62312f5224aeSachartre static int 62322f5224aeSachartre vdc_failfast_scsi_cmd(vdc_t *vdc, uchar_t scmd) 62332f5224aeSachartre { 62342f5224aeSachartre int cdb_len, sense_len, vd_scsi_len; 62352f5224aeSachartre vd_scsi_t *vd_scsi; 62362f5224aeSachartre union scsi_cdb *cdb; 62372f5224aeSachartre int rv; 62382f5224aeSachartre 62392f5224aeSachartre ASSERT(scmd == SCMD_TEST_UNIT_READY || scmd == SCMD_WRITE_G1); 62402f5224aeSachartre 62412f5224aeSachartre if (scmd == SCMD_WRITE_G1) 62422f5224aeSachartre cdb_len = CDB_GROUP1; 62432f5224aeSachartre else 62442f5224aeSachartre cdb_len = CDB_GROUP0; 62452f5224aeSachartre 62462f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 62472f5224aeSachartre 62482f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, 0, 0, &vd_scsi_len); 62492f5224aeSachartre 62502f5224aeSachartre /* set cdb */ 62512f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 62522f5224aeSachartre cdb->scc_cmd = scmd; 62532f5224aeSachartre 62542f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 62552f5224aeSachartre 62562f5224aeSachartre /* 62572f5224aeSachartre * Submit the request. The last argument has to be B_FALSE so that 62582f5224aeSachartre * vdc_do_sync_op does not loop checking for reservation conflict if 62592f5224aeSachartre * the operation returns an error. 
62602f5224aeSachartre */ 62612f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 62622f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)FKIOCTL, VIO_both_dir, B_FALSE); 62632f5224aeSachartre 62642f5224aeSachartre if (rv == 0) 62652f5224aeSachartre (void) vdc_scsi_status(vdc, vd_scsi, B_FALSE); 62662f5224aeSachartre 62672f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 62682f5224aeSachartre return (rv); 62692f5224aeSachartre } 62702f5224aeSachartre 62712f5224aeSachartre /* 62722f5224aeSachartre * This function is used by the failfast mechanism to check for reservation 62732f5224aeSachartre * conflict. It sends some SCSI commands which will fail with a reservation 62742f5224aeSachartre * conflict error if the system does not have access to the disk and this 62752f5224aeSachartre * will panic the system. 62762f5224aeSachartre * 62772f5224aeSachartre * Returned Code: 62782f5224aeSachartre * 0 - disk is accessible without reservation conflict error 62792f5224aeSachartre * != 0 - unable to check if disk is accessible 62802f5224aeSachartre */ 62812f5224aeSachartre int 62822f5224aeSachartre vdc_failfast_check_resv(vdc_t *vdc) 62832f5224aeSachartre { 62842f5224aeSachartre int failure = 0; 62852f5224aeSachartre 62862f5224aeSachartre /* 62872f5224aeSachartre * Send a TEST UNIT READY command. The command will panic 62882f5224aeSachartre * the system if it fails with a reservation conflict. 62892f5224aeSachartre */ 62902f5224aeSachartre if (vdc_failfast_scsi_cmd(vdc, SCMD_TEST_UNIT_READY) != 0) 62912f5224aeSachartre failure++; 62922f5224aeSachartre 62932f5224aeSachartre /* 62942f5224aeSachartre * With SPC-3 compliant devices TEST UNIT READY will succeed on 62952f5224aeSachartre * a reserved device, so we also do a WRITE(10) of zero byte in 62962f5224aeSachartre * order to provoke a Reservation Conflict status on those newer 62972f5224aeSachartre * devices. 
62982f5224aeSachartre */ 62992f5224aeSachartre if (vdc_failfast_scsi_cmd(vdc, SCMD_WRITE_G1) != 0) 63002f5224aeSachartre failure++; 63012f5224aeSachartre 63022f5224aeSachartre return (failure); 63032f5224aeSachartre } 63042f5224aeSachartre 63052f5224aeSachartre /* 63062f5224aeSachartre * Add a pending I/O to the failfast I/O queue. An I/O is added to this 63072f5224aeSachartre * queue when it has failed and failfast is enabled. Then we have to check 63082f5224aeSachartre * if it has failed because of a reservation conflict in which case we have 63092f5224aeSachartre * to panic the system. 63102f5224aeSachartre * 63112f5224aeSachartre * Async I/O should be queued with their block I/O data transfer structure 63122f5224aeSachartre * (buf). Sync I/O should be queued with buf = NULL. 63132f5224aeSachartre */ 63142f5224aeSachartre static vdc_io_t * 63152f5224aeSachartre vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf) 63162f5224aeSachartre { 63172f5224aeSachartre vdc_io_t *vio; 63182f5224aeSachartre 63192f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 63202f5224aeSachartre 63212f5224aeSachartre vio = kmem_alloc(sizeof (vdc_io_t), KM_SLEEP); 63222f5224aeSachartre vio->vio_next = vdc->failfast_io_queue; 63232f5224aeSachartre vio->vio_buf = buf; 63242f5224aeSachartre vio->vio_qtime = ddi_get_lbolt(); 63252f5224aeSachartre 63262f5224aeSachartre vdc->failfast_io_queue = vio; 63272f5224aeSachartre 63282f5224aeSachartre /* notify the failfast thread that a new I/O is queued */ 63292f5224aeSachartre cv_signal(&vdc->failfast_cv); 63302f5224aeSachartre 63312f5224aeSachartre return (vio); 63322f5224aeSachartre } 63332f5224aeSachartre 63342f5224aeSachartre /* 63352f5224aeSachartre * Remove and complete I/O in the failfast I/O queue which have been 63362f5224aeSachartre * added after the indicated deadline. A deadline of 0 means that all 63372f5224aeSachartre * I/O have to be unqueued and marked as completed. 
63382f5224aeSachartre */ 63392f5224aeSachartre static void 63402f5224aeSachartre vdc_failfast_io_unqueue(vdc_t *vdc, clock_t deadline) 63412f5224aeSachartre { 63422f5224aeSachartre vdc_io_t *vio, *vio_tmp; 63432f5224aeSachartre 63442f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 63452f5224aeSachartre 63462f5224aeSachartre vio_tmp = NULL; 63472f5224aeSachartre vio = vdc->failfast_io_queue; 63482f5224aeSachartre 63492f5224aeSachartre if (deadline != 0) { 63502f5224aeSachartre /* 63512f5224aeSachartre * Skip any io queued after the deadline. The failfast 63522f5224aeSachartre * I/O queue is ordered starting with the last I/O added 63532f5224aeSachartre * to the queue. 63542f5224aeSachartre */ 63552f5224aeSachartre while (vio != NULL && vio->vio_qtime > deadline) { 63562f5224aeSachartre vio_tmp = vio; 63572f5224aeSachartre vio = vio->vio_next; 63582f5224aeSachartre } 63592f5224aeSachartre } 63602f5224aeSachartre 63612f5224aeSachartre if (vio == NULL) 63622f5224aeSachartre /* nothing to unqueue */ 63632f5224aeSachartre return; 63642f5224aeSachartre 63652f5224aeSachartre /* update the queue */ 63662f5224aeSachartre if (vio_tmp == NULL) 63672f5224aeSachartre vdc->failfast_io_queue = NULL; 63682f5224aeSachartre else 63692f5224aeSachartre vio_tmp->vio_next = NULL; 63702f5224aeSachartre 63712f5224aeSachartre /* 63722f5224aeSachartre * Complete unqueued I/O. Async I/O have a block I/O data transfer 63732f5224aeSachartre * structure (buf) and they are completed by calling biodone(). Sync 63742f5224aeSachartre * I/O do not have a buf and they are completed by setting the 63752f5224aeSachartre * vio_qtime to zero and signaling failfast_io_cv. In that case, the 63762f5224aeSachartre * thread waiting for the I/O to complete is responsible for freeing 63772f5224aeSachartre * the vio structure. 
63782f5224aeSachartre */ 63792f5224aeSachartre while (vio != NULL) { 63802f5224aeSachartre vio_tmp = vio->vio_next; 63812f5224aeSachartre if (vio->vio_buf != NULL) { 638290e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdc); 6383366a92acSlm66018 DTRACE_IO1(done, buf_t *, vio->vio_buf); 63842f5224aeSachartre biodone(vio->vio_buf); 63852f5224aeSachartre kmem_free(vio, sizeof (vdc_io_t)); 63862f5224aeSachartre } else { 63872f5224aeSachartre vio->vio_qtime = 0; 63882f5224aeSachartre } 63892f5224aeSachartre vio = vio_tmp; 63902f5224aeSachartre } 63912f5224aeSachartre 63922f5224aeSachartre cv_broadcast(&vdc->failfast_io_cv); 63932f5224aeSachartre } 63942f5224aeSachartre 63952f5224aeSachartre /* 63962f5224aeSachartre * Failfast Thread. 63972f5224aeSachartre * 63982f5224aeSachartre * While failfast is enabled, the failfast thread sends a TEST UNIT READY 63992f5224aeSachartre * and a zero size WRITE(10) SCSI commands on a regular basis to check that 64002f5224aeSachartre * we still have access to the disk. If a command fails with a RESERVATION 64012f5224aeSachartre * CONFLICT error then the system will immediatly panic. 64022f5224aeSachartre * 64032f5224aeSachartre * The failfast thread is also woken up when an I/O has failed. It then check 64042f5224aeSachartre * the access to the disk to ensure that the I/O failure was not due to a 64052f5224aeSachartre * reservation conflict. 64062f5224aeSachartre * 64072f5224aeSachartre * There is one failfast thread for each virtual disk for which failfast is 64082f5224aeSachartre * enabled. We could have only one thread sending requests for all disks but 64092f5224aeSachartre * this would need vdc to send asynchronous requests and to have callbacks to 64102f5224aeSachartre * process replies. 
64112f5224aeSachartre */ 64122f5224aeSachartre static void 64132f5224aeSachartre vdc_failfast_thread(void *arg) 64142f5224aeSachartre { 64152f5224aeSachartre int status; 64162f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 64172f5224aeSachartre clock_t timeout, starttime; 64182f5224aeSachartre 64192f5224aeSachartre mutex_enter(&vdc->lock); 64202f5224aeSachartre 64212f5224aeSachartre while (vdc->failfast_interval != 0) { 64222f5224aeSachartre 64232f5224aeSachartre starttime = ddi_get_lbolt(); 64242f5224aeSachartre 64252f5224aeSachartre mutex_exit(&vdc->lock); 64262f5224aeSachartre 64272f5224aeSachartre /* check for reservation conflict */ 64282f5224aeSachartre status = vdc_failfast_check_resv(vdc); 64292f5224aeSachartre 64302f5224aeSachartre mutex_enter(&vdc->lock); 64312f5224aeSachartre /* 64322f5224aeSachartre * We have dropped the lock to send the SCSI command so we have 64332f5224aeSachartre * to check that failfast is still enabled. 64342f5224aeSachartre */ 64352f5224aeSachartre if (vdc->failfast_interval == 0) 64362f5224aeSachartre break; 64372f5224aeSachartre 64382f5224aeSachartre /* 64392f5224aeSachartre * If we have successfully check the disk access and there was 64402f5224aeSachartre * no reservation conflict then we can complete any I/O queued 64412f5224aeSachartre * before the last check. 
64422f5224aeSachartre */ 64432f5224aeSachartre if (status == 0) 64442f5224aeSachartre vdc_failfast_io_unqueue(vdc, starttime); 64452f5224aeSachartre 64462f5224aeSachartre /* proceed again if some I/O are still in the queue */ 64472f5224aeSachartre if (vdc->failfast_io_queue != NULL) 64482f5224aeSachartre continue; 64492f5224aeSachartre 64502f5224aeSachartre timeout = ddi_get_lbolt() + 64512f5224aeSachartre drv_usectohz(vdc->failfast_interval); 64522f5224aeSachartre (void) cv_timedwait(&vdc->failfast_cv, &vdc->lock, timeout); 64532f5224aeSachartre } 64542f5224aeSachartre 64552f5224aeSachartre /* 64562f5224aeSachartre * Failfast is being stop so we can complete any queued I/O. 64572f5224aeSachartre */ 64582f5224aeSachartre vdc_failfast_io_unqueue(vdc, 0); 64592f5224aeSachartre vdc->failfast_thread = NULL; 64602f5224aeSachartre mutex_exit(&vdc->lock); 64612f5224aeSachartre thread_exit(); 64622f5224aeSachartre } 64632f5224aeSachartre 64642f5224aeSachartre /* 64652f5224aeSachartre * Implement the MHIOCENFAILFAST mhd(7i) ioctl. 
64662f5224aeSachartre */ 64672f5224aeSachartre static int 64682f5224aeSachartre vdc_failfast(vdc_t *vdc, caddr_t arg, int mode) 64692f5224aeSachartre { 64702f5224aeSachartre unsigned int mh_time; 64712f5224aeSachartre 64722f5224aeSachartre if (ddi_copyin((void *)arg, &mh_time, sizeof (int), mode)) 64732f5224aeSachartre return (EFAULT); 64742f5224aeSachartre 64752f5224aeSachartre mutex_enter(&vdc->lock); 64762f5224aeSachartre if (mh_time != 0 && vdc->failfast_thread == NULL) { 64772f5224aeSachartre vdc->failfast_thread = thread_create(NULL, 0, 64782f5224aeSachartre vdc_failfast_thread, vdc, 0, &p0, TS_RUN, 64792f5224aeSachartre v.v_maxsyspri - 2); 64802f5224aeSachartre } 64812f5224aeSachartre 64822f5224aeSachartre vdc->failfast_interval = mh_time * 1000; 64832f5224aeSachartre cv_signal(&vdc->failfast_cv); 64842f5224aeSachartre mutex_exit(&vdc->lock); 64852f5224aeSachartre 64862f5224aeSachartre return (0); 64872f5224aeSachartre } 64882f5224aeSachartre 64892f5224aeSachartre /* 64902f5224aeSachartre * Implement the MHIOCTKOWN and MHIOCRELEASE mhd(7i) ioctls. These ioctls are 64912f5224aeSachartre * converted to VD_OP_SET_ACCESS operations. 64922f5224aeSachartre */ 64932f5224aeSachartre static int 64942f5224aeSachartre vdc_access_set(vdc_t *vdc, uint64_t flags, int mode) 64952f5224aeSachartre { 64962f5224aeSachartre int rv; 64972f5224aeSachartre 64982f5224aeSachartre /* submit owership command request */ 64992f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SET_ACCESS, (caddr_t)&flags, 65002f5224aeSachartre sizeof (uint64_t), 0, 0, CB_SYNC, (void *)(uint64_t)mode, 65012f5224aeSachartre VIO_both_dir, B_TRUE); 65022f5224aeSachartre 65032f5224aeSachartre return (rv); 65042f5224aeSachartre } 65052f5224aeSachartre 65062f5224aeSachartre /* 65072f5224aeSachartre * Implement the MHIOCSTATUS mhd(7i) ioctl. This ioctl is converted to a 65082f5224aeSachartre * VD_OP_GET_ACCESS operation. 
65092f5224aeSachartre */ 65102f5224aeSachartre static int 65112f5224aeSachartre vdc_access_get(vdc_t *vdc, uint64_t *status, int mode) 65122f5224aeSachartre { 65132f5224aeSachartre int rv; 65142f5224aeSachartre 65152f5224aeSachartre /* submit owership command request */ 65162f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_ACCESS, (caddr_t)status, 65172f5224aeSachartre sizeof (uint64_t), 0, 0, CB_SYNC, (void *)(uint64_t)mode, 65182f5224aeSachartre VIO_both_dir, B_TRUE); 65192f5224aeSachartre 65202f5224aeSachartre return (rv); 65212f5224aeSachartre } 65222f5224aeSachartre 65232f5224aeSachartre /* 65242f5224aeSachartre * Disk Ownership Thread. 65252f5224aeSachartre * 65262f5224aeSachartre * When we have taken the ownership of a disk, this thread waits to be 65272f5224aeSachartre * notified when the LDC channel is reset so that it can recover the 65282f5224aeSachartre * ownership. 65292f5224aeSachartre * 65302f5224aeSachartre * Note that the thread handling the LDC reset (vdc_process_msg_thread()) 65312f5224aeSachartre * can not be used to do the ownership recovery because it has to be 65322f5224aeSachartre * running to handle the reply message to the ownership operation. 65332f5224aeSachartre */ 65342f5224aeSachartre static void 65352f5224aeSachartre vdc_ownership_thread(void *arg) 65362f5224aeSachartre { 65372f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 65382f5224aeSachartre clock_t timeout; 65392f5224aeSachartre uint64_t status; 65402f5224aeSachartre 65412f5224aeSachartre mutex_enter(&vdc->ownership_lock); 65422f5224aeSachartre mutex_enter(&vdc->lock); 65432f5224aeSachartre 65442f5224aeSachartre while (vdc->ownership & VDC_OWNERSHIP_WANTED) { 65452f5224aeSachartre 65462f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_RESET) || 65472f5224aeSachartre !(vdc->ownership & VDC_OWNERSHIP_GRANTED)) { 65482f5224aeSachartre /* 65492f5224aeSachartre * There was a reset so the ownership has been lost, 65502f5224aeSachartre * try to recover. 
We do this without using the preempt 65512f5224aeSachartre * option so that we don't steal the ownership from 65522f5224aeSachartre * someone who has preempted us. 65532f5224aeSachartre */ 65542f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership lost, recovering", 65552f5224aeSachartre vdc->instance); 65562f5224aeSachartre 65572f5224aeSachartre vdc->ownership &= ~(VDC_OWNERSHIP_RESET | 65582f5224aeSachartre VDC_OWNERSHIP_GRANTED); 65592f5224aeSachartre 65602f5224aeSachartre mutex_exit(&vdc->lock); 65612f5224aeSachartre 65622f5224aeSachartre status = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 65632f5224aeSachartre VD_ACCESS_SET_PRESERVE, FKIOCTL); 65642f5224aeSachartre 65652f5224aeSachartre mutex_enter(&vdc->lock); 65662f5224aeSachartre 65672f5224aeSachartre if (status == 0) { 65682f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership recovered", 65692f5224aeSachartre vdc->instance); 65702f5224aeSachartre vdc->ownership |= VDC_OWNERSHIP_GRANTED; 65712f5224aeSachartre } else { 65722f5224aeSachartre DMSG(vdc, 0, "[%d] Fail to recover ownership", 65732f5224aeSachartre vdc->instance); 65742f5224aeSachartre } 65752f5224aeSachartre 65762f5224aeSachartre } 65772f5224aeSachartre 65782f5224aeSachartre /* 65792f5224aeSachartre * If we have the ownership then we just wait for an event 65802f5224aeSachartre * to happen (LDC reset), otherwise we will retry to recover 65812f5224aeSachartre * after a delay. 
65822f5224aeSachartre */ 65832f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) 65842f5224aeSachartre timeout = 0; 65852f5224aeSachartre else 65862f5224aeSachartre timeout = ddi_get_lbolt() + 65872f5224aeSachartre drv_usectohz(vdc_ownership_delay); 65882f5224aeSachartre 65892f5224aeSachartre /* Release the ownership_lock and wait on the vdc lock */ 65902f5224aeSachartre mutex_exit(&vdc->ownership_lock); 65912f5224aeSachartre 65922f5224aeSachartre if (timeout == 0) 65932f5224aeSachartre (void) cv_wait(&vdc->ownership_cv, &vdc->lock); 65942f5224aeSachartre else 65952f5224aeSachartre (void) cv_timedwait(&vdc->ownership_cv, 65962f5224aeSachartre &vdc->lock, timeout); 65972f5224aeSachartre 65982f5224aeSachartre mutex_exit(&vdc->lock); 65992f5224aeSachartre 66002f5224aeSachartre mutex_enter(&vdc->ownership_lock); 66012f5224aeSachartre mutex_enter(&vdc->lock); 66022f5224aeSachartre } 66032f5224aeSachartre 66042f5224aeSachartre vdc->ownership_thread = NULL; 66052f5224aeSachartre mutex_exit(&vdc->lock); 66062f5224aeSachartre mutex_exit(&vdc->ownership_lock); 66072f5224aeSachartre 66082f5224aeSachartre thread_exit(); 66092f5224aeSachartre } 66102f5224aeSachartre 66112f5224aeSachartre static void 66122f5224aeSachartre vdc_ownership_update(vdc_t *vdc, int ownership_flags) 66132f5224aeSachartre { 66142f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->ownership_lock)); 66152f5224aeSachartre 66162f5224aeSachartre mutex_enter(&vdc->lock); 66172f5224aeSachartre vdc->ownership = ownership_flags; 66182f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_WANTED) && 66192f5224aeSachartre vdc->ownership_thread == NULL) { 66202f5224aeSachartre /* start ownership thread */ 66212f5224aeSachartre vdc->ownership_thread = thread_create(NULL, 0, 66222f5224aeSachartre vdc_ownership_thread, vdc, 0, &p0, TS_RUN, 66232f5224aeSachartre v.v_maxsyspri - 2); 66242f5224aeSachartre } else { 66252f5224aeSachartre /* notify the ownership thread */ 66262f5224aeSachartre cv_signal(&vdc->ownership_cv); 
66272f5224aeSachartre } 66282f5224aeSachartre mutex_exit(&vdc->lock); 66292f5224aeSachartre } 66302f5224aeSachartre 66312f5224aeSachartre /* 66322f5224aeSachartre * Get the size and the block size of a virtual disk from the vdisk server. 66332f5224aeSachartre */ 66342f5224aeSachartre static int 6635de3a5331SRamesh Chitrothu vdc_get_capacity(vdc_t *vdc, size_t *dsk_size, size_t *blk_size) 66362f5224aeSachartre { 66372f5224aeSachartre int rv = 0; 66382f5224aeSachartre size_t alloc_len; 66392f5224aeSachartre vd_capacity_t *vd_cap; 66402f5224aeSachartre 6641de3a5331SRamesh Chitrothu ASSERT(MUTEX_NOT_HELD(&vdc->lock)); 66422f5224aeSachartre 66432f5224aeSachartre alloc_len = P2ROUNDUP(sizeof (vd_capacity_t), sizeof (uint64_t)); 66442f5224aeSachartre 66452f5224aeSachartre vd_cap = kmem_zalloc(alloc_len, KM_SLEEP); 66462f5224aeSachartre 66472f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_CAPACITY, (caddr_t)vd_cap, alloc_len, 66482f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)FKIOCTL, VIO_both_dir, B_TRUE); 66492f5224aeSachartre 6650de3a5331SRamesh Chitrothu *dsk_size = vd_cap->vdisk_size; 6651de3a5331SRamesh Chitrothu *blk_size = vd_cap->vdisk_block_size; 66522f5224aeSachartre 66532f5224aeSachartre kmem_free(vd_cap, alloc_len); 66542f5224aeSachartre return (rv); 66552f5224aeSachartre } 66562f5224aeSachartre 66572f5224aeSachartre /* 6658de3a5331SRamesh Chitrothu * Check the disk capacity. Disk size information is updated if size has 6659de3a5331SRamesh Chitrothu * changed. 6660de3a5331SRamesh Chitrothu * 6661de3a5331SRamesh Chitrothu * Return 0 if the disk capacity is available, or non-zero if it is not. 
6662de3a5331SRamesh Chitrothu */ 6663de3a5331SRamesh Chitrothu static int 6664de3a5331SRamesh Chitrothu vdc_check_capacity(vdc_t *vdc) 6665de3a5331SRamesh Chitrothu { 6666de3a5331SRamesh Chitrothu size_t dsk_size, blk_size; 6667de3a5331SRamesh Chitrothu int rv; 6668de3a5331SRamesh Chitrothu 66693f4df6d3SAlexandre Chartre /* 66703f4df6d3SAlexandre Chartre * If the vdisk does not support the VD_OP_GET_CAPACITY operation 66713f4df6d3SAlexandre Chartre * then the disk capacity has been retrieved during the handshake 66723f4df6d3SAlexandre Chartre * and there's nothing more to do here. 66733f4df6d3SAlexandre Chartre */ 66743f4df6d3SAlexandre Chartre if (!VD_OP_SUPPORTED(vdc->operations, VD_OP_GET_CAPACITY)) 66753f4df6d3SAlexandre Chartre return (0); 66763f4df6d3SAlexandre Chartre 6677de3a5331SRamesh Chitrothu if ((rv = vdc_get_capacity(vdc, &dsk_size, &blk_size)) != 0) 6678de3a5331SRamesh Chitrothu return (rv); 6679de3a5331SRamesh Chitrothu 6680*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || blk_size == 0) 6681de3a5331SRamesh Chitrothu return (EINVAL); 6682de3a5331SRamesh Chitrothu 6683de3a5331SRamesh Chitrothu mutex_enter(&vdc->lock); 6684*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* 6685*65908c77Syu, larry liu - Sun Microsystems - Beijing China * First try to update the VIO block size (which is the same as the 6686*65908c77Syu, larry liu - Sun Microsystems - Beijing China * vdisk block size). If this returns an error then that means that 6687*65908c77Syu, larry liu - Sun Microsystems - Beijing China * we can not use that block size so basically the vdisk is unusable 6688*65908c77Syu, larry liu - Sun Microsystems - Beijing China * and we return an error. 
6689*65908c77Syu, larry liu - Sun Microsystems - Beijing China */ 6690*65908c77Syu, larry liu - Sun Microsystems - Beijing China rv = vdc_update_vio_bsize(vdc, blk_size); 6691*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (rv == 0) 6692de3a5331SRamesh Chitrothu vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz); 6693*65908c77Syu, larry liu - Sun Microsystems - Beijing China 6694de3a5331SRamesh Chitrothu mutex_exit(&vdc->lock); 6695de3a5331SRamesh Chitrothu 6696*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (rv); 6697de3a5331SRamesh Chitrothu } 6698de3a5331SRamesh Chitrothu 6699de3a5331SRamesh Chitrothu /* 67001ae08745Sheppo * This structure is used in the DKIO(7I) array below. 67011ae08745Sheppo */ 67021ae08745Sheppo typedef struct vdc_dk_ioctl { 67031ae08745Sheppo uint8_t op; /* VD_OP_XXX value */ 67041ae08745Sheppo int cmd; /* Solaris ioctl operation number */ 67051ae08745Sheppo size_t nbytes; /* size of structure to be copied */ 67060a55fbb7Slm66018 67070a55fbb7Slm66018 /* function to convert between vDisk and Solaris structure formats */ 6708d10e4ef2Snarayan int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 6709d10e4ef2Snarayan int mode, int dir); 67101ae08745Sheppo } vdc_dk_ioctl_t; 67111ae08745Sheppo 67121ae08745Sheppo /* 67131ae08745Sheppo * Subset of DKIO(7I) operations currently supported 67141ae08745Sheppo */ 67151ae08745Sheppo static vdc_dk_ioctl_t dk_ioctl[] = { 6716eff7243fSlm66018 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 67170a55fbb7Slm66018 vdc_null_copy_func}, 67180a55fbb7Slm66018 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 67194bac2208Snarayan vdc_get_wce_convert}, 67200a55fbb7Slm66018 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 67214bac2208Snarayan vdc_set_wce_convert}, 67220a55fbb7Slm66018 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 67230a55fbb7Slm66018 vdc_get_vtoc_convert}, 67240a55fbb7Slm66018 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 67250a55fbb7Slm66018 vdc_set_vtoc_convert}, 
6726342440ecSPrasad Singamsetty {VD_OP_GET_VTOC, DKIOCGEXTVTOC, sizeof (vd_vtoc_t), 6727342440ecSPrasad Singamsetty vdc_get_extvtoc_convert}, 6728342440ecSPrasad Singamsetty {VD_OP_SET_VTOC, DKIOCSEXTVTOC, sizeof (vd_vtoc_t), 6729342440ecSPrasad Singamsetty vdc_set_extvtoc_convert}, 67300a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 67310a55fbb7Slm66018 vdc_get_geom_convert}, 67320a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 67330a55fbb7Slm66018 vdc_get_geom_convert}, 67340a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 67350a55fbb7Slm66018 vdc_get_geom_convert}, 67360a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 67370a55fbb7Slm66018 vdc_set_geom_convert}, 67384bac2208Snarayan {VD_OP_GET_EFI, DKIOCGETEFI, 0, 67394bac2208Snarayan vdc_get_efi_convert}, 67404bac2208Snarayan {VD_OP_SET_EFI, DKIOCSETEFI, 0, 67414bac2208Snarayan vdc_set_efi_convert}, 67420a55fbb7Slm66018 674387a7269eSachartre /* DIOCTL_RWCMD is converted to a read or a write */ 674487a7269eSachartre {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 674587a7269eSachartre 67462f5224aeSachartre /* mhd(7I) non-shared multihost disks ioctls */ 67472f5224aeSachartre {0, MHIOCTKOWN, 0, vdc_null_copy_func}, 67482f5224aeSachartre {0, MHIOCRELEASE, 0, vdc_null_copy_func}, 67492f5224aeSachartre {0, MHIOCSTATUS, 0, vdc_null_copy_func}, 67502f5224aeSachartre {0, MHIOCQRESERVE, 0, vdc_null_copy_func}, 67512f5224aeSachartre 67522f5224aeSachartre /* mhd(7I) shared multihost disks ioctls */ 67532f5224aeSachartre {0, MHIOCGRP_INKEYS, 0, vdc_null_copy_func}, 67542f5224aeSachartre {0, MHIOCGRP_INRESV, 0, vdc_null_copy_func}, 67552f5224aeSachartre {0, MHIOCGRP_REGISTER, 0, vdc_null_copy_func}, 67562f5224aeSachartre {0, MHIOCGRP_RESERVE, 0, vdc_null_copy_func}, 67572f5224aeSachartre {0, MHIOCGRP_PREEMPTANDABORT, 0, vdc_null_copy_func}, 67582f5224aeSachartre {0, MHIOCGRP_REGISTERANDIGNOREKEY, 0, vdc_null_copy_func}, 
67592f5224aeSachartre 67602f5224aeSachartre /* mhd(7I) failfast ioctl */ 67612f5224aeSachartre {0, MHIOCENFAILFAST, 0, vdc_null_copy_func}, 67622f5224aeSachartre 67630a55fbb7Slm66018 /* 67640a55fbb7Slm66018 * These particular ioctls are not sent to the server - vdc fakes up 67650a55fbb7Slm66018 * the necessary info. 67660a55fbb7Slm66018 */ 67670a55fbb7Slm66018 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 67680a55fbb7Slm66018 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 67690a55fbb7Slm66018 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 67709642afceSachartre {0, DKIOCPARTITION, 0, vdc_null_copy_func }, 677187a7269eSachartre {0, DKIOCGAPART, 0, vdc_null_copy_func }, 67720a55fbb7Slm66018 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 67730a55fbb7Slm66018 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 67741ae08745Sheppo }; 67751ae08745Sheppo 67761ae08745Sheppo /* 6777edcc0754Sachartre * This function handles ioctl requests from the vd_efi_alloc_and_read() 6778edcc0754Sachartre * function and forward them to the vdisk. 
67792f5224aeSachartre */ 67802f5224aeSachartre static int 6781edcc0754Sachartre vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg) 67822f5224aeSachartre { 6783edcc0754Sachartre vdc_t *vdc = (vdc_t *)vdisk; 6784edcc0754Sachartre dev_t dev; 67852f5224aeSachartre int rval; 6786edcc0754Sachartre 6787edcc0754Sachartre dev = makedevice(ddi_driver_major(vdc->dip), 6788edcc0754Sachartre VD_MAKE_DEV(vdc->instance, 0)); 6789edcc0754Sachartre 6790edcc0754Sachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, FKIOCTL, &rval)); 67912f5224aeSachartre } 67922f5224aeSachartre 67932f5224aeSachartre /* 67941ae08745Sheppo * Function: 67951ae08745Sheppo * vd_process_ioctl() 67961ae08745Sheppo * 67971ae08745Sheppo * Description: 67980a55fbb7Slm66018 * This routine processes disk specific ioctl calls 67991ae08745Sheppo * 68001ae08745Sheppo * Arguments: 68011ae08745Sheppo * dev - the device number 68021ae08745Sheppo * cmd - the operation [dkio(7I)] to be processed 68031ae08745Sheppo * arg - pointer to user provided structure 68041ae08745Sheppo * (contains data to be set or reference parameter for get) 68051ae08745Sheppo * mode - bit flag, indicating open settings, 32/64 bit type, etc 68062f5224aeSachartre * rvalp - pointer to return value for calling process. 
68071ae08745Sheppo * 68081ae08745Sheppo * Return Code: 68091ae08745Sheppo * 0 68101ae08745Sheppo * EFAULT 68111ae08745Sheppo * ENXIO 68121ae08745Sheppo * EIO 68131ae08745Sheppo * ENOTSUP 68141ae08745Sheppo */ 68151ae08745Sheppo static int 68162f5224aeSachartre vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp) 68171ae08745Sheppo { 68180d0c8d4bSnarayan int instance = VDCUNIT(dev); 68191ae08745Sheppo vdc_t *vdc = NULL; 68201ae08745Sheppo int rv = -1; 68211ae08745Sheppo int idx = 0; /* index into dk_ioctl[] */ 68221ae08745Sheppo size_t len = 0; /* #bytes to send to vds */ 68231ae08745Sheppo size_t alloc_len = 0; /* #bytes to allocate mem for */ 68241ae08745Sheppo caddr_t mem_p = NULL; 68251ae08745Sheppo size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 68263af08d82Slm66018 vdc_dk_ioctl_t *iop; 68271ae08745Sheppo 68281ae08745Sheppo vdc = ddi_get_soft_state(vdc_state, instance); 68291ae08745Sheppo if (vdc == NULL) { 68301ae08745Sheppo cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 68311ae08745Sheppo instance); 68321ae08745Sheppo return (ENXIO); 68331ae08745Sheppo } 68341ae08745Sheppo 68353af08d82Slm66018 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 68363af08d82Slm66018 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 68371ae08745Sheppo 68382f5224aeSachartre if (rvalp != NULL) { 68392f5224aeSachartre /* the return value of the ioctl is 0 by default */ 68402f5224aeSachartre *rvalp = 0; 68412f5224aeSachartre } 68422f5224aeSachartre 68431ae08745Sheppo /* 68441ae08745Sheppo * Validate the ioctl operation to be performed. 68451ae08745Sheppo * 68461ae08745Sheppo * If we have looped through the array without finding a match then we 68471ae08745Sheppo * don't support this ioctl. 
68481ae08745Sheppo */ 68491ae08745Sheppo for (idx = 0; idx < nioctls; idx++) { 68501ae08745Sheppo if (cmd == dk_ioctl[idx].cmd) 68511ae08745Sheppo break; 68521ae08745Sheppo } 68531ae08745Sheppo 68541ae08745Sheppo if (idx >= nioctls) { 68553af08d82Slm66018 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 6856e1ebb9ecSlm66018 vdc->instance, cmd); 68571ae08745Sheppo return (ENOTSUP); 68581ae08745Sheppo } 68591ae08745Sheppo 68603af08d82Slm66018 iop = &(dk_ioctl[idx]); 68613af08d82Slm66018 68624bac2208Snarayan if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 68634bac2208Snarayan /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 68644bac2208Snarayan dk_efi_t dk_efi; 68654bac2208Snarayan 68664bac2208Snarayan rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 68674bac2208Snarayan if (rv != 0) 68684bac2208Snarayan return (EFAULT); 68694bac2208Snarayan 68704bac2208Snarayan len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 68714bac2208Snarayan } else { 68723af08d82Slm66018 len = iop->nbytes; 68734bac2208Snarayan } 68741ae08745Sheppo 68752f5224aeSachartre /* check if the ioctl is applicable */ 68761ae08745Sheppo switch (cmd) { 68771ae08745Sheppo case CDROMREADOFFSET: 68781ae08745Sheppo case DKIOCREMOVABLE: 68791ae08745Sheppo return (ENOTTY); 68801ae08745Sheppo 68812f5224aeSachartre case USCSICMD: 68822f5224aeSachartre case MHIOCTKOWN: 68832f5224aeSachartre case MHIOCSTATUS: 68842f5224aeSachartre case MHIOCQRESERVE: 68852f5224aeSachartre case MHIOCRELEASE: 68862f5224aeSachartre case MHIOCGRP_INKEYS: 68872f5224aeSachartre case MHIOCGRP_INRESV: 68882f5224aeSachartre case MHIOCGRP_REGISTER: 68892f5224aeSachartre case MHIOCGRP_RESERVE: 68902f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 68912f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 68922f5224aeSachartre case MHIOCENFAILFAST: 68932f5224aeSachartre if (vdc->cinfo == NULL) 68942f5224aeSachartre return (ENXIO); 68952f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_SCSI_CCS) 68962f5224aeSachartre return 
(ENOTTY); 68972f5224aeSachartre break; 68982f5224aeSachartre 68992f5224aeSachartre case DIOCTL_RWCMD: 69002f5224aeSachartre if (vdc->cinfo == NULL) 69012f5224aeSachartre return (ENXIO); 69022f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_DIRECT) 69032f5224aeSachartre return (ENOTTY); 69042f5224aeSachartre break; 69052f5224aeSachartre 69062f5224aeSachartre case DKIOCINFO: 69072f5224aeSachartre if (vdc->cinfo == NULL) 69082f5224aeSachartre return (ENXIO); 69092f5224aeSachartre break; 69102f5224aeSachartre 69112f5224aeSachartre case DKIOCGMEDIAINFO: 69122f5224aeSachartre if (vdc->minfo == NULL) 69132f5224aeSachartre return (ENXIO); 69142f5224aeSachartre if (vdc_check_capacity(vdc) != 0) 69152f5224aeSachartre /* disk capacity is not available */ 69162f5224aeSachartre return (EIO); 69172f5224aeSachartre break; 69182f5224aeSachartre } 69192f5224aeSachartre 69202f5224aeSachartre /* 69212f5224aeSachartre * Deal with ioctls which require a processing different than 69222f5224aeSachartre * converting ioctl arguments and sending a corresponding 69232f5224aeSachartre * VD operation. 69242f5224aeSachartre */ 69252f5224aeSachartre switch (cmd) { 69262f5224aeSachartre 69272f5224aeSachartre case USCSICMD: 69282f5224aeSachartre { 69292f5224aeSachartre return (vdc_uscsi_cmd(vdc, arg, mode)); 69302f5224aeSachartre } 69312f5224aeSachartre 69322f5224aeSachartre case MHIOCTKOWN: 69332f5224aeSachartre { 69342f5224aeSachartre mutex_enter(&vdc->ownership_lock); 69352f5224aeSachartre /* 69362f5224aeSachartre * We have to set VDC_OWNERSHIP_WANTED now so that the ownership 69372f5224aeSachartre * can be flagged with VDC_OWNERSHIP_RESET if the LDC is reset 69382f5224aeSachartre * while we are processing the ioctl. 
69392f5224aeSachartre */ 69402f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED); 69412f5224aeSachartre 69422f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 69432f5224aeSachartre VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE, mode); 69442f5224aeSachartre if (rv == 0) { 69452f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED | 69462f5224aeSachartre VDC_OWNERSHIP_GRANTED); 69472f5224aeSachartre } else { 69482f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 69492f5224aeSachartre } 69502f5224aeSachartre mutex_exit(&vdc->ownership_lock); 69512f5224aeSachartre return (rv); 69522f5224aeSachartre } 69532f5224aeSachartre 69542f5224aeSachartre case MHIOCRELEASE: 69552f5224aeSachartre { 69562f5224aeSachartre mutex_enter(&vdc->ownership_lock); 69572f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, mode); 69582f5224aeSachartre if (rv == 0) { 69592f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 69602f5224aeSachartre } 69612f5224aeSachartre mutex_exit(&vdc->ownership_lock); 69622f5224aeSachartre return (rv); 69632f5224aeSachartre } 69642f5224aeSachartre 69652f5224aeSachartre case MHIOCSTATUS: 69662f5224aeSachartre { 69672f5224aeSachartre uint64_t status; 69682f5224aeSachartre 69692f5224aeSachartre rv = vdc_access_get(vdc, &status, mode); 69702f5224aeSachartre if (rv == 0 && rvalp != NULL) 69712f5224aeSachartre *rvalp = (status & VD_ACCESS_ALLOWED)? 
0 : 1; 69722f5224aeSachartre return (rv); 69732f5224aeSachartre } 69742f5224aeSachartre 69752f5224aeSachartre case MHIOCQRESERVE: 69762f5224aeSachartre { 69772f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE, mode); 69782f5224aeSachartre return (rv); 69792f5224aeSachartre } 69802f5224aeSachartre 69812f5224aeSachartre case MHIOCGRP_INKEYS: 69822f5224aeSachartre { 69832f5224aeSachartre return (vdc_mhd_inkeys(vdc, arg, mode)); 69842f5224aeSachartre } 69852f5224aeSachartre 69862f5224aeSachartre case MHIOCGRP_INRESV: 69872f5224aeSachartre { 69882f5224aeSachartre return (vdc_mhd_inresv(vdc, arg, mode)); 69892f5224aeSachartre } 69902f5224aeSachartre 69912f5224aeSachartre case MHIOCGRP_REGISTER: 69922f5224aeSachartre { 69932f5224aeSachartre return (vdc_mhd_register(vdc, arg, mode)); 69942f5224aeSachartre } 69952f5224aeSachartre 69962f5224aeSachartre case MHIOCGRP_RESERVE: 69972f5224aeSachartre { 69982f5224aeSachartre return (vdc_mhd_reserve(vdc, arg, mode)); 69992f5224aeSachartre } 70002f5224aeSachartre 70012f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 70022f5224aeSachartre { 70032f5224aeSachartre return (vdc_mhd_preemptabort(vdc, arg, mode)); 70042f5224aeSachartre } 70052f5224aeSachartre 70062f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 70072f5224aeSachartre { 70082f5224aeSachartre return (vdc_mhd_registerignore(vdc, arg, mode)); 70092f5224aeSachartre } 70102f5224aeSachartre 70112f5224aeSachartre case MHIOCENFAILFAST: 70122f5224aeSachartre { 70132f5224aeSachartre rv = vdc_failfast(vdc, arg, mode); 70142f5224aeSachartre return (rv); 70152f5224aeSachartre } 70162f5224aeSachartre 701787a7269eSachartre case DIOCTL_RWCMD: 701887a7269eSachartre { 7019*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (vdc_dioctl_rwcmd(vdc, arg, mode)); 702087a7269eSachartre } 702187a7269eSachartre 702287a7269eSachartre case DKIOCGAPART: 702387a7269eSachartre { 70249642afceSachartre return (vdc_dkio_gapart(vdc, arg, mode)); 70259642afceSachartre } 
70269642afceSachartre 70279642afceSachartre case DKIOCPARTITION: 70289642afceSachartre { 70299642afceSachartre return (vdc_dkio_partition(vdc, arg, mode)); 703087a7269eSachartre } 703187a7269eSachartre 70321ae08745Sheppo case DKIOCINFO: 70331ae08745Sheppo { 70341ae08745Sheppo struct dk_cinfo cinfo; 70351ae08745Sheppo 70361ae08745Sheppo bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 70370d0c8d4bSnarayan cinfo.dki_partition = VDCPART(dev); 70381ae08745Sheppo 70391ae08745Sheppo rv = ddi_copyout(&cinfo, (void *)arg, 70401ae08745Sheppo sizeof (struct dk_cinfo), mode); 70411ae08745Sheppo if (rv != 0) 70421ae08745Sheppo return (EFAULT); 70431ae08745Sheppo 70441ae08745Sheppo return (0); 70451ae08745Sheppo } 70461ae08745Sheppo 70471ae08745Sheppo case DKIOCGMEDIAINFO: 70488e6a2a04Slm66018 { 70492f5224aeSachartre ASSERT(vdc->vdisk_size != 0); 7050de3a5331SRamesh Chitrothu ASSERT(vdc->minfo->dki_capacity != 0); 70511ae08745Sheppo rv = ddi_copyout(vdc->minfo, (void *)arg, 70521ae08745Sheppo sizeof (struct dk_minfo), mode); 70531ae08745Sheppo if (rv != 0) 70541ae08745Sheppo return (EFAULT); 70551ae08745Sheppo 70561ae08745Sheppo return (0); 70571ae08745Sheppo } 70581ae08745Sheppo 70598e6a2a04Slm66018 case DKIOCFLUSHWRITECACHE: 70608e6a2a04Slm66018 { 706117cadca8Slm66018 struct dk_callback *dkc = 706217cadca8Slm66018 (struct dk_callback *)(uintptr_t)arg; 70638e6a2a04Slm66018 vdc_dk_arg_t *dkarg = NULL; 70648e6a2a04Slm66018 70653af08d82Slm66018 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 70663af08d82Slm66018 instance, mode); 70678e6a2a04Slm66018 70688e6a2a04Slm66018 /* 70698e6a2a04Slm66018 * If arg is NULL, then there is no callback function 70708e6a2a04Slm66018 * registered and the call operates synchronously; we 70718e6a2a04Slm66018 * break and continue with the rest of the function and 70728e6a2a04Slm66018 * wait for vds to return (i.e. 
after the request to 70738e6a2a04Slm66018 * vds returns successfully, all writes completed prior 70748e6a2a04Slm66018 * to the ioctl will have been flushed from the disk 70758e6a2a04Slm66018 * write cache to persistent media. 70768e6a2a04Slm66018 * 70778e6a2a04Slm66018 * If a callback function is registered, we dispatch 70788e6a2a04Slm66018 * the request on a task queue and return immediately. 70798e6a2a04Slm66018 * The callback will deal with informing the calling 70808e6a2a04Slm66018 * thread that the flush request is completed. 70818e6a2a04Slm66018 */ 70828e6a2a04Slm66018 if (dkc == NULL) 70838e6a2a04Slm66018 break; 70848e6a2a04Slm66018 7085eff7243fSlm66018 /* 7086eff7243fSlm66018 * the asynchronous callback is only supported if 7087eff7243fSlm66018 * invoked from within the kernel 7088eff7243fSlm66018 */ 7089eff7243fSlm66018 if ((mode & FKIOCTL) == 0) 7090eff7243fSlm66018 return (ENOTSUP); 7091eff7243fSlm66018 70928e6a2a04Slm66018 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 70938e6a2a04Slm66018 70948e6a2a04Slm66018 dkarg->mode = mode; 70958e6a2a04Slm66018 dkarg->dev = dev; 70968e6a2a04Slm66018 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 70978e6a2a04Slm66018 70988e6a2a04Slm66018 mutex_enter(&vdc->lock); 70998e6a2a04Slm66018 vdc->dkio_flush_pending++; 71008e6a2a04Slm66018 dkarg->vdc = vdc; 71018e6a2a04Slm66018 mutex_exit(&vdc->lock); 71028e6a2a04Slm66018 71038e6a2a04Slm66018 /* put the request on a task queue */ 71048e6a2a04Slm66018 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 71058e6a2a04Slm66018 (void *)dkarg, DDI_SLEEP); 71063af08d82Slm66018 if (rv == NULL) { 71073af08d82Slm66018 /* clean up if dispatch fails */ 71083af08d82Slm66018 mutex_enter(&vdc->lock); 71093af08d82Slm66018 vdc->dkio_flush_pending--; 711078fcd0a1Sachartre mutex_exit(&vdc->lock); 71113af08d82Slm66018 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 71123af08d82Slm66018 } 71138e6a2a04Slm66018 71148e6a2a04Slm66018 return (rv == NULL ? 
ENOMEM : 0); 71158e6a2a04Slm66018 } 71168e6a2a04Slm66018 } 71178e6a2a04Slm66018 71181ae08745Sheppo /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 71193af08d82Slm66018 ASSERT(iop->op != 0); 71201ae08745Sheppo 712117cadca8Slm66018 /* check if the vDisk server handles the operation for this vDisk */ 712217cadca8Slm66018 if (VD_OP_SUPPORTED(vdc->operations, iop->op) == B_FALSE) { 712317cadca8Slm66018 DMSG(vdc, 0, "[%d] Unsupported VD_OP operation (0x%x)\n", 712417cadca8Slm66018 vdc->instance, iop->op); 712517cadca8Slm66018 return (ENOTSUP); 712617cadca8Slm66018 } 712717cadca8Slm66018 71281ae08745Sheppo /* LDC requires that the memory being mapped is 8-byte aligned */ 71291ae08745Sheppo alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 71303af08d82Slm66018 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 71313af08d82Slm66018 instance, len, alloc_len); 71321ae08745Sheppo 7133eff7243fSlm66018 if (alloc_len > 0) 71341ae08745Sheppo mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 71351ae08745Sheppo 71360a55fbb7Slm66018 /* 7137eff7243fSlm66018 * Call the conversion function for this ioctl which, if necessary, 71380a55fbb7Slm66018 * converts from the Solaris format to the format ARC'ed 71390a55fbb7Slm66018 * as part of the vDisk protocol (FWARC 2006/195) 71400a55fbb7Slm66018 */ 71413af08d82Slm66018 ASSERT(iop->convert != NULL); 71423af08d82Slm66018 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 71431ae08745Sheppo if (rv != 0) { 71443af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 7145e1ebb9ecSlm66018 instance, rv, cmd); 71461ae08745Sheppo if (mem_p != NULL) 71471ae08745Sheppo kmem_free(mem_p, alloc_len); 71480a55fbb7Slm66018 return (rv); 71491ae08745Sheppo } 71501ae08745Sheppo 71511ae08745Sheppo /* 71521ae08745Sheppo * send request to vds to service the ioctl. 
71531ae08745Sheppo */ 71543af08d82Slm66018 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 71550d0c8d4bSnarayan VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 71562f5224aeSachartre VIO_both_dir, B_TRUE); 715778fcd0a1Sachartre 71581ae08745Sheppo if (rv != 0) { 71591ae08745Sheppo /* 71601ae08745Sheppo * This is not necessarily an error. The ioctl could 71611ae08745Sheppo * be returning a value such as ENOTTY to indicate 71621ae08745Sheppo * that the ioctl is not applicable. 71631ae08745Sheppo */ 71643af08d82Slm66018 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 7165e1ebb9ecSlm66018 instance, rv, cmd); 71661ae08745Sheppo if (mem_p != NULL) 71671ae08745Sheppo kmem_free(mem_p, alloc_len); 7168d10e4ef2Snarayan 71691ae08745Sheppo return (rv); 71701ae08745Sheppo } 71711ae08745Sheppo 71721ae08745Sheppo /* 71730a55fbb7Slm66018 * Call the conversion function (if it exists) for this ioctl 71740a55fbb7Slm66018 * which converts from the format ARC'ed as part of the vDisk 71750a55fbb7Slm66018 * protocol (FWARC 2006/195) back to a format understood by 71760a55fbb7Slm66018 * the rest of Solaris. 
71771ae08745Sheppo */ 71783af08d82Slm66018 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 71790a55fbb7Slm66018 if (rv != 0) { 71803af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 7181e1ebb9ecSlm66018 instance, rv, cmd); 71821ae08745Sheppo if (mem_p != NULL) 71831ae08745Sheppo kmem_free(mem_p, alloc_len); 71840a55fbb7Slm66018 return (rv); 71851ae08745Sheppo } 71861ae08745Sheppo 71871ae08745Sheppo if (mem_p != NULL) 71881ae08745Sheppo kmem_free(mem_p, alloc_len); 71891ae08745Sheppo 71901ae08745Sheppo return (rv); 71911ae08745Sheppo } 71921ae08745Sheppo 71931ae08745Sheppo /* 71941ae08745Sheppo * Function: 71950a55fbb7Slm66018 * 71960a55fbb7Slm66018 * Description: 71970a55fbb7Slm66018 * This is an empty conversion function used by ioctl calls which 71980a55fbb7Slm66018 * do not need to convert the data being passed in/out to userland 71990a55fbb7Slm66018 */ 72000a55fbb7Slm66018 static int 7201d10e4ef2Snarayan vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 72020a55fbb7Slm66018 { 7203d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 72040a55fbb7Slm66018 _NOTE(ARGUNUSED(from)) 72050a55fbb7Slm66018 _NOTE(ARGUNUSED(to)) 72060a55fbb7Slm66018 _NOTE(ARGUNUSED(mode)) 72070a55fbb7Slm66018 _NOTE(ARGUNUSED(dir)) 72080a55fbb7Slm66018 72090a55fbb7Slm66018 return (0); 72100a55fbb7Slm66018 } 72110a55fbb7Slm66018 72124bac2208Snarayan static int 72134bac2208Snarayan vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 72144bac2208Snarayan int mode, int dir) 72154bac2208Snarayan { 72164bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 72174bac2208Snarayan 72184bac2208Snarayan if (dir == VD_COPYIN) 72194bac2208Snarayan return (0); /* nothing to do */ 72204bac2208Snarayan 72214bac2208Snarayan if (ddi_copyout(from, to, sizeof (int), mode) != 0) 72224bac2208Snarayan return (EFAULT); 72234bac2208Snarayan 72244bac2208Snarayan return (0); 72254bac2208Snarayan } 72264bac2208Snarayan 72274bac2208Snarayan static int 72284bac2208Snarayan 
vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 72294bac2208Snarayan int mode, int dir) 72304bac2208Snarayan { 72314bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 72324bac2208Snarayan 72334bac2208Snarayan if (dir == VD_COPYOUT) 72344bac2208Snarayan return (0); /* nothing to do */ 72354bac2208Snarayan 72364bac2208Snarayan if (ddi_copyin(from, to, sizeof (int), mode) != 0) 72374bac2208Snarayan return (EFAULT); 72384bac2208Snarayan 72394bac2208Snarayan return (0); 72404bac2208Snarayan } 72414bac2208Snarayan 72420a55fbb7Slm66018 /* 72430a55fbb7Slm66018 * Function: 72440a55fbb7Slm66018 * vdc_get_vtoc_convert() 72450a55fbb7Slm66018 * 72460a55fbb7Slm66018 * Description: 7247d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGVTOC 7248d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 7249d10e4ef2Snarayan * 7250d10e4ef2Snarayan * In the struct vtoc definition, the timestamp field is marked as not 7251d10e4ef2Snarayan * supported so it is not part of vDisk protocol (FWARC 2006/195). 7252d10e4ef2Snarayan * However SVM uses that field to check it can write into the VTOC, 7253d10e4ef2Snarayan * so we fake up the info of that field. 72540a55fbb7Slm66018 * 72550a55fbb7Slm66018 * Arguments: 7256d10e4ef2Snarayan * vdc - the vDisk client 72570a55fbb7Slm66018 * from - the buffer containing the data to be copied from 72580a55fbb7Slm66018 * to - the buffer to be copied to 72590a55fbb7Slm66018 * mode - flags passed to ioctl() call 72600a55fbb7Slm66018 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 72610a55fbb7Slm66018 * 72620a55fbb7Slm66018 * Return Code: 72630a55fbb7Slm66018 * 0 - Success 72640a55fbb7Slm66018 * ENXIO - incorrect buffer passed in. 7265d10e4ef2Snarayan * EFAULT - ddi_copyout routine encountered an error. 
72660a55fbb7Slm66018 */ 72670a55fbb7Slm66018 static int 7268d10e4ef2Snarayan vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 72690a55fbb7Slm66018 { 7270d10e4ef2Snarayan int i; 7271342440ecSPrasad Singamsetty struct vtoc vtoc; 7272342440ecSPrasad Singamsetty struct vtoc32 vtoc32; 7273342440ecSPrasad Singamsetty struct extvtoc evtoc; 7274342440ecSPrasad Singamsetty int rv; 72750a55fbb7Slm66018 72760a55fbb7Slm66018 if (dir != VD_COPYOUT) 72770a55fbb7Slm66018 return (0); /* nothing to do */ 72780a55fbb7Slm66018 72790a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 72800a55fbb7Slm66018 return (ENXIO); 72810a55fbb7Slm66018 7282342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) 7283342440ecSPrasad Singamsetty return (EOVERFLOW); 72840a55fbb7Slm66018 7285342440ecSPrasad Singamsetty VD_VTOC2VTOC((vd_vtoc_t *)from, &evtoc); 7286d10e4ef2Snarayan 7287d10e4ef2Snarayan /* fake the VTOC timestamp field */ 7288d10e4ef2Snarayan for (i = 0; i < V_NUMPAR; i++) { 7289342440ecSPrasad Singamsetty evtoc.timestamp[i] = vdc->vtoc->timestamp[i]; 7290d10e4ef2Snarayan } 7291d10e4ef2Snarayan 72920a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 729317cadca8Slm66018 /* LINTED E_ASSIGN_NARROW_CONV */ 7294342440ecSPrasad Singamsetty extvtoctovtoc32(evtoc, vtoc32); 7295342440ecSPrasad Singamsetty rv = ddi_copyout(&vtoc32, to, sizeof (vtoc32), mode); 72960a55fbb7Slm66018 if (rv != 0) 72970a55fbb7Slm66018 rv = EFAULT; 7298342440ecSPrasad Singamsetty } else { 7299342440ecSPrasad Singamsetty extvtoctovtoc(evtoc, vtoc); 7300342440ecSPrasad Singamsetty rv = ddi_copyout(&vtoc, to, sizeof (vtoc), mode); 7301342440ecSPrasad Singamsetty if (rv != 0) 7302342440ecSPrasad Singamsetty rv = EFAULT; 7303342440ecSPrasad Singamsetty } 73040a55fbb7Slm66018 73050a55fbb7Slm66018 return (rv); 73060a55fbb7Slm66018 } 73070a55fbb7Slm66018 73080a55fbb7Slm66018 /* 73090a55fbb7Slm66018 * Function: 73100a55fbb7Slm66018 * vdc_set_vtoc_convert() 
73110a55fbb7Slm66018 * 73120a55fbb7Slm66018 * Description: 7313d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSVTOC 7314d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 73150a55fbb7Slm66018 * 73160a55fbb7Slm66018 * Arguments: 7317d10e4ef2Snarayan * vdc - the vDisk client 73180a55fbb7Slm66018 * from - Buffer with data 73190a55fbb7Slm66018 * to - Buffer where data is to be copied to 73200a55fbb7Slm66018 * mode - flags passed to ioctl 73210a55fbb7Slm66018 * dir - direction of copy (in or out) 73220a55fbb7Slm66018 * 73230a55fbb7Slm66018 * Return Code: 73240a55fbb7Slm66018 * 0 - Success 73250a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 73260a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 73270a55fbb7Slm66018 */ 73280a55fbb7Slm66018 static int 7329d10e4ef2Snarayan vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 73300a55fbb7Slm66018 { 7331342440ecSPrasad Singamsetty void *uvtoc; 7332342440ecSPrasad Singamsetty struct vtoc vtoc; 7333342440ecSPrasad Singamsetty struct vtoc32 vtoc32; 7334342440ecSPrasad Singamsetty struct extvtoc evtoc; 7335342440ecSPrasad Singamsetty int i, rv; 73360a55fbb7Slm66018 73370a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 73380a55fbb7Slm66018 return (ENXIO); 73390a55fbb7Slm66018 7340342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) 7341342440ecSPrasad Singamsetty return (EOVERFLOW); 73422f5224aeSachartre 7343342440ecSPrasad Singamsetty uvtoc = (dir == VD_COPYIN)? 
from : to; 73440a55fbb7Slm66018 73450a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 7346342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &vtoc32, sizeof (vtoc32), mode); 7347342440ecSPrasad Singamsetty if (rv != 0) 7348342440ecSPrasad Singamsetty return (EFAULT); 7349342440ecSPrasad Singamsetty vtoc32toextvtoc(vtoc32, evtoc); 73500a55fbb7Slm66018 } else { 7351342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &vtoc, sizeof (vtoc), mode); 7352342440ecSPrasad Singamsetty if (rv != 0) 7353342440ecSPrasad Singamsetty return (EFAULT); 7354342440ecSPrasad Singamsetty vtoctoextvtoc(vtoc, evtoc); 73550a55fbb7Slm66018 } 73560a55fbb7Slm66018 73572f5224aeSachartre if (dir == VD_COPYOUT) { 73582f5224aeSachartre /* 73592f5224aeSachartre * The disk label may have changed. Revalidate the disk 73605b98b509Sachartre * geometry. This will also update the device nodes. 73612f5224aeSachartre */ 73622f5224aeSachartre vdc_validate(vdc); 73632f5224aeSachartre 73642f5224aeSachartre /* 73652f5224aeSachartre * We also need to keep track of the timestamp fields. 
73662f5224aeSachartre */ 73672f5224aeSachartre for (i = 0; i < V_NUMPAR; i++) { 7368342440ecSPrasad Singamsetty vdc->vtoc->timestamp[i] = evtoc.timestamp[i]; 7369342440ecSPrasad Singamsetty } 7370342440ecSPrasad Singamsetty 7371342440ecSPrasad Singamsetty } else { 7372342440ecSPrasad Singamsetty VTOC2VD_VTOC(&evtoc, (vd_vtoc_t *)to); 73732f5224aeSachartre } 73742f5224aeSachartre 73752f5224aeSachartre return (0); 73762f5224aeSachartre } 73772f5224aeSachartre 7378342440ecSPrasad Singamsetty static int 7379342440ecSPrasad Singamsetty vdc_get_extvtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7380342440ecSPrasad Singamsetty { 7381342440ecSPrasad Singamsetty int i, rv; 7382342440ecSPrasad Singamsetty struct extvtoc evtoc; 7383342440ecSPrasad Singamsetty 7384342440ecSPrasad Singamsetty if (dir != VD_COPYOUT) 7385342440ecSPrasad Singamsetty return (0); /* nothing to do */ 7386342440ecSPrasad Singamsetty 7387342440ecSPrasad Singamsetty if ((from == NULL) || (to == NULL)) 7388342440ecSPrasad Singamsetty return (ENXIO); 7389342440ecSPrasad Singamsetty 7390342440ecSPrasad Singamsetty VD_VTOC2VTOC((vd_vtoc_t *)from, &evtoc); 7391342440ecSPrasad Singamsetty 7392342440ecSPrasad Singamsetty /* fake the VTOC timestamp field */ 7393342440ecSPrasad Singamsetty for (i = 0; i < V_NUMPAR; i++) { 7394342440ecSPrasad Singamsetty evtoc.timestamp[i] = vdc->vtoc->timestamp[i]; 7395342440ecSPrasad Singamsetty } 7396342440ecSPrasad Singamsetty 7397342440ecSPrasad Singamsetty rv = ddi_copyout(&evtoc, to, sizeof (struct extvtoc), mode); 7398342440ecSPrasad Singamsetty if (rv != 0) 7399342440ecSPrasad Singamsetty rv = EFAULT; 7400342440ecSPrasad Singamsetty 7401342440ecSPrasad Singamsetty return (rv); 7402342440ecSPrasad Singamsetty } 7403342440ecSPrasad Singamsetty 7404342440ecSPrasad Singamsetty static int 7405342440ecSPrasad Singamsetty vdc_set_extvtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7406342440ecSPrasad Singamsetty { 7407342440ecSPrasad 
Singamsetty void *uvtoc; 7408342440ecSPrasad Singamsetty struct extvtoc evtoc; 7409342440ecSPrasad Singamsetty int i, rv; 7410342440ecSPrasad Singamsetty 7411342440ecSPrasad Singamsetty if ((from == NULL) || (to == NULL)) 7412342440ecSPrasad Singamsetty return (ENXIO); 7413342440ecSPrasad Singamsetty 7414342440ecSPrasad Singamsetty uvtoc = (dir == VD_COPYIN)? from : to; 7415342440ecSPrasad Singamsetty 7416342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &evtoc, sizeof (struct extvtoc), mode); 7417342440ecSPrasad Singamsetty if (rv != 0) 7418342440ecSPrasad Singamsetty return (EFAULT); 7419342440ecSPrasad Singamsetty 7420342440ecSPrasad Singamsetty if (dir == VD_COPYOUT) { 7421342440ecSPrasad Singamsetty /* 7422342440ecSPrasad Singamsetty * The disk label may have changed. Revalidate the disk 7423342440ecSPrasad Singamsetty * geometry. This will also update the device nodes. 7424342440ecSPrasad Singamsetty */ 7425342440ecSPrasad Singamsetty vdc_validate(vdc); 7426342440ecSPrasad Singamsetty 7427342440ecSPrasad Singamsetty /* 7428342440ecSPrasad Singamsetty * We also need to keep track of the timestamp fields. 
7429342440ecSPrasad Singamsetty */ 7430342440ecSPrasad Singamsetty for (i = 0; i < V_NUMPAR; i++) { 7431342440ecSPrasad Singamsetty vdc->vtoc->timestamp[i] = evtoc.timestamp[i]; 7432342440ecSPrasad Singamsetty } 7433342440ecSPrasad Singamsetty 7434342440ecSPrasad Singamsetty } else { 7435342440ecSPrasad Singamsetty VTOC2VD_VTOC(&evtoc, (vd_vtoc_t *)to); 7436342440ecSPrasad Singamsetty } 74370a55fbb7Slm66018 74380a55fbb7Slm66018 return (0); 74390a55fbb7Slm66018 } 74400a55fbb7Slm66018 74410a55fbb7Slm66018 /* 74420a55fbb7Slm66018 * Function: 74430a55fbb7Slm66018 * vdc_get_geom_convert() 74440a55fbb7Slm66018 * 74450a55fbb7Slm66018 * Description: 7446d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGGEOM, 7447d10e4ef2Snarayan * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 7448d10e4ef2Snarayan * defined in FWARC 2006/195 74490a55fbb7Slm66018 * 74500a55fbb7Slm66018 * Arguments: 7451d10e4ef2Snarayan * vdc - the vDisk client 74520a55fbb7Slm66018 * from - Buffer with data 74530a55fbb7Slm66018 * to - Buffer where data is to be copied to 74540a55fbb7Slm66018 * mode - flags passed to ioctl 74550a55fbb7Slm66018 * dir - direction of copy (in or out) 74560a55fbb7Slm66018 * 74570a55fbb7Slm66018 * Return Code: 74580a55fbb7Slm66018 * 0 - Success 74590a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 7460d10e4ef2Snarayan * EFAULT - ddi_copyout of data failed 74610a55fbb7Slm66018 */ 74620a55fbb7Slm66018 static int 7463d10e4ef2Snarayan vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 74640a55fbb7Slm66018 { 7465d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7466d10e4ef2Snarayan 74670a55fbb7Slm66018 struct dk_geom geom; 74680a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 74690a55fbb7Slm66018 int rv = 0; 74700a55fbb7Slm66018 74710a55fbb7Slm66018 if (dir != VD_COPYOUT) 74720a55fbb7Slm66018 return (0); /* nothing to do */ 74730a55fbb7Slm66018 74740a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 
74750a55fbb7Slm66018 return (ENXIO); 74760a55fbb7Slm66018 74770a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 74780a55fbb7Slm66018 rv = ddi_copyout(&geom, to, copy_len, mode); 74790a55fbb7Slm66018 if (rv != 0) 74800a55fbb7Slm66018 rv = EFAULT; 74810a55fbb7Slm66018 74820a55fbb7Slm66018 return (rv); 74830a55fbb7Slm66018 } 74840a55fbb7Slm66018 74850a55fbb7Slm66018 /* 74860a55fbb7Slm66018 * Function: 74870a55fbb7Slm66018 * vdc_set_geom_convert() 74880a55fbb7Slm66018 * 74890a55fbb7Slm66018 * Description: 7490d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSGEOM 7491d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 74920a55fbb7Slm66018 * 74930a55fbb7Slm66018 * Arguments: 7494d10e4ef2Snarayan * vdc - the vDisk client 74950a55fbb7Slm66018 * from - Buffer with data 74960a55fbb7Slm66018 * to - Buffer where data is to be copied to 74970a55fbb7Slm66018 * mode - flags passed to ioctl 74980a55fbb7Slm66018 * dir - direction of copy (in or out) 74990a55fbb7Slm66018 * 75000a55fbb7Slm66018 * Return Code: 75010a55fbb7Slm66018 * 0 - Success 75020a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 75030a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 75040a55fbb7Slm66018 */ 75050a55fbb7Slm66018 static int 7506d10e4ef2Snarayan vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 75070a55fbb7Slm66018 { 7508d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7509d10e4ef2Snarayan 75100a55fbb7Slm66018 vd_geom_t vdgeom; 75110a55fbb7Slm66018 void *tmp_mem = NULL; 75120a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 75130a55fbb7Slm66018 int rv = 0; 75140a55fbb7Slm66018 75150a55fbb7Slm66018 if (dir != VD_COPYIN) 75160a55fbb7Slm66018 return (0); /* nothing to do */ 75170a55fbb7Slm66018 75180a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 75190a55fbb7Slm66018 return (ENXIO); 75200a55fbb7Slm66018 75210a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 75220a55fbb7Slm66018 75230a55fbb7Slm66018 rv 
= ddi_copyin(from, tmp_mem, copy_len, mode); 75240a55fbb7Slm66018 if (rv != 0) { 75250a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 75260a55fbb7Slm66018 return (EFAULT); 75270a55fbb7Slm66018 } 75280a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 75290a55fbb7Slm66018 bcopy(&vdgeom, to, sizeof (vdgeom)); 75300a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 75310a55fbb7Slm66018 75320a55fbb7Slm66018 return (0); 75330a55fbb7Slm66018 } 75340a55fbb7Slm66018 75354bac2208Snarayan static int 75364bac2208Snarayan vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 75374bac2208Snarayan { 75384bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 75394bac2208Snarayan 75404bac2208Snarayan vd_efi_t *vd_efi; 75414bac2208Snarayan dk_efi_t dk_efi; 75424bac2208Snarayan int rv = 0; 75434bac2208Snarayan void *uaddr; 75444bac2208Snarayan 75454bac2208Snarayan if ((from == NULL) || (to == NULL)) 75464bac2208Snarayan return (ENXIO); 75474bac2208Snarayan 75484bac2208Snarayan if (dir == VD_COPYIN) { 75494bac2208Snarayan 75504bac2208Snarayan vd_efi = (vd_efi_t *)to; 75514bac2208Snarayan 75524bac2208Snarayan rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 75534bac2208Snarayan if (rv != 0) 75544bac2208Snarayan return (EFAULT); 75554bac2208Snarayan 75564bac2208Snarayan vd_efi->lba = dk_efi.dki_lba; 75574bac2208Snarayan vd_efi->length = dk_efi.dki_length; 75584bac2208Snarayan bzero(vd_efi->data, vd_efi->length); 75594bac2208Snarayan 75604bac2208Snarayan } else { 75614bac2208Snarayan 75624bac2208Snarayan rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 75634bac2208Snarayan if (rv != 0) 75644bac2208Snarayan return (EFAULT); 75654bac2208Snarayan 75664bac2208Snarayan uaddr = dk_efi.dki_data; 75674bac2208Snarayan 75684bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 75694bac2208Snarayan 75704bac2208Snarayan VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 75714bac2208Snarayan 75724bac2208Snarayan rv = ddi_copyout(dk_efi.dki_data, uaddr, 
dk_efi.dki_length, 75734bac2208Snarayan mode); 75744bac2208Snarayan if (rv != 0) 75754bac2208Snarayan return (EFAULT); 75764bac2208Snarayan 75774bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 75784bac2208Snarayan } 75794bac2208Snarayan 75804bac2208Snarayan return (0); 75814bac2208Snarayan } 75824bac2208Snarayan 75834bac2208Snarayan static int 75844bac2208Snarayan vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 75854bac2208Snarayan { 75864bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 75874bac2208Snarayan 75884bac2208Snarayan dk_efi_t dk_efi; 75894bac2208Snarayan void *uaddr; 75904bac2208Snarayan 75912f5224aeSachartre if (dir == VD_COPYOUT) { 75922f5224aeSachartre /* 75932f5224aeSachartre * The disk label may have changed. Revalidate the disk 75945b98b509Sachartre * geometry. This will also update the device nodes. 75952f5224aeSachartre */ 75962f5224aeSachartre vdc_validate(vdc); 75972f5224aeSachartre return (0); 75982f5224aeSachartre } 75994bac2208Snarayan 76004bac2208Snarayan if ((from == NULL) || (to == NULL)) 76014bac2208Snarayan return (ENXIO); 76024bac2208Snarayan 76034bac2208Snarayan if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 76044bac2208Snarayan return (EFAULT); 76054bac2208Snarayan 76064bac2208Snarayan uaddr = dk_efi.dki_data; 76074bac2208Snarayan 76084bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 76094bac2208Snarayan 76104bac2208Snarayan if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 76114bac2208Snarayan return (EFAULT); 76124bac2208Snarayan 76134bac2208Snarayan DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 76144bac2208Snarayan 76154bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 76164bac2208Snarayan 76174bac2208Snarayan return (0); 76184bac2208Snarayan } 76194bac2208Snarayan 762017cadca8Slm66018 762117cadca8Slm66018 /* -------------------------------------------------------------------------- */ 762217cadca8Slm66018 76230a55fbb7Slm66018 /* 
76240a55fbb7Slm66018 * Function: 76251ae08745Sheppo * vdc_create_fake_geometry() 76261ae08745Sheppo * 76271ae08745Sheppo * Description: 762817cadca8Slm66018 * This routine fakes up the disk info needed for some DKIO ioctls such 762917cadca8Slm66018 * as DKIOCINFO and DKIOCGMEDIAINFO [just like lofi(7D) and ramdisk(7D) do] 76301ae08745Sheppo * 763117cadca8Slm66018 * Note: This function must not be called until the vDisk attributes have 763217cadca8Slm66018 * been exchanged as part of the handshake with the vDisk server. 76331ae08745Sheppo * 76341ae08745Sheppo * Arguments: 76351ae08745Sheppo * vdc - soft state pointer for this instance of the device driver. 76361ae08745Sheppo * 76371ae08745Sheppo * Return Code: 763878fcd0a1Sachartre * none. 76391ae08745Sheppo */ 764078fcd0a1Sachartre static void 76411ae08745Sheppo vdc_create_fake_geometry(vdc_t *vdc) 76421ae08745Sheppo { 76431ae08745Sheppo ASSERT(vdc != NULL); 764478fcd0a1Sachartre ASSERT(vdc->max_xfer_sz != 0); 76450d0c8d4bSnarayan 76460d0c8d4bSnarayan /* 76471ae08745Sheppo * DKIOCINFO support 76481ae08745Sheppo */ 764978fcd0a1Sachartre if (vdc->cinfo == NULL) 76501ae08745Sheppo vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 76511ae08745Sheppo 76521ae08745Sheppo (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 76531ae08745Sheppo (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 7654*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* max_xfer_sz is #blocks so we don't need to divide by vdisk_bsize */ 76558e6a2a04Slm66018 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 76562f5224aeSachartre 765787a7269eSachartre /* 76582f5224aeSachartre * We set the controller type to DKC_SCSI_CCS only if the VD_OP_SCSICMD 76592f5224aeSachartre * operation is supported, otherwise the controller type is DKC_DIRECT. 76602f5224aeSachartre * Version 1.0 does not support the VD_OP_SCSICMD operation, so the 76612f5224aeSachartre * controller type is always DKC_DIRECT in that case. 
76622f5224aeSachartre * 766317cadca8Slm66018 * If the virtual disk is backed by a physical CD/DVD device or 766417cadca8Slm66018 * an ISO image, modify the controller type to indicate this 766587a7269eSachartre */ 766617cadca8Slm66018 switch (vdc->vdisk_media) { 766717cadca8Slm66018 case VD_MEDIA_CD: 766817cadca8Slm66018 case VD_MEDIA_DVD: 766917cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_CDROM; 767017cadca8Slm66018 break; 767117cadca8Slm66018 case VD_MEDIA_FIXED: 76722f5224aeSachartre if (VD_OP_SUPPORTED(vdc->operations, VD_OP_SCSICMD)) 76732f5224aeSachartre vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 76742f5224aeSachartre else 767587a7269eSachartre vdc->cinfo->dki_ctype = DKC_DIRECT; 767617cadca8Slm66018 break; 767717cadca8Slm66018 default: 767817cadca8Slm66018 /* in the case of v1.0 we default to a fixed disk */ 767917cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_DIRECT; 768017cadca8Slm66018 break; 768117cadca8Slm66018 } 76821ae08745Sheppo vdc->cinfo->dki_flags = DKI_FMTVOL; 76831ae08745Sheppo vdc->cinfo->dki_cnum = 0; 76841ae08745Sheppo vdc->cinfo->dki_addr = 0; 76851ae08745Sheppo vdc->cinfo->dki_space = 0; 76861ae08745Sheppo vdc->cinfo->dki_prio = 0; 76871ae08745Sheppo vdc->cinfo->dki_vec = 0; 76881ae08745Sheppo vdc->cinfo->dki_unit = vdc->instance; 76891ae08745Sheppo vdc->cinfo->dki_slave = 0; 76901ae08745Sheppo /* 76911ae08745Sheppo * The partition number will be created on the fly depending on the 76921ae08745Sheppo * actual slice (i.e. minor node) that is used to request the data. 
76931ae08745Sheppo */ 76941ae08745Sheppo vdc->cinfo->dki_partition = 0; 76951ae08745Sheppo 76961ae08745Sheppo /* 76971ae08745Sheppo * DKIOCGMEDIAINFO support 76981ae08745Sheppo */ 76990a55fbb7Slm66018 if (vdc->minfo == NULL) 77001ae08745Sheppo vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 770117cadca8Slm66018 770217cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) { 770317cadca8Slm66018 vdc->minfo->dki_media_type = 770417cadca8Slm66018 VD_MEDIATYPE2DK_MEDIATYPE(vdc->vdisk_media); 770517cadca8Slm66018 } else { 77061ae08745Sheppo vdc->minfo->dki_media_type = DK_FIXED_DISK; 770717cadca8Slm66018 } 770817cadca8Slm66018 77094bac2208Snarayan vdc->minfo->dki_capacity = vdc->vdisk_size; 7710*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->minfo->dki_lbsize = vdc->vdisk_bsize; 771178fcd0a1Sachartre } 77121ae08745Sheppo 771378fcd0a1Sachartre static ushort_t 771478fcd0a1Sachartre vdc_lbl2cksum(struct dk_label *label) 771578fcd0a1Sachartre { 771678fcd0a1Sachartre int count; 771778fcd0a1Sachartre ushort_t sum, *sp; 771878fcd0a1Sachartre 771978fcd0a1Sachartre count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 772078fcd0a1Sachartre sp = (ushort_t *)label; 772178fcd0a1Sachartre sum = 0; 772278fcd0a1Sachartre while (count--) { 772378fcd0a1Sachartre sum ^= *sp++; 772478fcd0a1Sachartre } 772578fcd0a1Sachartre 772678fcd0a1Sachartre return (sum); 77270a55fbb7Slm66018 } 77280a55fbb7Slm66018 7729de3a5331SRamesh Chitrothu static void 7730de3a5331SRamesh Chitrothu vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size) 7731de3a5331SRamesh Chitrothu { 7732de3a5331SRamesh Chitrothu vd_err_stats_t *stp; 7733de3a5331SRamesh Chitrothu 7734de3a5331SRamesh Chitrothu ASSERT(MUTEX_HELD(&vdc->lock)); 7735de3a5331SRamesh Chitrothu ASSERT(xfr_size != 0); 7736de3a5331SRamesh Chitrothu 7737de3a5331SRamesh Chitrothu /* 7738de3a5331SRamesh Chitrothu * If the disk size is unknown or sizes are unchanged then don't 7739de3a5331SRamesh 
Chitrothu * update anything. 7740de3a5331SRamesh Chitrothu */ 7741de3a5331SRamesh Chitrothu if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || 7742*65908c77Syu, larry liu - Sun Microsystems - Beijing China (blk_size == vdc->vdisk_bsize && dsk_size == vdc->vdisk_size && 7743de3a5331SRamesh Chitrothu xfr_size == vdc->max_xfer_sz)) 7744de3a5331SRamesh Chitrothu return; 7745de3a5331SRamesh Chitrothu 7746de3a5331SRamesh Chitrothu /* 7747de3a5331SRamesh Chitrothu * We don't know at compile time what the vDisk server will think 7748de3a5331SRamesh Chitrothu * are good values but we apply a large (arbitrary) upper bound to 7749de3a5331SRamesh Chitrothu * prevent memory exhaustion in vdc if it was allocating a DRing 7750de3a5331SRamesh Chitrothu * based of huge values sent by the server. We probably will never 7751de3a5331SRamesh Chitrothu * exceed this except if the message was garbage. 7752de3a5331SRamesh Chitrothu */ 7753de3a5331SRamesh Chitrothu if ((xfr_size * blk_size) > (PAGESIZE * DEV_BSIZE)) { 7754de3a5331SRamesh Chitrothu DMSG(vdc, 0, "[%d] vds block transfer size too big;" 7755de3a5331SRamesh Chitrothu " using max supported by vdc", vdc->instance); 7756*65908c77Syu, larry liu - Sun Microsystems - Beijing China xfr_size = maxphys / blk_size; 7757de3a5331SRamesh Chitrothu } 7758de3a5331SRamesh Chitrothu 7759de3a5331SRamesh Chitrothu vdc->max_xfer_sz = xfr_size; 7760*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vdisk_bsize = blk_size; 7761de3a5331SRamesh Chitrothu vdc->vdisk_size = dsk_size; 7762de3a5331SRamesh Chitrothu 7763de3a5331SRamesh Chitrothu stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 7764de3a5331SRamesh Chitrothu stp->vd_capacity.value.ui64 = dsk_size * blk_size; 7765de3a5331SRamesh Chitrothu 7766de3a5331SRamesh Chitrothu vdc->minfo->dki_capacity = dsk_size; 7767de3a5331SRamesh Chitrothu vdc->minfo->dki_lbsize = (uint_t)blk_size; 7768de3a5331SRamesh Chitrothu } 7769de3a5331SRamesh Chitrothu 77700a55fbb7Slm66018 /* 7771*65908c77Syu, 
larry liu - Sun Microsystems - Beijing China * Update information about the VIO block size. The VIO block size is the 7772*65908c77Syu, larry liu - Sun Microsystems - Beijing China * same as the vdisk block size which is stored in vdc->vdisk_bsize so we 7773*65908c77Syu, larry liu - Sun Microsystems - Beijing China * do not store that information again. 7774*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 7775*65908c77Syu, larry liu - Sun Microsystems - Beijing China * However, buf structures will always use a logical block size of 512 bytes 7776*65908c77Syu, larry liu - Sun Microsystems - Beijing China * (DEV_BSIZE) and we will need to convert logical block numbers to VIO block 7777*65908c77Syu, larry liu - Sun Microsystems - Beijing China * numbers for each read or write operation using vdc_strategy(). To speed up 7778*65908c77Syu, larry liu - Sun Microsystems - Beijing China * this conversion, we expect the VIO block size to be a power of 2 and a 7779*65908c77Syu, larry liu - Sun Microsystems - Beijing China * multiple 512 bytes (DEV_BSIZE), and we cache some useful information. 7780*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 7781*65908c77Syu, larry liu - Sun Microsystems - Beijing China * The function return EINVAL if the new VIO block size (blk_size) is not a 7782*65908c77Syu, larry liu - Sun Microsystems - Beijing China * power of 2 or not a multiple of 512 bytes, otherwise it returns 0. 
7783*65908c77Syu, larry liu - Sun Microsystems - Beijing China */ 7784*65908c77Syu, larry liu - Sun Microsystems - Beijing China static int 7785*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc_update_vio_bsize(vdc_t *vdc, uint32_t blk_size) 7786*65908c77Syu, larry liu - Sun Microsystems - Beijing China { 7787*65908c77Syu, larry liu - Sun Microsystems - Beijing China uint32_t ratio, n; 7788*65908c77Syu, larry liu - Sun Microsystems - Beijing China int nshift = 0; 7789*65908c77Syu, larry liu - Sun Microsystems - Beijing China 7790*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = 0; 7791*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bshift = 0; 7792*65908c77Syu, larry liu - Sun Microsystems - Beijing China 7793*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(blk_size > 0); 7794*65908c77Syu, larry liu - Sun Microsystems - Beijing China 7795*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((blk_size % DEV_BSIZE) != 0) 7796*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 7797*65908c77Syu, larry liu - Sun Microsystems - Beijing China 7798*65908c77Syu, larry liu - Sun Microsystems - Beijing China ratio = blk_size / DEV_BSIZE; 7799*65908c77Syu, larry liu - Sun Microsystems - Beijing China 7800*65908c77Syu, larry liu - Sun Microsystems - Beijing China for (n = ratio; n > 1; n >>= 1) { 7801*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((n & 0x1) != 0) { 7802*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* blk_size is not a power of 2 */ 7803*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 7804*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 7805*65908c77Syu, larry liu - Sun Microsystems - Beijing China nshift++; 7806*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 7807*65908c77Syu, larry liu - Sun Microsystems - Beijing China 7808*65908c77Syu, larry liu - Sun Microsystems - Beijing 
China vdc->vio_bshift = nshift; 7809*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = ratio - 1; 7810*65908c77Syu, larry liu - Sun Microsystems - Beijing China 7811*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (0); 7812*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 7813*65908c77Syu, larry liu - Sun Microsystems - Beijing China 7814*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* 78150a55fbb7Slm66018 * Function: 781678fcd0a1Sachartre * vdc_validate_geometry 78170a55fbb7Slm66018 * 78180a55fbb7Slm66018 * Description: 781978fcd0a1Sachartre * This routine discovers the label and geometry of the disk. It stores 782078fcd0a1Sachartre * the disk label and related information in the vdc structure. If it 782178fcd0a1Sachartre * fails to validate the geometry or to discover the disk label then 782278fcd0a1Sachartre * the label is marked as unknown (VD_DISK_LABEL_UNK). 78230a55fbb7Slm66018 * 78240a55fbb7Slm66018 * Arguments: 78250a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 78260a55fbb7Slm66018 * 78270a55fbb7Slm66018 * Return Code: 782878fcd0a1Sachartre * 0 - success. 782978fcd0a1Sachartre * EINVAL - unknown disk label. 783078fcd0a1Sachartre * ENOTSUP - geometry not applicable (EFI label). 783178fcd0a1Sachartre * EIO - error accessing the disk. 
78320a55fbb7Slm66018 */ 78330a55fbb7Slm66018 static int 783478fcd0a1Sachartre vdc_validate_geometry(vdc_t *vdc) 78350a55fbb7Slm66018 { 7836d10e4ef2Snarayan buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 78370a55fbb7Slm66018 dev_t dev; 78382f5224aeSachartre int rv, rval; 7839*65908c77Syu, larry liu - Sun Microsystems - Beijing China struct dk_label *label; 784078fcd0a1Sachartre struct dk_geom geom; 7841342440ecSPrasad Singamsetty struct extvtoc vtoc; 7842edcc0754Sachartre efi_gpt_t *gpt; 7843edcc0754Sachartre efi_gpe_t *gpe; 7844edcc0754Sachartre vd_efi_dev_t edev; 78450a55fbb7Slm66018 78460a55fbb7Slm66018 ASSERT(vdc != NULL); 784778fcd0a1Sachartre ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 784878fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 78490a55fbb7Slm66018 785078fcd0a1Sachartre mutex_exit(&vdc->lock); 7851de3a5331SRamesh Chitrothu /* 7852de3a5331SRamesh Chitrothu * Check the disk capacity in case it has changed. If that fails then 7853de3a5331SRamesh Chitrothu * we proceed and we will be using the disk size we currently have. 7854de3a5331SRamesh Chitrothu */ 7855de3a5331SRamesh Chitrothu (void) vdc_check_capacity(vdc); 78560a55fbb7Slm66018 dev = makedevice(ddi_driver_major(vdc->dip), 78570a55fbb7Slm66018 VD_MAKE_DEV(vdc->instance, 0)); 78584bac2208Snarayan 78592f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL, &rval); 786078fcd0a1Sachartre if (rv == 0) 7861342440ecSPrasad Singamsetty rv = vd_process_ioctl(dev, DKIOCGEXTVTOC, (caddr_t)&vtoc, 78622f5224aeSachartre FKIOCTL, &rval); 78630d0c8d4bSnarayan 78644bac2208Snarayan if (rv == ENOTSUP) { 78654bac2208Snarayan /* 78664bac2208Snarayan * If the device does not support VTOC then we try 78674bac2208Snarayan * to read an EFI label. 7868edcc0754Sachartre * 7869edcc0754Sachartre * We need to know the block size and the disk size to 7870edcc0754Sachartre * be able to read an EFI label. 
78714bac2208Snarayan */ 7872edcc0754Sachartre if (vdc->vdisk_size == 0) { 7873edcc0754Sachartre mutex_enter(&vdc->lock); 7874edcc0754Sachartre vdc_store_label_unk(vdc); 7875de3a5331SRamesh Chitrothu return (EIO); 7876edcc0754Sachartre } 78774bac2208Snarayan 7878*65908c77Syu, larry liu - Sun Microsystems - Beijing China VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 7879edcc0754Sachartre 7880edcc0754Sachartre rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe); 78814bac2208Snarayan 78824bac2208Snarayan if (rv) { 78833af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 78844bac2208Snarayan vdc->instance, rv); 788578fcd0a1Sachartre mutex_enter(&vdc->lock); 788678fcd0a1Sachartre vdc_store_label_unk(vdc); 788778fcd0a1Sachartre return (EIO); 788878fcd0a1Sachartre } 788978fcd0a1Sachartre 789078fcd0a1Sachartre mutex_enter(&vdc->lock); 7891edcc0754Sachartre vdc_store_label_efi(vdc, gpt, gpe); 7892edcc0754Sachartre vd_efi_free(&edev, gpt, gpe); 789378fcd0a1Sachartre return (ENOTSUP); 789478fcd0a1Sachartre } 789578fcd0a1Sachartre 789678fcd0a1Sachartre if (rv != 0) { 789778fcd0a1Sachartre DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 789878fcd0a1Sachartre vdc->instance, rv); 789978fcd0a1Sachartre mutex_enter(&vdc->lock); 790078fcd0a1Sachartre vdc_store_label_unk(vdc); 790178fcd0a1Sachartre if (rv != EINVAL) 790278fcd0a1Sachartre rv = EIO; 79034bac2208Snarayan return (rv); 79044bac2208Snarayan } 79054bac2208Snarayan 790678fcd0a1Sachartre /* check that geometry and vtoc are valid */ 790778fcd0a1Sachartre if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 790878fcd0a1Sachartre vtoc.v_sanity != VTOC_SANE) { 790978fcd0a1Sachartre mutex_enter(&vdc->lock); 791078fcd0a1Sachartre vdc_store_label_unk(vdc); 791178fcd0a1Sachartre return (EINVAL); 791278fcd0a1Sachartre } 79134bac2208Snarayan 791478fcd0a1Sachartre /* 791578fcd0a1Sachartre * We have a disk and a valid VTOC. However this does not mean 791678fcd0a1Sachartre * that the disk currently have a VTOC label. 
The returned VTOC may 791778fcd0a1Sachartre * be a default VTOC to be used for configuring the disk (this is 791878fcd0a1Sachartre * what is done for disk image). So we read the label from the 791978fcd0a1Sachartre * beginning of the disk to ensure we really have a VTOC label. 792078fcd0a1Sachartre * 792178fcd0a1Sachartre * FUTURE: This could be the default way for reading the VTOC 792278fcd0a1Sachartre * from the disk as opposed to sending the VD_OP_GET_VTOC 792378fcd0a1Sachartre * to the server. This will be the default if vdc is implemented 792478fcd0a1Sachartre * ontop of cmlb. 792578fcd0a1Sachartre */ 792678fcd0a1Sachartre 792778fcd0a1Sachartre /* 792878fcd0a1Sachartre * Single slice disk does not support read using an absolute disk 792978fcd0a1Sachartre * offset so we just rely on the DKIOCGVTOC ioctl in that case. 793078fcd0a1Sachartre */ 793178fcd0a1Sachartre if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 793278fcd0a1Sachartre mutex_enter(&vdc->lock); 793378fcd0a1Sachartre if (vtoc.v_nparts != 1) { 793478fcd0a1Sachartre vdc_store_label_unk(vdc); 793578fcd0a1Sachartre return (EINVAL); 793678fcd0a1Sachartre } 793778fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 79384bac2208Snarayan return (0); 79394bac2208Snarayan } 79404bac2208Snarayan 794178fcd0a1Sachartre if (vtoc.v_nparts != V_NUMPAR) { 794278fcd0a1Sachartre mutex_enter(&vdc->lock); 794378fcd0a1Sachartre vdc_store_label_unk(vdc); 794478fcd0a1Sachartre return (EINVAL); 79450a55fbb7Slm66018 } 7946d10e4ef2Snarayan 7947d10e4ef2Snarayan /* 7948c813bb04SGabriel Carrillo * Most CD/DVDs do not have a disk label and the label is 7949c813bb04SGabriel Carrillo * generated by the disk driver. So the on-disk label check 7950c813bb04SGabriel Carrillo * below may fail and we return now to avoid this problem. 
7951c813bb04SGabriel Carrillo */ 7952c813bb04SGabriel Carrillo if (vdc->vdisk_media == VD_MEDIA_CD || 7953c813bb04SGabriel Carrillo vdc->vdisk_media == VD_MEDIA_DVD) { 7954c813bb04SGabriel Carrillo mutex_enter(&vdc->lock); 7955c813bb04SGabriel Carrillo vdc_store_label_vtoc(vdc, &geom, &vtoc); 7956c813bb04SGabriel Carrillo return (0); 7957c813bb04SGabriel Carrillo } 7958c813bb04SGabriel Carrillo 7959c813bb04SGabriel Carrillo /* 7960d10e4ef2Snarayan * Read disk label from start of disk 7961d10e4ef2Snarayan */ 7962*65908c77Syu, larry liu - Sun Microsystems - Beijing China label = kmem_alloc(vdc->vdisk_bsize, KM_SLEEP); 7963d10e4ef2Snarayan buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 7964d10e4ef2Snarayan bioinit(buf); 7965*65908c77Syu, larry liu - Sun Microsystems - Beijing China buf->b_un.b_addr = (caddr_t)label; 7966*65908c77Syu, larry liu - Sun Microsystems - Beijing China buf->b_bcount = vdc->vdisk_bsize; 7967d10e4ef2Snarayan buf->b_flags = B_BUSY | B_READ; 796817cadca8Slm66018 buf->b_dev = cmpdev(dev); 7969*65908c77Syu, larry liu - Sun Microsystems - Beijing China rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)label, 7970*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vdisk_bsize, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); 79713af08d82Slm66018 if (rv) { 79723af08d82Slm66018 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 79733af08d82Slm66018 vdc->instance); 797411f54b6eSAlexandre Chartre } else if (ddi_in_panic()) { 797511f54b6eSAlexandre Chartre rv = vdc_drain_response(vdc, CB_STRATEGY, buf); 797611f54b6eSAlexandre Chartre if (rv == 0) { 797711f54b6eSAlexandre Chartre rv = geterror(buf); 797811f54b6eSAlexandre Chartre } 797978fcd0a1Sachartre } else { 7980d10e4ef2Snarayan rv = biowait(buf); 798178fcd0a1Sachartre } 798211f54b6eSAlexandre Chartre biofini(buf); 7983d10e4ef2Snarayan kmem_free(buf, sizeof (buf_t)); 79840a55fbb7Slm66018 7985*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (rv != 0 || label->dkl_magic != 
DKL_MAGIC || 7986*65908c77Syu, larry liu - Sun Microsystems - Beijing China label->dkl_cksum != vdc_lbl2cksum(label)) { 798778fcd0a1Sachartre DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 798878fcd0a1Sachartre vdc->instance); 7989*65908c77Syu, larry liu - Sun Microsystems - Beijing China kmem_free(label, vdc->vdisk_bsize); 799078fcd0a1Sachartre mutex_enter(&vdc->lock); 799178fcd0a1Sachartre vdc_store_label_unk(vdc); 799278fcd0a1Sachartre return (EINVAL); 799378fcd0a1Sachartre } 799478fcd0a1Sachartre 7995*65908c77Syu, larry liu - Sun Microsystems - Beijing China kmem_free(label, vdc->vdisk_bsize); 799678fcd0a1Sachartre mutex_enter(&vdc->lock); 799778fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 799878fcd0a1Sachartre return (0); 799978fcd0a1Sachartre } 800078fcd0a1Sachartre 800178fcd0a1Sachartre /* 800278fcd0a1Sachartre * Function: 800378fcd0a1Sachartre * vdc_validate 800478fcd0a1Sachartre * 800578fcd0a1Sachartre * Description: 800678fcd0a1Sachartre * This routine discovers the label of the disk and create the 800778fcd0a1Sachartre * appropriate device nodes if the label has changed. 800878fcd0a1Sachartre * 800978fcd0a1Sachartre * Arguments: 801078fcd0a1Sachartre * vdc - soft state pointer for this instance of the device driver. 801178fcd0a1Sachartre * 801278fcd0a1Sachartre * Return Code: 801378fcd0a1Sachartre * none. 
801478fcd0a1Sachartre */ 801578fcd0a1Sachartre static void 801678fcd0a1Sachartre vdc_validate(vdc_t *vdc) 801778fcd0a1Sachartre { 801878fcd0a1Sachartre vd_disk_label_t old_label; 8019edcc0754Sachartre vd_slice_t old_slice[V_NUMPAR]; 802078fcd0a1Sachartre int rv; 802178fcd0a1Sachartre 802278fcd0a1Sachartre ASSERT(!MUTEX_HELD(&vdc->lock)); 802378fcd0a1Sachartre 802478fcd0a1Sachartre mutex_enter(&vdc->lock); 802578fcd0a1Sachartre 802678fcd0a1Sachartre /* save the current label and vtoc */ 802778fcd0a1Sachartre old_label = vdc->vdisk_label; 8028edcc0754Sachartre bcopy(vdc->slice, &old_slice, sizeof (vd_slice_t) * V_NUMPAR); 802978fcd0a1Sachartre 803078fcd0a1Sachartre /* check the geometry */ 803178fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 803278fcd0a1Sachartre 803378fcd0a1Sachartre /* if the disk label has changed, update device nodes */ 803478fcd0a1Sachartre if (vdc->vdisk_label != old_label) { 803578fcd0a1Sachartre 803678fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 803778fcd0a1Sachartre rv = vdc_create_device_nodes_efi(vdc); 803878fcd0a1Sachartre else 803978fcd0a1Sachartre rv = vdc_create_device_nodes_vtoc(vdc); 804078fcd0a1Sachartre 804178fcd0a1Sachartre if (rv != 0) { 804278fcd0a1Sachartre DMSG(vdc, 0, "![%d] Failed to update device nodes", 804378fcd0a1Sachartre vdc->instance); 804478fcd0a1Sachartre } 804578fcd0a1Sachartre } 804678fcd0a1Sachartre 804778fcd0a1Sachartre mutex_exit(&vdc->lock); 804878fcd0a1Sachartre } 804978fcd0a1Sachartre 805078fcd0a1Sachartre static void 805178fcd0a1Sachartre vdc_validate_task(void *arg) 805278fcd0a1Sachartre { 805378fcd0a1Sachartre vdc_t *vdc = (vdc_t *)arg; 805478fcd0a1Sachartre 805578fcd0a1Sachartre vdc_validate(vdc); 805678fcd0a1Sachartre 805778fcd0a1Sachartre mutex_enter(&vdc->lock); 805878fcd0a1Sachartre ASSERT(vdc->validate_pending > 0); 805978fcd0a1Sachartre vdc->validate_pending--; 806078fcd0a1Sachartre mutex_exit(&vdc->lock); 80611ae08745Sheppo } 80624bac2208Snarayan 80634bac2208Snarayan /* 
80644bac2208Snarayan * Function: 80654bac2208Snarayan * vdc_setup_devid() 80664bac2208Snarayan * 80674bac2208Snarayan * Description: 80684bac2208Snarayan * This routine discovers the devid of a vDisk. It requests the devid of 80694bac2208Snarayan * the underlying device from the vDisk server, builds an encapsulated 80704bac2208Snarayan * devid based on the retrieved devid and registers that new devid to 80714bac2208Snarayan * the vDisk. 80724bac2208Snarayan * 80734bac2208Snarayan * Arguments: 80744bac2208Snarayan * vdc - soft state pointer for this instance of the device driver. 80754bac2208Snarayan * 80764bac2208Snarayan * Return Code: 80774bac2208Snarayan * 0 - A devid was succesfully registered for the vDisk 80784bac2208Snarayan */ 80794bac2208Snarayan static int 80804bac2208Snarayan vdc_setup_devid(vdc_t *vdc) 80814bac2208Snarayan { 80824bac2208Snarayan int rv; 80834bac2208Snarayan vd_devid_t *vd_devid; 80844bac2208Snarayan size_t bufsize, bufid_len; 80854bac2208Snarayan 80864bac2208Snarayan /* 80874bac2208Snarayan * At first sight, we don't know the size of the devid that the 80884bac2208Snarayan * server will return but this size will be encoded into the 80894bac2208Snarayan * reply. So we do a first request using a default size then we 80904bac2208Snarayan * check if this size was large enough. If not then we do a second 80914bac2208Snarayan * request with the correct size returned by the server. Note that 80924bac2208Snarayan * ldc requires size to be 8-byte aligned. 
80934bac2208Snarayan */ 80944bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 80954bac2208Snarayan sizeof (uint64_t)); 80964bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 80974bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 80984bac2208Snarayan 80993af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 81002f5224aeSachartre bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir, B_TRUE); 81013af08d82Slm66018 81023af08d82Slm66018 DMSG(vdc, 2, "sync_op returned %d\n", rv); 81033af08d82Slm66018 81044bac2208Snarayan if (rv) { 81054bac2208Snarayan kmem_free(vd_devid, bufsize); 81064bac2208Snarayan return (rv); 81074bac2208Snarayan } 81084bac2208Snarayan 81094bac2208Snarayan if (vd_devid->length > bufid_len) { 81104bac2208Snarayan /* 81114bac2208Snarayan * The returned devid is larger than the buffer used. Try again 81124bac2208Snarayan * with a buffer with the right size. 81134bac2208Snarayan */ 81144bac2208Snarayan kmem_free(vd_devid, bufsize); 81154bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 81164bac2208Snarayan sizeof (uint64_t)); 81174bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 81184bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 81194bac2208Snarayan 81203af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 81213af08d82Slm66018 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 81222f5224aeSachartre VIO_both_dir, B_TRUE); 81233af08d82Slm66018 81244bac2208Snarayan if (rv) { 81254bac2208Snarayan kmem_free(vd_devid, bufsize); 81264bac2208Snarayan return (rv); 81274bac2208Snarayan } 81284bac2208Snarayan } 81294bac2208Snarayan 81304bac2208Snarayan /* 81314bac2208Snarayan * The virtual disk should have the same device id as the one associated 81324bac2208Snarayan * with the physical disk it is mapped on, otherwise sharing a disk 81334bac2208Snarayan * between a LDom and a non-LDom may not work (for example for a shared 81344bac2208Snarayan * SVM disk set). 
81354bac2208Snarayan * 81364bac2208Snarayan * The DDI framework does not allow creating a device id with any 81374bac2208Snarayan * type so we first create a device id of type DEVID_ENCAP and then 81384bac2208Snarayan * we restore the orignal type of the physical device. 81394bac2208Snarayan */ 81404bac2208Snarayan 81413af08d82Slm66018 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 81423af08d82Slm66018 81434bac2208Snarayan /* build an encapsulated devid based on the returned devid */ 81444bac2208Snarayan if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 81454bac2208Snarayan vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 81463af08d82Slm66018 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 81474bac2208Snarayan kmem_free(vd_devid, bufsize); 81484bac2208Snarayan return (1); 81494bac2208Snarayan } 81504bac2208Snarayan 81514bac2208Snarayan DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 81524bac2208Snarayan 81534bac2208Snarayan ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 81544bac2208Snarayan 81554bac2208Snarayan kmem_free(vd_devid, bufsize); 81564bac2208Snarayan 81574bac2208Snarayan if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 81583af08d82Slm66018 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 81594bac2208Snarayan return (1); 81604bac2208Snarayan } 81614bac2208Snarayan 81624bac2208Snarayan return (0); 81634bac2208Snarayan } 81644bac2208Snarayan 81654bac2208Snarayan static void 8166edcc0754Sachartre vdc_store_label_efi(vdc_t *vdc, efi_gpt_t *gpt, efi_gpe_t *gpe) 81674bac2208Snarayan { 8168edcc0754Sachartre int i, nparts; 81694bac2208Snarayan 817078fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 817178fcd0a1Sachartre 817278fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_EFI; 8173342440ecSPrasad Singamsetty bzero(vdc->vtoc, sizeof (struct extvtoc)); 817478fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 8175edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 
8176edcc0754Sachartre 8177edcc0754Sachartre nparts = gpt->efi_gpt_NumberOfPartitionEntries; 8178edcc0754Sachartre 8179edcc0754Sachartre for (i = 0; i < nparts && i < VD_EFI_WD_SLICE; i++) { 8180edcc0754Sachartre 8181d84f0041SAlexandre Chartre if (gpe[i].efi_gpe_StartingLBA == 0 && 8182edcc0754Sachartre gpe[i].efi_gpe_EndingLBA == 0) { 8183edcc0754Sachartre continue; 81844bac2208Snarayan } 8185edcc0754Sachartre 8186edcc0754Sachartre vdc->slice[i].start = gpe[i].efi_gpe_StartingLBA; 8187edcc0754Sachartre vdc->slice[i].nblocks = gpe[i].efi_gpe_EndingLBA - 8188edcc0754Sachartre gpe[i].efi_gpe_StartingLBA + 1; 8189edcc0754Sachartre } 8190edcc0754Sachartre 8191edcc0754Sachartre ASSERT(vdc->vdisk_size != 0); 8192edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].start = 0; 8193edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].nblocks = vdc->vdisk_size; 8194edcc0754Sachartre 81954bac2208Snarayan } 819678fcd0a1Sachartre 819778fcd0a1Sachartre static void 8198342440ecSPrasad Singamsetty vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct extvtoc *vtoc) 819978fcd0a1Sachartre { 8200edcc0754Sachartre int i; 8201edcc0754Sachartre 820278fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 8203*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vdc->vdisk_bsize == vtoc->v_sectorsz); 820478fcd0a1Sachartre 820578fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_VTOC; 8206342440ecSPrasad Singamsetty bcopy(vtoc, vdc->vtoc, sizeof (struct extvtoc)); 820778fcd0a1Sachartre bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 8208edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 8209edcc0754Sachartre 8210edcc0754Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 8211edcc0754Sachartre vdc->slice[i].start = vtoc->v_part[i].p_start; 8212edcc0754Sachartre vdc->slice[i].nblocks = vtoc->v_part[i].p_size; 8213edcc0754Sachartre } 821478fcd0a1Sachartre } 821578fcd0a1Sachartre 821678fcd0a1Sachartre static void 821778fcd0a1Sachartre vdc_store_label_unk(vdc_t *vdc) 
821878fcd0a1Sachartre { 821978fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 822078fcd0a1Sachartre 822178fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_UNK; 8222342440ecSPrasad Singamsetty bzero(vdc->vtoc, sizeof (struct extvtoc)); 822378fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 8224edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 822578fcd0a1Sachartre } 8226