11ae08745Sheppo /* 21ae08745Sheppo * CDDL HEADER START 31ae08745Sheppo * 41ae08745Sheppo * The contents of this file are subject to the terms of the 51ae08745Sheppo * Common Development and Distribution License (the "License"). 61ae08745Sheppo * You may not use this file except in compliance with the License. 71ae08745Sheppo * 81ae08745Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91ae08745Sheppo * or http://www.opensolaris.org/os/licensing. 101ae08745Sheppo * See the License for the specific language governing permissions 111ae08745Sheppo * and limitations under the License. 121ae08745Sheppo * 131ae08745Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141ae08745Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151ae08745Sheppo * If applicable, add the following below this CDDL HEADER, with the 161ae08745Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171ae08745Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181ae08745Sheppo * 191ae08745Sheppo * CDDL HEADER END 201ae08745Sheppo */ 211ae08745Sheppo 221ae08745Sheppo /* 23*ca6d1280SAlexandre Chartre * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 241ae08745Sheppo */ 251ae08745Sheppo 261ae08745Sheppo /* 271ae08745Sheppo * LDoms virtual disk client (vdc) device driver 281ae08745Sheppo * 291ae08745Sheppo * This driver runs on a guest logical domain and communicates with the virtual 301ae08745Sheppo * disk server (vds) driver running on the service domain which is exporting 311ae08745Sheppo * virtualized "disks" to the guest logical domain. 321ae08745Sheppo * 331ae08745Sheppo * The driver can be divided into four sections: 341ae08745Sheppo * 351ae08745Sheppo * 1) generic device driver housekeeping 361ae08745Sheppo * _init, _fini, attach, detach, ops structures, etc. 
371ae08745Sheppo * 381ae08745Sheppo * 2) communication channel setup 391ae08745Sheppo * Setup the communications link over the LDC channel that vdc uses to 401ae08745Sheppo * talk to the vDisk server. Initialise the descriptor ring which 411ae08745Sheppo * allows the LDC clients to transfer data via memory mappings. 421ae08745Sheppo * 431ae08745Sheppo * 3) Support exported to upper layers (filesystems, etc) 441ae08745Sheppo * The upper layers call into vdc via strategy(9E) and DKIO(7I) 451ae08745Sheppo * ioctl calls. vdc will copy the data to be written to the descriptor 461ae08745Sheppo * ring or maps the buffer to store the data read by the vDisk 471ae08745Sheppo * server into the descriptor ring. It then sends a message to the 481ae08745Sheppo * vDisk server requesting it to complete the operation. 491ae08745Sheppo * 501ae08745Sheppo * 4) Handling responses from vDisk server. 511ae08745Sheppo * The vDisk server will ACK some or all of the messages vdc sends to it 521ae08745Sheppo * (this is configured during the handshake). Upon receipt of an ACK 531ae08745Sheppo * vdc will check the descriptor ring and signal to the upper layer 541ae08745Sheppo * code waiting on the IO. 
551ae08745Sheppo */ 561ae08745Sheppo 57e1ebb9ecSlm66018 #include <sys/atomic.h> 581ae08745Sheppo #include <sys/conf.h> 591ae08745Sheppo #include <sys/disp.h> 601ae08745Sheppo #include <sys/ddi.h> 611ae08745Sheppo #include <sys/dkio.h> 621ae08745Sheppo #include <sys/efi_partition.h> 631ae08745Sheppo #include <sys/fcntl.h> 641ae08745Sheppo #include <sys/file.h> 65366a92acSlm66018 #include <sys/kstat.h> 661ae08745Sheppo #include <sys/mach_descrip.h> 671ae08745Sheppo #include <sys/modctl.h> 681ae08745Sheppo #include <sys/mdeg.h> 691ae08745Sheppo #include <sys/note.h> 701ae08745Sheppo #include <sys/open.h> 716ace3c90SAlexandre Chartre #include <sys/random.h> 72d10e4ef2Snarayan #include <sys/sdt.h> 731ae08745Sheppo #include <sys/stat.h> 741ae08745Sheppo #include <sys/sunddi.h> 751ae08745Sheppo #include <sys/types.h> 761ae08745Sheppo #include <sys/promif.h> 772f5224aeSachartre #include <sys/var.h> 781ae08745Sheppo #include <sys/vtoc.h> 791ae08745Sheppo #include <sys/archsystm.h> 801ae08745Sheppo #include <sys/sysmacros.h> 811ae08745Sheppo 821ae08745Sheppo #include <sys/cdio.h> 831ae08745Sheppo #include <sys/dktp/fdisk.h> 8487a7269eSachartre #include <sys/dktp/dadkio.h> 856ace3c90SAlexandre Chartre #include <sys/fs/dv_node.h> 862f5224aeSachartre #include <sys/mhd.h> 871ae08745Sheppo #include <sys/scsi/generic/sense.h> 882f5224aeSachartre #include <sys/scsi/impl/uscsi.h> 892f5224aeSachartre #include <sys/scsi/impl/services.h> 902f5224aeSachartre #include <sys/scsi/targets/sddef.h> 911ae08745Sheppo 921ae08745Sheppo #include <sys/ldoms.h> 931ae08745Sheppo #include <sys/ldc.h> 941ae08745Sheppo #include <sys/vio_common.h> 951ae08745Sheppo #include <sys/vio_mailbox.h> 9617cadca8Slm66018 #include <sys/vio_util.h> 971ae08745Sheppo #include <sys/vdsk_common.h> 981ae08745Sheppo #include <sys/vdsk_mailbox.h> 991ae08745Sheppo #include <sys/vdc.h> 1001ae08745Sheppo 101342440ecSPrasad Singamsetty #define VD_OLDVTOC_LIMIT 0x7fffffff 102342440ecSPrasad Singamsetty 1031ae08745Sheppo /* 
1041ae08745Sheppo * function prototypes 1051ae08745Sheppo */ 1061ae08745Sheppo 1071ae08745Sheppo /* standard driver functions */ 1081ae08745Sheppo static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 1091ae08745Sheppo static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 1101ae08745Sheppo static int vdc_strategy(struct buf *buf); 1111ae08745Sheppo static int vdc_print(dev_t dev, char *str); 1121ae08745Sheppo static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 1131ae08745Sheppo static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 1141ae08745Sheppo static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 1151ae08745Sheppo static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1161ae08745Sheppo cred_t *credp, int *rvalp); 1171ae08745Sheppo static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 1181ae08745Sheppo static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 1191ae08745Sheppo 1201ae08745Sheppo static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 1211ae08745Sheppo void *arg, void **resultp); 1221ae08745Sheppo static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 1231ae08745Sheppo static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 1245b98b509Sachartre static int vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 1255b98b509Sachartre int mod_flags, char *name, caddr_t valuep, int *lengthp); 1261ae08745Sheppo 1271ae08745Sheppo /* setup */ 1280d0c8d4bSnarayan static void vdc_min(struct buf *bufp); 1290a55fbb7Slm66018 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 1308cd10891Snarayan static int vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr); 1311ae08745Sheppo static int vdc_start_ldc_connection(vdc_t *vdc); 1321ae08745Sheppo static int vdc_create_device_nodes(vdc_t *vdc); 1334bac2208Snarayan static int vdc_create_device_nodes_efi(vdc_t *vdc); 1344bac2208Snarayan static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 
135366a92acSlm66018 static void vdc_create_io_kstats(vdc_t *vdc); 136366a92acSlm66018 static void vdc_create_err_kstats(vdc_t *vdc); 137366a92acSlm66018 static void vdc_set_err_kstats(vdc_t *vdc); 138655fd6a9Sachartre static int vdc_get_md_node(dev_info_t *dip, md_t **mdpp, 1398cd10891Snarayan mde_cookie_t *vd_nodep); 1408cd10891Snarayan static int vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep); 1418cd10891Snarayan static void vdc_fini_ports(vdc_t *vdc); 1428cd10891Snarayan static void vdc_switch_server(vdc_t *vdcp); 1430a55fbb7Slm66018 static int vdc_do_ldc_up(vdc_t *vdc); 1448cd10891Snarayan static void vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr); 1451ae08745Sheppo static int vdc_init_descriptor_ring(vdc_t *vdc); 1461ae08745Sheppo static void vdc_destroy_descriptor_ring(vdc_t *vdc); 1474bac2208Snarayan static int vdc_setup_devid(vdc_t *vdc); 148edcc0754Sachartre static void vdc_store_label_efi(vdc_t *, efi_gpt_t *, efi_gpe_t *); 149342440ecSPrasad Singamsetty static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, 150342440ecSPrasad Singamsetty struct extvtoc *); 15178fcd0a1Sachartre static void vdc_store_label_unk(vdc_t *vdc); 15278fcd0a1Sachartre static boolean_t vdc_is_opened(vdc_t *vdc); 153de3a5331SRamesh Chitrothu static void vdc_update_size(vdc_t *vdc, size_t, size_t, size_t); 15465908c77Syu, larry liu - Sun Microsystems - Beijing China static int vdc_update_vio_bsize(vdc_t *vdc, uint32_t); 1551ae08745Sheppo 1561ae08745Sheppo /* handshake with vds */ 1570a55fbb7Slm66018 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 1583af08d82Slm66018 static int vdc_ver_negotiation(vdc_t *vdcp); 1591ae08745Sheppo static int vdc_init_attr_negotiation(vdc_t *vdc); 1603af08d82Slm66018 static int vdc_attr_negotiation(vdc_t *vdcp); 1611ae08745Sheppo static int vdc_init_dring_negotiate(vdc_t *vdc); 1623af08d82Slm66018 static int vdc_dring_negotiation(vdc_t *vdcp); 1633af08d82Slm66018 static int vdc_send_rdx(vdc_t *vdcp); 
1643af08d82Slm66018 static int vdc_rdx_exchange(vdc_t *vdcp); 1650a55fbb7Slm66018 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 1661ae08745Sheppo 1670a55fbb7Slm66018 /* processing incoming messages from vDisk server */ 1681ae08745Sheppo static void vdc_process_msg_thread(vdc_t *vdc); 1693af08d82Slm66018 static int vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 1703af08d82Slm66018 1710a55fbb7Slm66018 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 1723af08d82Slm66018 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 1730a55fbb7Slm66018 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 1740a55fbb7Slm66018 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 1750a55fbb7Slm66018 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 1763af08d82Slm66018 static int vdc_send_request(vdc_t *vdcp, int operation, 1773af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 1786ace3c90SAlexandre Chartre buf_t *bufp, vio_desc_direction_t dir, int flags); 1793af08d82Slm66018 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 1803af08d82Slm66018 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 1813af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 1826ace3c90SAlexandre Chartre buf_t *bufp, vio_desc_direction_t dir, int flags); 1832f5224aeSachartre static int vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, 1846ace3c90SAlexandre Chartre size_t nbytes, int slice, diskaddr_t offset, 1856ace3c90SAlexandre Chartre vio_desc_direction_t dir, boolean_t); 1866ace3c90SAlexandre Chartre static int vdc_do_op(vdc_t *vdc, int op, caddr_t addr, size_t nbytes, 1876ace3c90SAlexandre Chartre int slice, diskaddr_t offset, struct buf *bufp, 1886ace3c90SAlexandre Chartre vio_desc_direction_t dir, int flags); 1893af08d82Slm66018 1903af08d82Slm66018 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 
1916ace3c90SAlexandre Chartre static int vdc_drain_response(vdc_t *vdcp, struct buf *buf); 1921ae08745Sheppo static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 1933af08d82Slm66018 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 194e1ebb9ecSlm66018 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 1951ae08745Sheppo 1961ae08745Sheppo /* dkio */ 1972f5224aeSachartre static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, 1982f5224aeSachartre int *rvalp); 199edcc0754Sachartre static int vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg); 20078fcd0a1Sachartre static void vdc_create_fake_geometry(vdc_t *vdc); 20178fcd0a1Sachartre static int vdc_validate_geometry(vdc_t *vdc); 20278fcd0a1Sachartre static void vdc_validate(vdc_t *vdc); 20378fcd0a1Sachartre static void vdc_validate_task(void *arg); 204d10e4ef2Snarayan static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 205d10e4ef2Snarayan int mode, int dir); 2064bac2208Snarayan static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 2074bac2208Snarayan int mode, int dir); 2084bac2208Snarayan static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 2094bac2208Snarayan int mode, int dir); 210d10e4ef2Snarayan static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 211d10e4ef2Snarayan int mode, int dir); 212d10e4ef2Snarayan static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 213d10e4ef2Snarayan int mode, int dir); 214342440ecSPrasad Singamsetty static int vdc_get_extvtoc_convert(vdc_t *vdc, void *from, void *to, 215342440ecSPrasad Singamsetty int mode, int dir); 216342440ecSPrasad Singamsetty static int vdc_set_extvtoc_convert(vdc_t *vdc, void *from, void *to, 217342440ecSPrasad Singamsetty int mode, int dir); 218d10e4ef2Snarayan static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 219d10e4ef2Snarayan int mode, int dir); 220d10e4ef2Snarayan static int vdc_set_geom_convert(vdc_t *vdc, void 
*from, void *to, 221d10e4ef2Snarayan int mode, int dir); 2224bac2208Snarayan static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 2234bac2208Snarayan int mode, int dir); 2244bac2208Snarayan static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 2254bac2208Snarayan int mode, int dir); 2261ae08745Sheppo 2272f5224aeSachartre static void vdc_ownership_update(vdc_t *vdc, int ownership_flags); 2286ace3c90SAlexandre Chartre static int vdc_access_set(vdc_t *vdc, uint64_t flags); 2296ace3c90SAlexandre Chartre static vdc_io_t *vdc_eio_queue(vdc_t *vdc, int index); 2306ace3c90SAlexandre Chartre static void vdc_eio_unqueue(vdc_t *vdc, clock_t deadline, 2316ace3c90SAlexandre Chartre boolean_t complete_io); 2326ace3c90SAlexandre Chartre static int vdc_eio_check(vdc_t *vdc, int flags); 2336ace3c90SAlexandre Chartre static void vdc_eio_thread(void *arg); 2342f5224aeSachartre 2351ae08745Sheppo /* 2361ae08745Sheppo * Module variables 2371ae08745Sheppo */ 238e1ebb9ecSlm66018 239e1ebb9ecSlm66018 /* 240*ca6d1280SAlexandre Chartre * Number of handshake retries with the current server before switching to 241*ca6d1280SAlexandre Chartre * a different server. These retries are done so that we stick with the same 242*ca6d1280SAlexandre Chartre * server if vdc receives a LDC reset event during the initiation of the 243*ca6d1280SAlexandre Chartre * handshake. This can happen if vdc reset the LDC channel and then immediately 244*ca6d1280SAlexandre Chartre * retry a connexion before it has received the LDC reset event. 245*ca6d1280SAlexandre Chartre * 246*ca6d1280SAlexandre Chartre * If there is only one server then we "switch" to the same server. We also 247*ca6d1280SAlexandre Chartre * switch if the handshake has reached the attribute negotiate step whatever 248*ca6d1280SAlexandre Chartre * the number of handshake retries might be. 
249*ca6d1280SAlexandre Chartre */ 250*ca6d1280SAlexandre Chartre static uint_t vdc_hshake_retries = VDC_HSHAKE_RETRIES; 251*ca6d1280SAlexandre Chartre 252*ca6d1280SAlexandre Chartre /* 253*ca6d1280SAlexandre Chartre * If the handshake done during the attach fails then the two following 254*ca6d1280SAlexandre Chartre * variables will also be used to control the number of retries for the 255*ca6d1280SAlexandre Chartre * next handshakes. In that case, when a handshake is done after the 256*ca6d1280SAlexandre Chartre * attach (i.e. the vdc lifecycle is VDC_ONLINE_PENDING) then the handshake 257*ca6d1280SAlexandre Chartre * will be retried until we have done an attribution negotiation with each 258*ca6d1280SAlexandre Chartre * server, with a specified minimum total number of negotations (the value 259*ca6d1280SAlexandre Chartre * of the vdc_hattr_min_initial or vdc_hattr_min variable). 260*ca6d1280SAlexandre Chartre * 261*ca6d1280SAlexandre Chartre * This prevents new I/Os on a newly used vdisk to block forever if the 262*ca6d1280SAlexandre Chartre * attribute negotiations can not be done, and to limit the amount of time 263*ca6d1280SAlexandre Chartre * before I/Os will fail. Basically, attribute negotiations will fail when 264*ca6d1280SAlexandre Chartre * the service is up but the backend does not exist. In that case, vds will 265*ca6d1280SAlexandre Chartre * typically retry to access the backend during 50 seconds. So I/Os will fail 266*ca6d1280SAlexandre Chartre * after the following amount of time: 267*ca6d1280SAlexandre Chartre * 268*ca6d1280SAlexandre Chartre * 50 seconds x max(number of servers, vdc->hattr_min) 269*ca6d1280SAlexandre Chartre * 270*ca6d1280SAlexandre Chartre * After that the handshake done during the attach has failed then the next 271*ca6d1280SAlexandre Chartre * handshake will use vdc_attr_min_initial. This handshake will correspond to 272*ca6d1280SAlexandre Chartre * the very first I/O to the device. 
If this handshake also fails then 273*ca6d1280SAlexandre Chartre * vdc_hattr_min will be used for subsequent handshakes. We typically allow 274*ca6d1280SAlexandre Chartre * more retries for the first handshake (VDC_HATTR_MIN_INITIAL = 3) to give more 275*ca6d1280SAlexandre Chartre * time for the backend to become available (50s x VDC_HATTR_MIN_INITIAL = 150s) 276*ca6d1280SAlexandre Chartre * in case this is a critical vdisk (e.g. vdisk access during boot). Then we use 277*ca6d1280SAlexandre Chartre * a smaller value (VDC_HATTR_MIN = 1) to avoid waiting too long for each I/O. 278*ca6d1280SAlexandre Chartre */ 279*ca6d1280SAlexandre Chartre static uint_t vdc_hattr_min_initial = VDC_HATTR_MIN_INITIAL; 280*ca6d1280SAlexandre Chartre static uint_t vdc_hattr_min = VDC_HATTR_MIN; 281*ca6d1280SAlexandre Chartre 282*ca6d1280SAlexandre Chartre /* 283e1ebb9ecSlm66018 * Tunable variables to control how long vdc waits before timing out on 284e1ebb9ecSlm66018 * various operations 285e1ebb9ecSlm66018 */ 286655fd6a9Sachartre static int vdc_timeout = 0; /* units: seconds */ 2878cd10891Snarayan static int vdc_ldcup_timeout = 1; /* units: seconds */ 288655fd6a9Sachartre 2893af08d82Slm66018 static uint64_t vdc_hz_min_ldc_delay; 2903af08d82Slm66018 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 2913af08d82Slm66018 static uint64_t vdc_hz_max_ldc_delay; 2923af08d82Slm66018 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 2933af08d82Slm66018 2943af08d82Slm66018 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 2953af08d82Slm66018 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 296e1ebb9ecSlm66018 297e1ebb9ecSlm66018 /* values for dumping - need to run in a tighter loop */ 298e1ebb9ecSlm66018 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 299e1ebb9ecSlm66018 static int vdc_dump_retries = 100; 300e1ebb9ecSlm66018 3012f5224aeSachartre static uint16_t vdc_scsi_timeout = 60; /* 60s units: seconds */ 3022f5224aeSachartre 
3032f5224aeSachartre static uint64_t vdc_ownership_delay = 6 * MICROSEC; /* 6s units: usec */ 3042f5224aeSachartre 305e1ebb9ecSlm66018 /* Count of the number of vdc instances attached */ 306e1ebb9ecSlm66018 static volatile uint32_t vdc_instance_count = 0; 3071ae08745Sheppo 3082f5224aeSachartre /* Tunable to log all SCSI errors */ 3092f5224aeSachartre static boolean_t vdc_scsi_log_error = B_FALSE; 3102f5224aeSachartre 3111ae08745Sheppo /* Soft state pointer */ 3121ae08745Sheppo static void *vdc_state; 3131ae08745Sheppo 3143af08d82Slm66018 /* 3153af08d82Slm66018 * Controlling the verbosity of the error/debug messages 3163af08d82Slm66018 * 3173af08d82Slm66018 * vdc_msglevel - controls level of messages 3183af08d82Slm66018 * vdc_matchinst - 64-bit variable where each bit corresponds 3193af08d82Slm66018 * to the vdc instance the vdc_msglevel applies. 3203af08d82Slm66018 */ 3213af08d82Slm66018 int vdc_msglevel = 0x0; 3223af08d82Slm66018 uint64_t vdc_matchinst = 0ull; 3231ae08745Sheppo 3240a55fbb7Slm66018 /* 3250a55fbb7Slm66018 * Supported vDisk protocol version pairs. 3260a55fbb7Slm66018 * 3270a55fbb7Slm66018 * The first array entry is the latest and preferred version. 
3280a55fbb7Slm66018 */ 32917cadca8Slm66018 static const vio_ver_t vdc_version[] = {{1, 1}}; 3301ae08745Sheppo 3311ae08745Sheppo static struct cb_ops vdc_cb_ops = { 3321ae08745Sheppo vdc_open, /* cb_open */ 3331ae08745Sheppo vdc_close, /* cb_close */ 3341ae08745Sheppo vdc_strategy, /* cb_strategy */ 3351ae08745Sheppo vdc_print, /* cb_print */ 3361ae08745Sheppo vdc_dump, /* cb_dump */ 3371ae08745Sheppo vdc_read, /* cb_read */ 3381ae08745Sheppo vdc_write, /* cb_write */ 3391ae08745Sheppo vdc_ioctl, /* cb_ioctl */ 3401ae08745Sheppo nodev, /* cb_devmap */ 3411ae08745Sheppo nodev, /* cb_mmap */ 3421ae08745Sheppo nodev, /* cb_segmap */ 3431ae08745Sheppo nochpoll, /* cb_chpoll */ 3445b98b509Sachartre vdc_prop_op, /* cb_prop_op */ 3451ae08745Sheppo NULL, /* cb_str */ 3461ae08745Sheppo D_MP | D_64BIT, /* cb_flag */ 3471ae08745Sheppo CB_REV, /* cb_rev */ 3481ae08745Sheppo vdc_aread, /* cb_aread */ 3491ae08745Sheppo vdc_awrite /* cb_awrite */ 3501ae08745Sheppo }; 3511ae08745Sheppo 3521ae08745Sheppo static struct dev_ops vdc_ops = { 3531ae08745Sheppo DEVO_REV, /* devo_rev */ 3541ae08745Sheppo 0, /* devo_refcnt */ 3551ae08745Sheppo vdc_getinfo, /* devo_getinfo */ 3561ae08745Sheppo nulldev, /* devo_identify */ 3571ae08745Sheppo nulldev, /* devo_probe */ 3581ae08745Sheppo vdc_attach, /* devo_attach */ 3591ae08745Sheppo vdc_detach, /* devo_detach */ 3601ae08745Sheppo nodev, /* devo_reset */ 3611ae08745Sheppo &vdc_cb_ops, /* devo_cb_ops */ 3621ae08745Sheppo NULL, /* devo_bus_ops */ 36319397407SSherry Moore nulldev, /* devo_power */ 36419397407SSherry Moore ddi_quiesce_not_needed, /* devo_quiesce */ 3651ae08745Sheppo }; 3661ae08745Sheppo 3671ae08745Sheppo static struct modldrv modldrv = { 3681ae08745Sheppo &mod_driverops, 369205eeb1aSlm66018 "virtual disk client", 3701ae08745Sheppo &vdc_ops, 3711ae08745Sheppo }; 3721ae08745Sheppo 3731ae08745Sheppo static struct modlinkage modlinkage = { 3741ae08745Sheppo MODREV_1, 3751ae08745Sheppo &modldrv, 3761ae08745Sheppo NULL 3771ae08745Sheppo 
}; 3781ae08745Sheppo 3791ae08745Sheppo /* -------------------------------------------------------------------------- */ 3801ae08745Sheppo 3811ae08745Sheppo /* 3821ae08745Sheppo * Device Driver housekeeping and setup 3831ae08745Sheppo */ 3841ae08745Sheppo 3851ae08745Sheppo int 3861ae08745Sheppo _init(void) 3871ae08745Sheppo { 3881ae08745Sheppo int status; 3891ae08745Sheppo 3901ae08745Sheppo if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 3911ae08745Sheppo return (status); 3921ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) 3931ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3941ae08745Sheppo return (status); 3951ae08745Sheppo } 3961ae08745Sheppo 3971ae08745Sheppo int 3981ae08745Sheppo _info(struct modinfo *modinfop) 3991ae08745Sheppo { 4001ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 4011ae08745Sheppo } 4021ae08745Sheppo 4031ae08745Sheppo int 4041ae08745Sheppo _fini(void) 4051ae08745Sheppo { 4061ae08745Sheppo int status; 4071ae08745Sheppo 4081ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 4091ae08745Sheppo return (status); 4101ae08745Sheppo ddi_soft_state_fini(&vdc_state); 4111ae08745Sheppo return (0); 4121ae08745Sheppo } 4131ae08745Sheppo 4141ae08745Sheppo static int 4151ae08745Sheppo vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 4161ae08745Sheppo { 4171ae08745Sheppo _NOTE(ARGUNUSED(dip)) 4181ae08745Sheppo 4190d0c8d4bSnarayan int instance = VDCUNIT((dev_t)arg); 4201ae08745Sheppo vdc_t *vdc = NULL; 4211ae08745Sheppo 4221ae08745Sheppo switch (cmd) { 4231ae08745Sheppo case DDI_INFO_DEVT2DEVINFO: 4241ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 4251ae08745Sheppo *resultp = NULL; 4261ae08745Sheppo return (DDI_FAILURE); 4271ae08745Sheppo } 4281ae08745Sheppo *resultp = vdc->dip; 4291ae08745Sheppo return (DDI_SUCCESS); 4301ae08745Sheppo case DDI_INFO_DEVT2INSTANCE: 4311ae08745Sheppo *resultp = (void *)(uintptr_t)instance; 4321ae08745Sheppo return 
(DDI_SUCCESS); 4331ae08745Sheppo default: 4341ae08745Sheppo *resultp = NULL; 4351ae08745Sheppo return (DDI_FAILURE); 4361ae08745Sheppo } 4371ae08745Sheppo } 4381ae08745Sheppo 4391ae08745Sheppo static int 4401ae08745Sheppo vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4411ae08745Sheppo { 4426ace3c90SAlexandre Chartre kt_did_t eio_tid, ownership_tid; 4431ae08745Sheppo int instance; 4441ae08745Sheppo int rv; 445d7400d00Sachartre vdc_server_t *srvr; 4461ae08745Sheppo vdc_t *vdc = NULL; 4471ae08745Sheppo 4481ae08745Sheppo switch (cmd) { 4491ae08745Sheppo case DDI_DETACH: 4501ae08745Sheppo /* the real work happens below */ 4511ae08745Sheppo break; 4521ae08745Sheppo case DDI_SUSPEND: 4531ae08745Sheppo /* nothing to do for this non-device */ 4541ae08745Sheppo return (DDI_SUCCESS); 4551ae08745Sheppo default: 4561ae08745Sheppo return (DDI_FAILURE); 4571ae08745Sheppo } 4581ae08745Sheppo 4591ae08745Sheppo ASSERT(cmd == DDI_DETACH); 4601ae08745Sheppo instance = ddi_get_instance(dip); 4613af08d82Slm66018 DMSGX(1, "[%d] Entered\n", instance); 4621ae08745Sheppo 4631ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 464e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 4651ae08745Sheppo return (DDI_FAILURE); 4661ae08745Sheppo } 4671ae08745Sheppo 4686ace3c90SAlexandre Chartre if (vdc_is_opened(vdc)) { 4693af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 4701ae08745Sheppo return (DDI_FAILURE); 4711ae08745Sheppo } 4721ae08745Sheppo 47378fcd0a1Sachartre if (vdc->dkio_flush_pending) { 47478fcd0a1Sachartre DMSG(vdc, 0, 47578fcd0a1Sachartre "[%d] Cannot detach: %d outstanding DKIO flushes\n", 47678fcd0a1Sachartre instance, vdc->dkio_flush_pending); 47778fcd0a1Sachartre return (DDI_FAILURE); 47878fcd0a1Sachartre } 47978fcd0a1Sachartre 48078fcd0a1Sachartre if (vdc->validate_pending) { 48178fcd0a1Sachartre DMSG(vdc, 0, 48278fcd0a1Sachartre "[%d] Cannot detach: %d outstanding validate request\n", 
48378fcd0a1Sachartre instance, vdc->validate_pending); 48478fcd0a1Sachartre return (DDI_FAILURE); 48578fcd0a1Sachartre } 48678fcd0a1Sachartre 4873af08d82Slm66018 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 4883af08d82Slm66018 4892f5224aeSachartre /* If we took ownership, release ownership */ 4902f5224aeSachartre mutex_enter(&vdc->ownership_lock); 4912f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) { 4926ace3c90SAlexandre Chartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR); 4932f5224aeSachartre if (rv == 0) { 4942f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 4952f5224aeSachartre } 4962f5224aeSachartre } 4972f5224aeSachartre mutex_exit(&vdc->ownership_lock); 4982f5224aeSachartre 4993af08d82Slm66018 /* mark instance as detaching */ 500*ca6d1280SAlexandre Chartre mutex_enter(&vdc->lock); 5013af08d82Slm66018 vdc->lifecycle = VDC_LC_DETACHING; 502*ca6d1280SAlexandre Chartre mutex_exit(&vdc->lock); 5031ae08745Sheppo 5041ae08745Sheppo /* 505d7400d00Sachartre * Try and disable callbacks to prevent another handshake. We have to 506d7400d00Sachartre * disable callbacks for all servers. 
5071ae08745Sheppo */ 508d7400d00Sachartre for (srvr = vdc->server_list; srvr != NULL; srvr = srvr->next) { 509d7400d00Sachartre rv = ldc_set_cb_mode(srvr->ldc_handle, LDC_CB_DISABLE); 510d7400d00Sachartre DMSG(vdc, 0, "callback disabled (ldc=%lu, rv=%d)\n", 511d7400d00Sachartre srvr->ldc_id, rv); 5128cd10891Snarayan } 5131ae08745Sheppo 5141ae08745Sheppo if (vdc->initialized & VDC_THREAD) { 5153af08d82Slm66018 mutex_enter(&vdc->read_lock); 5163af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 5173af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) { 5183af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 5193af08d82Slm66018 cv_signal(&vdc->read_cv); 5201ae08745Sheppo } 5213af08d82Slm66018 5223af08d82Slm66018 mutex_exit(&vdc->read_lock); 5233af08d82Slm66018 5243af08d82Slm66018 /* wake up any thread waiting for connection to come online */ 5253af08d82Slm66018 mutex_enter(&vdc->lock); 5263af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 5273af08d82Slm66018 DMSG(vdc, 0, 5283af08d82Slm66018 "[%d] write reset - move to resetting state...\n", 5293af08d82Slm66018 instance); 5303af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 5313af08d82Slm66018 cv_signal(&vdc->initwait_cv); 5326ace3c90SAlexandre Chartre } else if (vdc->state == VDC_STATE_FAILED) { 5336ace3c90SAlexandre Chartre vdc->io_pending = B_TRUE; 5346ace3c90SAlexandre Chartre cv_signal(&vdc->io_pending_cv); 5353af08d82Slm66018 } 5363af08d82Slm66018 mutex_exit(&vdc->lock); 5373af08d82Slm66018 5383af08d82Slm66018 /* now wait until state transitions to VDC_STATE_DETACH */ 5393af08d82Slm66018 thread_join(vdc->msg_proc_thr->t_did); 5403af08d82Slm66018 ASSERT(vdc->state == VDC_STATE_DETACH); 5413af08d82Slm66018 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 5423af08d82Slm66018 vdc->instance); 5431ae08745Sheppo } 5441ae08745Sheppo 5451ae08745Sheppo mutex_enter(&vdc->lock); 5461ae08745Sheppo 5471ae08745Sheppo if (vdc->initialized & VDC_DRING) 5481ae08745Sheppo vdc_destroy_descriptor_ring(vdc); 
5491ae08745Sheppo 5508cd10891Snarayan vdc_fini_ports(vdc); 5511ae08745Sheppo 5526ace3c90SAlexandre Chartre if (vdc->eio_thread) { 5536ace3c90SAlexandre Chartre eio_tid = vdc->eio_thread->t_did; 5542f5224aeSachartre vdc->failfast_interval = 0; 5556ace3c90SAlexandre Chartre ASSERT(vdc->num_servers == 0); 5566ace3c90SAlexandre Chartre cv_signal(&vdc->eio_cv); 5572f5224aeSachartre } else { 5586ace3c90SAlexandre Chartre eio_tid = 0; 5592f5224aeSachartre } 5602f5224aeSachartre 5612f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_WANTED) { 5622f5224aeSachartre ownership_tid = vdc->ownership_thread->t_did; 5632f5224aeSachartre vdc->ownership = VDC_OWNERSHIP_NONE; 5642f5224aeSachartre cv_signal(&vdc->ownership_cv); 5652f5224aeSachartre } else { 5662f5224aeSachartre ownership_tid = 0; 5672f5224aeSachartre } 5682f5224aeSachartre 5691ae08745Sheppo mutex_exit(&vdc->lock); 5701ae08745Sheppo 5716ace3c90SAlexandre Chartre if (eio_tid != 0) 5726ace3c90SAlexandre Chartre thread_join(eio_tid); 5732f5224aeSachartre 5742f5224aeSachartre if (ownership_tid != 0) 5752f5224aeSachartre thread_join(ownership_tid); 5762f5224aeSachartre 5775b98b509Sachartre if (vdc->initialized & VDC_MINOR) 5781ae08745Sheppo ddi_remove_minor_node(dip, NULL); 5791ae08745Sheppo 580366a92acSlm66018 if (vdc->io_stats) { 581366a92acSlm66018 kstat_delete(vdc->io_stats); 582366a92acSlm66018 vdc->io_stats = NULL; 583366a92acSlm66018 } 584366a92acSlm66018 585366a92acSlm66018 if (vdc->err_stats) { 586366a92acSlm66018 kstat_delete(vdc->err_stats); 587366a92acSlm66018 vdc->err_stats = NULL; 588366a92acSlm66018 } 589366a92acSlm66018 5901ae08745Sheppo if (vdc->initialized & VDC_LOCKS) { 5911ae08745Sheppo mutex_destroy(&vdc->lock); 5923af08d82Slm66018 mutex_destroy(&vdc->read_lock); 5932f5224aeSachartre mutex_destroy(&vdc->ownership_lock); 5943af08d82Slm66018 cv_destroy(&vdc->initwait_cv); 5953af08d82Slm66018 cv_destroy(&vdc->dring_free_cv); 5963af08d82Slm66018 cv_destroy(&vdc->membind_cv); 5973af08d82Slm66018 
cv_destroy(&vdc->sync_blocked_cv); 5983af08d82Slm66018 cv_destroy(&vdc->read_cv); 5993af08d82Slm66018 cv_destroy(&vdc->running_cv); 6006ace3c90SAlexandre Chartre cv_destroy(&vdc->io_pending_cv); 6012f5224aeSachartre cv_destroy(&vdc->ownership_cv); 6026ace3c90SAlexandre Chartre cv_destroy(&vdc->eio_cv); 6031ae08745Sheppo } 6041ae08745Sheppo 6051ae08745Sheppo if (vdc->minfo) 6061ae08745Sheppo kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 6071ae08745Sheppo 6081ae08745Sheppo if (vdc->cinfo) 6091ae08745Sheppo kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 6101ae08745Sheppo 6111ae08745Sheppo if (vdc->vtoc) 612342440ecSPrasad Singamsetty kmem_free(vdc->vtoc, sizeof (struct extvtoc)); 6131ae08745Sheppo 61478fcd0a1Sachartre if (vdc->geom) 61578fcd0a1Sachartre kmem_free(vdc->geom, sizeof (struct dk_geom)); 6160a55fbb7Slm66018 6174bac2208Snarayan if (vdc->devid) { 6184bac2208Snarayan ddi_devid_unregister(dip); 6194bac2208Snarayan ddi_devid_free(vdc->devid); 6204bac2208Snarayan } 6214bac2208Snarayan 6221ae08745Sheppo if (vdc->initialized & VDC_SOFT_STATE) 6231ae08745Sheppo ddi_soft_state_free(vdc_state, instance); 6241ae08745Sheppo 6253af08d82Slm66018 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 6261ae08745Sheppo 6271ae08745Sheppo return (DDI_SUCCESS); 6281ae08745Sheppo } 6291ae08745Sheppo 6301ae08745Sheppo 6311ae08745Sheppo static int 6321ae08745Sheppo vdc_do_attach(dev_info_t *dip) 6331ae08745Sheppo { 6341ae08745Sheppo int instance; 6351ae08745Sheppo vdc_t *vdc = NULL; 6361ae08745Sheppo int status; 637655fd6a9Sachartre md_t *mdp; 6388cd10891Snarayan mde_cookie_t vd_node; 6391ae08745Sheppo 6401ae08745Sheppo ASSERT(dip != NULL); 6411ae08745Sheppo 6421ae08745Sheppo instance = ddi_get_instance(dip); 6431ae08745Sheppo if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 644e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 645e1ebb9ecSlm66018 instance); 6461ae08745Sheppo return (DDI_FAILURE); 6471ae08745Sheppo } 6481ae08745Sheppo 
6491ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 650e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 6511ae08745Sheppo return (DDI_FAILURE); 6521ae08745Sheppo } 6531ae08745Sheppo 6541ae08745Sheppo /* 6551ae08745Sheppo * We assign the value to initialized in this case to zero out the 6561ae08745Sheppo * variable and then set bits in it to indicate what has been done 6571ae08745Sheppo */ 6581ae08745Sheppo vdc->initialized = VDC_SOFT_STATE; 6591ae08745Sheppo 6603af08d82Slm66018 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 6613af08d82Slm66018 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 6621ae08745Sheppo 6631ae08745Sheppo vdc->dip = dip; 6641ae08745Sheppo vdc->instance = instance; 6651ae08745Sheppo vdc->vdisk_type = VD_DISK_TYPE_UNK; 6664bac2208Snarayan vdc->vdisk_label = VD_DISK_LABEL_UNK; 6673af08d82Slm66018 vdc->state = VDC_STATE_INIT; 6683af08d82Slm66018 vdc->lifecycle = VDC_LC_ATTACHING; 6691ae08745Sheppo vdc->session_id = 0; 67065908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vdisk_bsize = DEV_BSIZE; 67165908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = 0; 67265908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bshift = 0; 67365908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->max_xfer_sz = maxphys / vdc->vdisk_bsize; 6741ae08745Sheppo 67517cadca8Slm66018 /* 67617cadca8Slm66018 * We assume, for now, that the vDisk server will export 'read' 67717cadca8Slm66018 * operations to us at a minimum (this is needed because of checks 67817cadca8Slm66018 * in vdc for supported operations early in the handshake process). 67917cadca8Slm66018 * The vDisk server will return ENOTSUP if this is not the case. 68017cadca8Slm66018 * The value will be overwritten during the attribute exchange with 68117cadca8Slm66018 * the bitmask of operations exported by server. 
68217cadca8Slm66018 */ 68317cadca8Slm66018 vdc->operations = VD_OP_MASK_READ; 68417cadca8Slm66018 6851ae08745Sheppo vdc->vtoc = NULL; 68678fcd0a1Sachartre vdc->geom = NULL; 6871ae08745Sheppo vdc->cinfo = NULL; 6881ae08745Sheppo vdc->minfo = NULL; 6891ae08745Sheppo 6901ae08745Sheppo mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 6913af08d82Slm66018 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 6923af08d82Slm66018 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 6933af08d82Slm66018 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 6943af08d82Slm66018 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 6956ace3c90SAlexandre Chartre cv_init(&vdc->io_pending_cv, NULL, CV_DRIVER, NULL); 6963af08d82Slm66018 6976ace3c90SAlexandre Chartre vdc->io_pending = B_FALSE; 6983af08d82Slm66018 vdc->threads_pending = 0; 6993af08d82Slm66018 vdc->sync_op_blocked = B_FALSE; 7003af08d82Slm66018 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 7013af08d82Slm66018 7022f5224aeSachartre mutex_init(&vdc->ownership_lock, NULL, MUTEX_DRIVER, NULL); 7032f5224aeSachartre cv_init(&vdc->ownership_cv, NULL, CV_DRIVER, NULL); 7046ace3c90SAlexandre Chartre cv_init(&vdc->eio_cv, NULL, CV_DRIVER, NULL); 7052f5224aeSachartre 7063af08d82Slm66018 /* init blocking msg read functionality */ 7073af08d82Slm66018 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 7083af08d82Slm66018 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 7093af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 7103af08d82Slm66018 7111ae08745Sheppo vdc->initialized |= VDC_LOCKS; 7121ae08745Sheppo 713655fd6a9Sachartre /* get device and port MD node for this disk instance */ 7148cd10891Snarayan if (vdc_get_md_node(dip, &mdp, &vd_node) != 0) { 715655fd6a9Sachartre cmn_err(CE_NOTE, "[%d] Could not get machine description node", 716655fd6a9Sachartre instance); 717655fd6a9Sachartre return (DDI_FAILURE); 718655fd6a9Sachartre } 719655fd6a9Sachartre 7208cd10891Snarayan if (vdc_init_ports(vdc, mdp, vd_node) != 0) { 
7218cd10891Snarayan cmn_err(CE_NOTE, "[%d] Error initialising ports", instance); 7228cd10891Snarayan return (DDI_FAILURE); 723655fd6a9Sachartre } 724655fd6a9Sachartre 725655fd6a9Sachartre (void) md_fini_handle(mdp); 726655fd6a9Sachartre 727de3a5331SRamesh Chitrothu /* Create the kstats for saving the I/O statistics used by iostat(1M) */ 728de3a5331SRamesh Chitrothu vdc_create_io_kstats(vdc); 729de3a5331SRamesh Chitrothu vdc_create_err_kstats(vdc); 730de3a5331SRamesh Chitrothu 731de3a5331SRamesh Chitrothu /* Initialize remaining structures before starting the msg thread */ 732de3a5331SRamesh Chitrothu vdc->vdisk_label = VD_DISK_LABEL_UNK; 733342440ecSPrasad Singamsetty vdc->vtoc = kmem_zalloc(sizeof (struct extvtoc), KM_SLEEP); 734de3a5331SRamesh Chitrothu vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); 735de3a5331SRamesh Chitrothu vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 736de3a5331SRamesh Chitrothu 7373af08d82Slm66018 /* initialize the thread responsible for managing state with server */ 7383af08d82Slm66018 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 7391ae08745Sheppo vdc, 0, &p0, TS_RUN, minclsyspri); 7403af08d82Slm66018 if (vdc->msg_proc_thr == NULL) { 7411ae08745Sheppo cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 7421ae08745Sheppo instance); 7431ae08745Sheppo return (DDI_FAILURE); 7441ae08745Sheppo } 7453af08d82Slm66018 7466ace3c90SAlexandre Chartre /* 7476ace3c90SAlexandre Chartre * If there are multiple servers then start the eio thread. 
7486ace3c90SAlexandre Chartre */ 7496ace3c90SAlexandre Chartre if (vdc->num_servers > 1) { 7506ace3c90SAlexandre Chartre vdc->eio_thread = thread_create(NULL, 0, vdc_eio_thread, vdc, 0, 7516ace3c90SAlexandre Chartre &p0, TS_RUN, v.v_maxsyspri - 2); 7526ace3c90SAlexandre Chartre if (vdc->eio_thread == NULL) { 7536ace3c90SAlexandre Chartre cmn_err(CE_NOTE, "[%d] Failed to create error " 7546ace3c90SAlexandre Chartre "I/O thread", instance); 7556ace3c90SAlexandre Chartre return (DDI_FAILURE); 7566ace3c90SAlexandre Chartre } 7576ace3c90SAlexandre Chartre } 7586ace3c90SAlexandre Chartre 7591ae08745Sheppo vdc->initialized |= VDC_THREAD; 7601ae08745Sheppo 761e1ebb9ecSlm66018 atomic_inc_32(&vdc_instance_count); 7621ae08745Sheppo 7630a55fbb7Slm66018 /* 76478fcd0a1Sachartre * Check the disk label. This will send requests and do the handshake. 76578fcd0a1Sachartre * We don't really care about the disk label now. What we really need is 76678fcd0a1Sachartre * the handshake do be done so that we know the type of the disk (slice 76778fcd0a1Sachartre * or full disk) and the appropriate device nodes can be created. 
7680a55fbb7Slm66018 */ 76978fcd0a1Sachartre 77078fcd0a1Sachartre mutex_enter(&vdc->lock); 77178fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 77278fcd0a1Sachartre mutex_exit(&vdc->lock); 7731ae08745Sheppo 7741ae08745Sheppo /* 7755b98b509Sachartre * Now that we have the device info we can create the device nodes 7761ae08745Sheppo */ 7771ae08745Sheppo status = vdc_create_device_nodes(vdc); 7781ae08745Sheppo if (status) { 7793af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to create device nodes", 7801ae08745Sheppo instance); 7813af08d82Slm66018 goto return_status; 7821ae08745Sheppo } 7831ae08745Sheppo 7844bac2208Snarayan /* 785366a92acSlm66018 * Fill in the fields of the error statistics kstat that were not 786366a92acSlm66018 * available when creating the kstat 787366a92acSlm66018 */ 788366a92acSlm66018 vdc_set_err_kstats(vdc); 7891ae08745Sheppo ddi_report_dev(dip); 790*ca6d1280SAlexandre Chartre ASSERT(vdc->lifecycle == VDC_LC_ONLINE || 791*ca6d1280SAlexandre Chartre vdc->lifecycle == VDC_LC_ONLINE_PENDING); 7923af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 7931ae08745Sheppo 7943af08d82Slm66018 return_status: 7953af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 7961ae08745Sheppo return (status); 7971ae08745Sheppo } 7981ae08745Sheppo 7991ae08745Sheppo static int 8001ae08745Sheppo vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 8011ae08745Sheppo { 8021ae08745Sheppo int status; 8031ae08745Sheppo 8041ae08745Sheppo switch (cmd) { 8051ae08745Sheppo case DDI_ATTACH: 8061ae08745Sheppo if ((status = vdc_do_attach(dip)) != 0) 8071ae08745Sheppo (void) vdc_detach(dip, DDI_DETACH); 8081ae08745Sheppo return (status); 8091ae08745Sheppo case DDI_RESUME: 8101ae08745Sheppo /* nothing to do for this non-device */ 8111ae08745Sheppo return (DDI_SUCCESS); 8121ae08745Sheppo default: 8131ae08745Sheppo return (DDI_FAILURE); 8141ae08745Sheppo } 8151ae08745Sheppo } 8161ae08745Sheppo 8171ae08745Sheppo static int 8188cd10891Snarayan 
vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr) 8191ae08745Sheppo { 8201ae08745Sheppo int status = 0; 8211ae08745Sheppo ldc_status_t ldc_state; 8221ae08745Sheppo ldc_attr_t ldc_attr; 8231ae08745Sheppo 8241ae08745Sheppo ASSERT(vdc != NULL); 8258cd10891Snarayan ASSERT(srvr != NULL); 8261ae08745Sheppo 8271ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK; 8281ae08745Sheppo ldc_attr.instance = vdc->instance; 8291ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 830e1ebb9ecSlm66018 ldc_attr.mtu = VD_LDC_MTU; 8311ae08745Sheppo 8328cd10891Snarayan if ((srvr->state & VDC_LDC_INIT) == 0) { 8338cd10891Snarayan status = ldc_init(srvr->ldc_id, &ldc_attr, 8348cd10891Snarayan &srvr->ldc_handle); 8351ae08745Sheppo if (status != 0) { 8363af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 8378cd10891Snarayan vdc->instance, srvr->ldc_id, status); 8381ae08745Sheppo return (status); 8391ae08745Sheppo } 8408cd10891Snarayan srvr->state |= VDC_LDC_INIT; 8411ae08745Sheppo } 8428cd10891Snarayan status = ldc_status(srvr->ldc_handle, &ldc_state); 8431ae08745Sheppo if (status != 0) { 8443af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 845e1ebb9ecSlm66018 vdc->instance, status); 8468cd10891Snarayan goto init_exit; 8471ae08745Sheppo } 8488cd10891Snarayan srvr->ldc_state = ldc_state; 8491ae08745Sheppo 8508cd10891Snarayan if ((srvr->state & VDC_LDC_CB) == 0) { 8518cd10891Snarayan status = ldc_reg_callback(srvr->ldc_handle, vdc_handle_cb, 8528cd10891Snarayan (caddr_t)srvr); 8531ae08745Sheppo if (status != 0) { 8543af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 855e1ebb9ecSlm66018 vdc->instance, status); 8568cd10891Snarayan goto init_exit; 8571ae08745Sheppo } 8588cd10891Snarayan srvr->state |= VDC_LDC_CB; 8591ae08745Sheppo } 8601ae08745Sheppo 8611ae08745Sheppo /* 8621ae08745Sheppo * At this stage we have initialised LDC, we will now try and open 8631ae08745Sheppo * the connection. 
8641ae08745Sheppo */ 8658cd10891Snarayan if (srvr->ldc_state == LDC_INIT) { 8668cd10891Snarayan status = ldc_open(srvr->ldc_handle); 8671ae08745Sheppo if (status != 0) { 8683af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 8698cd10891Snarayan vdc->instance, srvr->ldc_id, status); 8708cd10891Snarayan goto init_exit; 8711ae08745Sheppo } 8728cd10891Snarayan srvr->state |= VDC_LDC_OPEN; 8738cd10891Snarayan } 8748cd10891Snarayan 8758cd10891Snarayan init_exit: 8768cd10891Snarayan if (status) { 8778cd10891Snarayan vdc_terminate_ldc(vdc, srvr); 8781ae08745Sheppo } 8791ae08745Sheppo 8801ae08745Sheppo return (status); 8811ae08745Sheppo } 8821ae08745Sheppo 8831ae08745Sheppo static int 8841ae08745Sheppo vdc_start_ldc_connection(vdc_t *vdc) 8851ae08745Sheppo { 8861ae08745Sheppo int status = 0; 8871ae08745Sheppo 8881ae08745Sheppo ASSERT(vdc != NULL); 8891ae08745Sheppo 8903af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 8911ae08745Sheppo 8920a55fbb7Slm66018 status = vdc_do_ldc_up(vdc); 8931ae08745Sheppo 8943af08d82Slm66018 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 8951ae08745Sheppo 8963af08d82Slm66018 return (status); 8973af08d82Slm66018 } 8983af08d82Slm66018 8993af08d82Slm66018 static int 9003af08d82Slm66018 vdc_stop_ldc_connection(vdc_t *vdcp) 9013af08d82Slm66018 { 9023af08d82Slm66018 int status; 9033af08d82Slm66018 9048cd10891Snarayan ASSERT(vdcp != NULL); 9058cd10891Snarayan 9068cd10891Snarayan ASSERT(MUTEX_HELD(&vdcp->lock)); 9078cd10891Snarayan 9083af08d82Slm66018 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 9093af08d82Slm66018 vdcp->state); 9103af08d82Slm66018 9118cd10891Snarayan status = ldc_down(vdcp->curr_server->ldc_handle); 9123af08d82Slm66018 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 9133af08d82Slm66018 9143af08d82Slm66018 vdcp->initialized &= ~VDC_HANDSHAKE; 9153af08d82Slm66018 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 9161ae08745Sheppo 9171ae08745Sheppo return (status); 9181ae08745Sheppo 
} 9191ae08745Sheppo 920366a92acSlm66018 static void 921366a92acSlm66018 vdc_create_io_kstats(vdc_t *vdc) 922366a92acSlm66018 { 923366a92acSlm66018 if (vdc->io_stats != NULL) { 924366a92acSlm66018 DMSG(vdc, 0, "[%d] I/O kstat already exists\n", vdc->instance); 925366a92acSlm66018 return; 926366a92acSlm66018 } 927366a92acSlm66018 928366a92acSlm66018 vdc->io_stats = kstat_create(VDC_DRIVER_NAME, vdc->instance, NULL, 929366a92acSlm66018 "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT); 930366a92acSlm66018 if (vdc->io_stats != NULL) { 931366a92acSlm66018 vdc->io_stats->ks_lock = &vdc->lock; 932366a92acSlm66018 kstat_install(vdc->io_stats); 933366a92acSlm66018 } else { 934366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: I/O statistics" 935366a92acSlm66018 " will not be gathered", vdc->instance); 936366a92acSlm66018 } 937366a92acSlm66018 } 938366a92acSlm66018 939366a92acSlm66018 static void 940366a92acSlm66018 vdc_create_err_kstats(vdc_t *vdc) 941366a92acSlm66018 { 942366a92acSlm66018 vd_err_stats_t *stp; 943366a92acSlm66018 char kstatmodule_err[KSTAT_STRLEN]; 944366a92acSlm66018 char kstatname[KSTAT_STRLEN]; 945366a92acSlm66018 int ndata = (sizeof (vd_err_stats_t) / sizeof (kstat_named_t)); 946366a92acSlm66018 int instance = vdc->instance; 947366a92acSlm66018 948366a92acSlm66018 if (vdc->err_stats != NULL) { 949366a92acSlm66018 DMSG(vdc, 0, "[%d] ERR kstat already exists\n", vdc->instance); 950366a92acSlm66018 return; 951366a92acSlm66018 } 952366a92acSlm66018 953366a92acSlm66018 (void) snprintf(kstatmodule_err, sizeof (kstatmodule_err), 954366a92acSlm66018 "%serr", VDC_DRIVER_NAME); 955366a92acSlm66018 (void) snprintf(kstatname, sizeof (kstatname), 956366a92acSlm66018 "%s%d,err", VDC_DRIVER_NAME, instance); 957366a92acSlm66018 958366a92acSlm66018 vdc->err_stats = kstat_create(kstatmodule_err, instance, kstatname, 959366a92acSlm66018 "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT); 960366a92acSlm66018 961366a92acSlm66018 if (vdc->err_stats 
== NULL) { 962366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: Error statistics" 963366a92acSlm66018 " will not be gathered", instance); 964366a92acSlm66018 return; 965366a92acSlm66018 } 966366a92acSlm66018 967366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 968366a92acSlm66018 kstat_named_init(&stp->vd_softerrs, "Soft Errors", 969366a92acSlm66018 KSTAT_DATA_UINT32); 970366a92acSlm66018 kstat_named_init(&stp->vd_transerrs, "Transport Errors", 971366a92acSlm66018 KSTAT_DATA_UINT32); 972366a92acSlm66018 kstat_named_init(&stp->vd_protoerrs, "Protocol Errors", 973366a92acSlm66018 KSTAT_DATA_UINT32); 974366a92acSlm66018 kstat_named_init(&stp->vd_vid, "Vendor", 975366a92acSlm66018 KSTAT_DATA_CHAR); 976366a92acSlm66018 kstat_named_init(&stp->vd_pid, "Product", 977366a92acSlm66018 KSTAT_DATA_CHAR); 978366a92acSlm66018 kstat_named_init(&stp->vd_capacity, "Size", 979366a92acSlm66018 KSTAT_DATA_ULONGLONG); 980366a92acSlm66018 981366a92acSlm66018 vdc->err_stats->ks_update = nulldev; 982366a92acSlm66018 983366a92acSlm66018 kstat_install(vdc->err_stats); 984366a92acSlm66018 } 985366a92acSlm66018 986366a92acSlm66018 static void 987366a92acSlm66018 vdc_set_err_kstats(vdc_t *vdc) 988366a92acSlm66018 { 989366a92acSlm66018 vd_err_stats_t *stp; 990366a92acSlm66018 991366a92acSlm66018 if (vdc->err_stats == NULL) 992366a92acSlm66018 return; 993366a92acSlm66018 994366a92acSlm66018 mutex_enter(&vdc->lock); 995366a92acSlm66018 996366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 997366a92acSlm66018 ASSERT(stp != NULL); 998366a92acSlm66018 99965908c77Syu, larry liu - Sun Microsystems - Beijing China stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->vdisk_bsize; 1000366a92acSlm66018 (void) strcpy(stp->vd_vid.value.c, "SUN"); 1001366a92acSlm66018 (void) strcpy(stp->vd_pid.value.c, "VDSK"); 1002366a92acSlm66018 1003366a92acSlm66018 mutex_exit(&vdc->lock); 1004366a92acSlm66018 } 1005366a92acSlm66018 10064bac2208Snarayan static int 
10074bac2208Snarayan vdc_create_device_nodes_efi(vdc_t *vdc) 10084bac2208Snarayan { 10094bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h"); 10104bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h,raw"); 10114bac2208Snarayan 10124bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 10134bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 10144bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 10154bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 10164bac2208Snarayan vdc->instance); 10174bac2208Snarayan return (EIO); 10184bac2208Snarayan } 10194bac2208Snarayan 10204bac2208Snarayan /* if any device node is created we set this flag */ 10214bac2208Snarayan vdc->initialized |= VDC_MINOR; 10224bac2208Snarayan 10234bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 10244bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 10254bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 10264bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 10274bac2208Snarayan vdc->instance); 10284bac2208Snarayan return (EIO); 10294bac2208Snarayan } 10304bac2208Snarayan 10314bac2208Snarayan return (0); 10324bac2208Snarayan } 10334bac2208Snarayan 10344bac2208Snarayan static int 10354bac2208Snarayan vdc_create_device_nodes_vtoc(vdc_t *vdc) 10364bac2208Snarayan { 10374bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd"); 10384bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd,raw"); 10394bac2208Snarayan 10404bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK, 10414bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 10424bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 10434bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 10444bac2208Snarayan vdc->instance); 10454bac2208Snarayan return (EIO); 10464bac2208Snarayan } 10474bac2208Snarayan 10484bac2208Snarayan /* if any device node is created we set this flag */ 10494bac2208Snarayan vdc->initialized |= VDC_MINOR; 
10504bac2208Snarayan 10514bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 10524bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 10534bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 10544bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 10554bac2208Snarayan vdc->instance); 10564bac2208Snarayan return (EIO); 10574bac2208Snarayan } 10584bac2208Snarayan 10594bac2208Snarayan return (0); 10604bac2208Snarayan } 10611ae08745Sheppo 10621ae08745Sheppo /* 10631ae08745Sheppo * Function: 10641ae08745Sheppo * vdc_create_device_nodes 10651ae08745Sheppo * 10661ae08745Sheppo * Description: 10671ae08745Sheppo * This function creates the block and character device nodes under 10685b98b509Sachartre * /devices. It is called as part of the attach(9E) of the instance 10695b98b509Sachartre * during the handshake with vds after vds has sent the attributes 10705b98b509Sachartre * to vdc. 10711ae08745Sheppo * 10721ae08745Sheppo * If the device is of type VD_DISK_TYPE_SLICE then the minor node 10731ae08745Sheppo * of 2 is used in keeping with the Solaris convention that slice 2 10741ae08745Sheppo * refers to a whole disk. 
Slices start at 'a' 10751ae08745Sheppo * 10761ae08745Sheppo * Parameters: 10771ae08745Sheppo * vdc - soft state pointer 10781ae08745Sheppo * 10791ae08745Sheppo * Return Values 10801ae08745Sheppo * 0 - Success 10811ae08745Sheppo * EIO - Failed to create node 10821ae08745Sheppo */ 10831ae08745Sheppo static int 10841ae08745Sheppo vdc_create_device_nodes(vdc_t *vdc) 10851ae08745Sheppo { 10864bac2208Snarayan char name[sizeof ("s,raw")]; 10871ae08745Sheppo dev_info_t *dip = NULL; 10884bac2208Snarayan int instance, status; 10891ae08745Sheppo int num_slices = 1; 10901ae08745Sheppo int i; 10911ae08745Sheppo 10921ae08745Sheppo ASSERT(vdc != NULL); 10931ae08745Sheppo 10941ae08745Sheppo instance = vdc->instance; 10951ae08745Sheppo dip = vdc->dip; 10961ae08745Sheppo 10971ae08745Sheppo switch (vdc->vdisk_type) { 10981ae08745Sheppo case VD_DISK_TYPE_DISK: 10996ace3c90SAlexandre Chartre case VD_DISK_TYPE_UNK: 11001ae08745Sheppo num_slices = V_NUMPAR; 11011ae08745Sheppo break; 11021ae08745Sheppo case VD_DISK_TYPE_SLICE: 11031ae08745Sheppo num_slices = 1; 11041ae08745Sheppo break; 11051ae08745Sheppo default: 11066ace3c90SAlexandre Chartre ASSERT(0); 11071ae08745Sheppo } 11081ae08745Sheppo 11094bac2208Snarayan /* 11104bac2208Snarayan * Minor nodes are different for EFI disks: EFI disks do not have 11114bac2208Snarayan * a minor node 'g' for the minor number corresponding to slice 11124bac2208Snarayan * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 11134bac2208Snarayan * representing the whole disk. 
11144bac2208Snarayan */ 11151ae08745Sheppo for (i = 0; i < num_slices; i++) { 11164bac2208Snarayan 11174bac2208Snarayan if (i == VD_EFI_WD_SLICE) { 11184bac2208Snarayan if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 11194bac2208Snarayan status = vdc_create_device_nodes_efi(vdc); 11204bac2208Snarayan else 11214bac2208Snarayan status = vdc_create_device_nodes_vtoc(vdc); 11224bac2208Snarayan if (status != 0) 11234bac2208Snarayan return (status); 11244bac2208Snarayan continue; 11254bac2208Snarayan } 11264bac2208Snarayan 11271ae08745Sheppo (void) snprintf(name, sizeof (name), "%c", 'a' + i); 11281ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFBLK, 11291ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1130e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 1131e1ebb9ecSlm66018 instance, name); 11321ae08745Sheppo return (EIO); 11331ae08745Sheppo } 11341ae08745Sheppo 11351ae08745Sheppo /* if any device node is created we set this flag */ 11361ae08745Sheppo vdc->initialized |= VDC_MINOR; 11371ae08745Sheppo 113887a7269eSachartre (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); 113987a7269eSachartre 11401ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFCHR, 11411ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1142e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 1143e1ebb9ecSlm66018 instance, name); 11441ae08745Sheppo return (EIO); 11451ae08745Sheppo } 11461ae08745Sheppo } 11471ae08745Sheppo 11481ae08745Sheppo return (0); 11491ae08745Sheppo } 11501ae08745Sheppo 11511ae08745Sheppo /* 11525b98b509Sachartre * Driver prop_op(9e) entry point function. Return the number of blocks for 11535b98b509Sachartre * the partition in question or forward the request to the property facilities. 
11541ae08745Sheppo */ 11551ae08745Sheppo static int 11565b98b509Sachartre vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 11575b98b509Sachartre char *name, caddr_t valuep, int *lengthp) 11581ae08745Sheppo { 11595b98b509Sachartre int instance = ddi_get_instance(dip); 11605b98b509Sachartre vdc_t *vdc; 11615b98b509Sachartre uint64_t nblocks; 11625b98b509Sachartre uint_t blksize; 11631ae08745Sheppo 11645b98b509Sachartre vdc = ddi_get_soft_state(vdc_state, instance); 11651ae08745Sheppo 11665b98b509Sachartre if (dev == DDI_DEV_T_ANY || vdc == NULL) { 11675b98b509Sachartre return (ddi_prop_op(dev, dip, prop_op, mod_flags, 11685b98b509Sachartre name, valuep, lengthp)); 11691ae08745Sheppo } 11701ae08745Sheppo 11715b98b509Sachartre mutex_enter(&vdc->lock); 11725b98b509Sachartre (void) vdc_validate_geometry(vdc); 117378fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { 11745b98b509Sachartre mutex_exit(&vdc->lock); 11755b98b509Sachartre return (ddi_prop_op(dev, dip, prop_op, mod_flags, 11765b98b509Sachartre name, valuep, lengthp)); 117778fcd0a1Sachartre } 11785b98b509Sachartre nblocks = vdc->slice[VDCPART(dev)].nblocks; 117965908c77Syu, larry liu - Sun Microsystems - Beijing China blksize = vdc->vdisk_bsize; 11805b98b509Sachartre mutex_exit(&vdc->lock); 118178fcd0a1Sachartre 11825b98b509Sachartre return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags, 11835b98b509Sachartre name, valuep, lengthp, nblocks, blksize)); 11841ae08745Sheppo } 11851ae08745Sheppo 118678fcd0a1Sachartre /* 118778fcd0a1Sachartre * Function: 118878fcd0a1Sachartre * vdc_is_opened 118978fcd0a1Sachartre * 119078fcd0a1Sachartre * Description: 119178fcd0a1Sachartre * This function checks if any slice of a given virtual disk is 119278fcd0a1Sachartre * currently opened. 
119378fcd0a1Sachartre * 119478fcd0a1Sachartre * Parameters: 119578fcd0a1Sachartre * vdc - soft state pointer 119678fcd0a1Sachartre * 119778fcd0a1Sachartre * Return Values 119878fcd0a1Sachartre * B_TRUE - at least one slice is opened. 119978fcd0a1Sachartre * B_FALSE - no slice is opened. 120078fcd0a1Sachartre */ 120178fcd0a1Sachartre static boolean_t 120278fcd0a1Sachartre vdc_is_opened(vdc_t *vdc) 120378fcd0a1Sachartre { 12046ace3c90SAlexandre Chartre int i; 120578fcd0a1Sachartre 120678fcd0a1Sachartre /* check if there's any layered open */ 12076ace3c90SAlexandre Chartre for (i = 0; i < V_NUMPAR; i++) { 120878fcd0a1Sachartre if (vdc->open_lyr[i] > 0) 120978fcd0a1Sachartre return (B_TRUE); 121078fcd0a1Sachartre } 121178fcd0a1Sachartre 121278fcd0a1Sachartre /* check if there is any other kind of open */ 121378fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 121478fcd0a1Sachartre if (vdc->open[i] != 0) 121578fcd0a1Sachartre return (B_TRUE); 121678fcd0a1Sachartre } 121778fcd0a1Sachartre 121878fcd0a1Sachartre return (B_FALSE); 121978fcd0a1Sachartre } 122078fcd0a1Sachartre 122178fcd0a1Sachartre static int 122278fcd0a1Sachartre vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) 122378fcd0a1Sachartre { 122478fcd0a1Sachartre uint8_t slicemask; 122578fcd0a1Sachartre int i; 122678fcd0a1Sachartre 122778fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 122878fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 122978fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 123078fcd0a1Sachartre 123178fcd0a1Sachartre slicemask = 1 << slice; 123278fcd0a1Sachartre 12336ace3c90SAlexandre Chartre /* 12346ace3c90SAlexandre Chartre * If we have a single-slice disk which was unavailable during the 12356ace3c90SAlexandre Chartre * attach then a device was created for each 8 slices. Now that 12366ace3c90SAlexandre Chartre * the type is known, we prevent opening any slice other than 0 12376ace3c90SAlexandre Chartre * even if a device still exists. 
12386ace3c90SAlexandre Chartre */ 12396ace3c90SAlexandre Chartre if (vdc->vdisk_type == VD_DISK_TYPE_SLICE && slice != 0) 12406ace3c90SAlexandre Chartre return (EIO); 12416ace3c90SAlexandre Chartre 124278fcd0a1Sachartre /* check if slice is already exclusively opened */ 124378fcd0a1Sachartre if (vdc->open_excl & slicemask) 124478fcd0a1Sachartre return (EBUSY); 124578fcd0a1Sachartre 124678fcd0a1Sachartre /* if open exclusive, check if slice is already opened */ 124778fcd0a1Sachartre if (flag & FEXCL) { 124878fcd0a1Sachartre if (vdc->open_lyr[slice] > 0) 124978fcd0a1Sachartre return (EBUSY); 125078fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 125178fcd0a1Sachartre if (vdc->open[i] & slicemask) 125278fcd0a1Sachartre return (EBUSY); 125378fcd0a1Sachartre } 125478fcd0a1Sachartre vdc->open_excl |= slicemask; 125578fcd0a1Sachartre } 125678fcd0a1Sachartre 125778fcd0a1Sachartre /* mark slice as opened */ 125878fcd0a1Sachartre if (otyp == OTYP_LYR) { 125978fcd0a1Sachartre vdc->open_lyr[slice]++; 126078fcd0a1Sachartre } else { 126178fcd0a1Sachartre vdc->open[otyp] |= slicemask; 126278fcd0a1Sachartre } 126378fcd0a1Sachartre 126478fcd0a1Sachartre return (0); 126578fcd0a1Sachartre } 126678fcd0a1Sachartre 126778fcd0a1Sachartre static void 126878fcd0a1Sachartre vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) 126978fcd0a1Sachartre { 127078fcd0a1Sachartre uint8_t slicemask; 127178fcd0a1Sachartre 127278fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 127378fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 127478fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 127578fcd0a1Sachartre 127678fcd0a1Sachartre slicemask = 1 << slice; 127778fcd0a1Sachartre 127878fcd0a1Sachartre if (otyp == OTYP_LYR) { 127978fcd0a1Sachartre ASSERT(vdc->open_lyr[slice] > 0); 128078fcd0a1Sachartre vdc->open_lyr[slice]--; 128178fcd0a1Sachartre } else { 128278fcd0a1Sachartre vdc->open[otyp] &= ~slicemask; 128378fcd0a1Sachartre } 128478fcd0a1Sachartre 128578fcd0a1Sachartre if (flag & FEXCL) 128678fcd0a1Sachartre 
vdc->open_excl &= ~slicemask; 128778fcd0a1Sachartre } 128878fcd0a1Sachartre 12891ae08745Sheppo static int 12901ae08745Sheppo vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 12911ae08745Sheppo { 12921ae08745Sheppo _NOTE(ARGUNUSED(cred)) 12931ae08745Sheppo 1294179e09c2Sachartre int instance, nodelay; 129578fcd0a1Sachartre int slice, status = 0; 12961ae08745Sheppo vdc_t *vdc; 12971ae08745Sheppo 12981ae08745Sheppo ASSERT(dev != NULL); 12990d0c8d4bSnarayan instance = VDCUNIT(*dev); 13001ae08745Sheppo 130178fcd0a1Sachartre if (otyp >= OTYPCNT) 13021ae08745Sheppo return (EINVAL); 13031ae08745Sheppo 13041ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1305e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13061ae08745Sheppo return (ENXIO); 13071ae08745Sheppo } 13081ae08745Sheppo 13093af08d82Slm66018 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 13103af08d82Slm66018 getminor(*dev), flag, otyp); 13111ae08745Sheppo 131278fcd0a1Sachartre slice = VDCPART(*dev); 131378fcd0a1Sachartre 1314179e09c2Sachartre nodelay = flag & (FNDELAY | FNONBLOCK); 1315179e09c2Sachartre 1316179e09c2Sachartre if ((flag & FWRITE) && (!nodelay) && 1317179e09c2Sachartre !(VD_OP_SUPPORTED(vdc->operations, VD_OP_BWRITE))) { 1318179e09c2Sachartre return (EROFS); 1319179e09c2Sachartre } 1320179e09c2Sachartre 13211ae08745Sheppo mutex_enter(&vdc->lock); 132278fcd0a1Sachartre 132378fcd0a1Sachartre status = vdc_mark_opened(vdc, slice, flag, otyp); 132478fcd0a1Sachartre 132578fcd0a1Sachartre if (status != 0) { 132678fcd0a1Sachartre mutex_exit(&vdc->lock); 132778fcd0a1Sachartre return (status); 132878fcd0a1Sachartre } 132978fcd0a1Sachartre 13306ace3c90SAlexandre Chartre /* 13316ace3c90SAlexandre Chartre * If the disk type is unknown then we have to wait for the 13326ace3c90SAlexandre Chartre * handshake to complete because we don't know if the slice 13336ace3c90SAlexandre Chartre * device we are opening effectively exists. 
13346ace3c90SAlexandre Chartre */ 13356ace3c90SAlexandre Chartre if (vdc->vdisk_type != VD_DISK_TYPE_UNK && nodelay) { 133678fcd0a1Sachartre 133778fcd0a1Sachartre /* don't resubmit a validate request if there's already one */ 133878fcd0a1Sachartre if (vdc->validate_pending > 0) { 133978fcd0a1Sachartre mutex_exit(&vdc->lock); 134078fcd0a1Sachartre return (0); 134178fcd0a1Sachartre } 134278fcd0a1Sachartre 134378fcd0a1Sachartre /* call vdc_validate() asynchronously to avoid blocking */ 134478fcd0a1Sachartre if (taskq_dispatch(system_taskq, vdc_validate_task, 134578fcd0a1Sachartre (void *)vdc, TQ_NOSLEEP) == NULL) { 134678fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 134778fcd0a1Sachartre mutex_exit(&vdc->lock); 134878fcd0a1Sachartre return (ENXIO); 134978fcd0a1Sachartre } 135078fcd0a1Sachartre 135178fcd0a1Sachartre vdc->validate_pending++; 135278fcd0a1Sachartre mutex_exit(&vdc->lock); 135378fcd0a1Sachartre return (0); 135478fcd0a1Sachartre } 135578fcd0a1Sachartre 13561ae08745Sheppo mutex_exit(&vdc->lock); 13571ae08745Sheppo 135878fcd0a1Sachartre vdc_validate(vdc); 135978fcd0a1Sachartre 136078fcd0a1Sachartre mutex_enter(&vdc->lock); 136178fcd0a1Sachartre 13626ace3c90SAlexandre Chartre if (vdc->vdisk_type == VD_DISK_TYPE_UNK || 13636ace3c90SAlexandre Chartre (vdc->vdisk_type == VD_DISK_TYPE_SLICE && slice != 0) || 13646ace3c90SAlexandre Chartre (!nodelay && (vdc->vdisk_label == VD_DISK_LABEL_UNK || 13656ace3c90SAlexandre Chartre vdc->slice[slice].nblocks == 0))) { 136678fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 136778fcd0a1Sachartre status = EIO; 136878fcd0a1Sachartre } 136978fcd0a1Sachartre 137078fcd0a1Sachartre mutex_exit(&vdc->lock); 137178fcd0a1Sachartre 137278fcd0a1Sachartre return (status); 13731ae08745Sheppo } 13741ae08745Sheppo 13751ae08745Sheppo static int 13761ae08745Sheppo vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 13771ae08745Sheppo { 13781ae08745Sheppo _NOTE(ARGUNUSED(cred)) 13791ae08745Sheppo 13801ae08745Sheppo int 
instance; 138178fcd0a1Sachartre int slice; 13822f5224aeSachartre int rv, rval; 13831ae08745Sheppo vdc_t *vdc; 13841ae08745Sheppo 13850d0c8d4bSnarayan instance = VDCUNIT(dev); 13861ae08745Sheppo 138778fcd0a1Sachartre if (otyp >= OTYPCNT) 13881ae08745Sheppo return (EINVAL); 13891ae08745Sheppo 13901ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1391e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13921ae08745Sheppo return (ENXIO); 13931ae08745Sheppo } 13941ae08745Sheppo 13953af08d82Slm66018 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 13961ae08745Sheppo 139778fcd0a1Sachartre slice = VDCPART(dev); 139878fcd0a1Sachartre 13998259acd8Szk194757 /* 14008259acd8Szk194757 * Attempt to flush the W$ on a close operation. If this is 14018259acd8Szk194757 * not a supported IOCTL command or the backing device is read-only 14028259acd8Szk194757 * do not fail the close operation. 14038259acd8Szk194757 */ 14042f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, &rval); 14058259acd8Szk194757 14068259acd8Szk194757 if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) { 14078259acd8Szk194757 DMSG(vdc, 0, "[%d] flush failed with error %d on close\n", 14088259acd8Szk194757 instance, rv); 14098259acd8Szk194757 return (EIO); 14108259acd8Szk194757 } 14118259acd8Szk194757 14121ae08745Sheppo mutex_enter(&vdc->lock); 141378fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 14141ae08745Sheppo mutex_exit(&vdc->lock); 14151ae08745Sheppo 14161ae08745Sheppo return (0); 14171ae08745Sheppo } 14181ae08745Sheppo 14191ae08745Sheppo static int 14201ae08745Sheppo vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 14211ae08745Sheppo { 14221ae08745Sheppo _NOTE(ARGUNUSED(credp)) 14231ae08745Sheppo 14242f5224aeSachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode, rvalp)); 14251ae08745Sheppo } 14261ae08745Sheppo 14271ae08745Sheppo static int 
14281ae08745Sheppo vdc_print(dev_t dev, char *str) 14291ae08745Sheppo { 14300d0c8d4bSnarayan cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 14311ae08745Sheppo return (0); 14321ae08745Sheppo } 14331ae08745Sheppo 14341ae08745Sheppo static int 14351ae08745Sheppo vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 14361ae08745Sheppo { 14376ace3c90SAlexandre Chartre int rv, flags; 1438d10e4ef2Snarayan size_t nbytes = nblk * DEV_BSIZE; 14390d0c8d4bSnarayan int instance = VDCUNIT(dev); 1440d10e4ef2Snarayan vdc_t *vdc = NULL; 144165908c77Syu, larry liu - Sun Microsystems - Beijing China diskaddr_t vio_blkno; 14421ae08745Sheppo 14431ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1444e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 14451ae08745Sheppo return (ENXIO); 14461ae08745Sheppo } 14471ae08745Sheppo 14483af08d82Slm66018 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 14493af08d82Slm66018 instance, nbytes, blkno, (void *)addr); 145065908c77Syu, larry liu - Sun Microsystems - Beijing China 145165908c77Syu, larry liu - Sun Microsystems - Beijing China /* convert logical block to vio block */ 145265908c77Syu, larry liu - Sun Microsystems - Beijing China if ((blkno & vdc->vio_bmask) != 0) { 145365908c77Syu, larry liu - Sun Microsystems - Beijing China DMSG(vdc, 0, "Misaligned block number (%lu)\n", blkno); 145465908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 145565908c77Syu, larry liu - Sun Microsystems - Beijing China } 145665908c77Syu, larry liu - Sun Microsystems - Beijing China vio_blkno = blkno >> vdc->vio_bshift; 145765908c77Syu, larry liu - Sun Microsystems - Beijing China 14586ace3c90SAlexandre Chartre /* 14596ace3c90SAlexandre Chartre * If we are panicking, we need the state to be "running" so that we 14606ace3c90SAlexandre Chartre * can submit I/Os, but we don't want to check for any backend error. 
14616ace3c90SAlexandre Chartre */ 14626ace3c90SAlexandre Chartre flags = (ddi_in_panic())? VDC_OP_STATE_RUNNING : VDC_OP_NORMAL; 14636ace3c90SAlexandre Chartre 14646ace3c90SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BWRITE, addr, nbytes, VDCPART(dev), 14656ace3c90SAlexandre Chartre vio_blkno, NULL, VIO_write_dir, flags); 14666ace3c90SAlexandre Chartre 14673af08d82Slm66018 if (rv) { 14683af08d82Slm66018 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 14691ae08745Sheppo return (rv); 14701ae08745Sheppo } 14711ae08745Sheppo 14723af08d82Slm66018 DMSG(vdc, 0, "[%d] End\n", instance); 14733af08d82Slm66018 14743af08d82Slm66018 return (0); 14753af08d82Slm66018 } 14763af08d82Slm66018 14771ae08745Sheppo /* -------------------------------------------------------------------------- */ 14781ae08745Sheppo 14791ae08745Sheppo /* 14801ae08745Sheppo * Disk access routines 14811ae08745Sheppo * 14821ae08745Sheppo */ 14831ae08745Sheppo 14841ae08745Sheppo /* 14851ae08745Sheppo * vdc_strategy() 14861ae08745Sheppo * 14871ae08745Sheppo * Return Value: 14881ae08745Sheppo * 0: As per strategy(9E), the strategy() function must return 0 14891ae08745Sheppo * [ bioerror(9f) sets b_flags to the proper error code ] 14901ae08745Sheppo */ 14911ae08745Sheppo static int 14921ae08745Sheppo vdc_strategy(struct buf *buf) 14931ae08745Sheppo { 149465908c77Syu, larry liu - Sun Microsystems - Beijing China diskaddr_t vio_blkno; 14951ae08745Sheppo vdc_t *vdc = NULL; 14960d0c8d4bSnarayan int instance = VDCUNIT(buf->b_edev); 14971ae08745Sheppo int op = (buf->b_flags & B_READ) ? 
VD_OP_BREAD : VD_OP_BWRITE; 149887a7269eSachartre int slice; 14991ae08745Sheppo 15001ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1501e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 15021ae08745Sheppo bioerror(buf, ENXIO); 15031ae08745Sheppo biodone(buf); 15041ae08745Sheppo return (0); 15051ae08745Sheppo } 15061ae08745Sheppo 15073af08d82Slm66018 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 15083af08d82Slm66018 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 15093af08d82Slm66018 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1510d10e4ef2Snarayan 15111ae08745Sheppo bp_mapin(buf); 15121ae08745Sheppo 151387a7269eSachartre if ((long)buf->b_private == VD_SLICE_NONE) { 151487a7269eSachartre /* I/O using an absolute disk offset */ 151587a7269eSachartre slice = VD_SLICE_NONE; 151687a7269eSachartre } else { 151787a7269eSachartre slice = VDCPART(buf->b_edev); 151887a7269eSachartre } 151987a7269eSachartre 152065908c77Syu, larry liu - Sun Microsystems - Beijing China /* 152165908c77Syu, larry liu - Sun Microsystems - Beijing China * In the buf structure, b_lblkno represents a logical block number 152265908c77Syu, larry liu - Sun Microsystems - Beijing China * using a block size of 512 bytes. For the VIO request, this block 152365908c77Syu, larry liu - Sun Microsystems - Beijing China * number has to be converted to be represented with the block size 152465908c77Syu, larry liu - Sun Microsystems - Beijing China * used by the VIO protocol. 
152565908c77Syu, larry liu - Sun Microsystems - Beijing China */ 152665908c77Syu, larry liu - Sun Microsystems - Beijing China if ((buf->b_lblkno & vdc->vio_bmask) != 0) { 152765908c77Syu, larry liu - Sun Microsystems - Beijing China bioerror(buf, EINVAL); 152865908c77Syu, larry liu - Sun Microsystems - Beijing China biodone(buf); 152965908c77Syu, larry liu - Sun Microsystems - Beijing China return (0); 153065908c77Syu, larry liu - Sun Microsystems - Beijing China } 153165908c77Syu, larry liu - Sun Microsystems - Beijing China vio_blkno = buf->b_lblkno >> vdc->vio_bshift; 153265908c77Syu, larry liu - Sun Microsystems - Beijing China 15336ace3c90SAlexandre Chartre /* submit the I/O, any error will be reported in the buf structure */ 15346ace3c90SAlexandre Chartre (void) vdc_do_op(vdc, op, (caddr_t)buf->b_un.b_addr, 153565908c77Syu, larry liu - Sun Microsystems - Beijing China buf->b_bcount, slice, vio_blkno, 15366ace3c90SAlexandre Chartre buf, (op == VD_OP_BREAD) ? VIO_read_dir : VIO_write_dir, 15376ace3c90SAlexandre Chartre VDC_OP_NORMAL); 1538d10e4ef2Snarayan 15391ae08745Sheppo return (0); 15401ae08745Sheppo } 15411ae08745Sheppo 15420d0c8d4bSnarayan /* 15430d0c8d4bSnarayan * Function: 15440d0c8d4bSnarayan * vdc_min 15450d0c8d4bSnarayan * 15460d0c8d4bSnarayan * Description: 15470d0c8d4bSnarayan * Routine to limit the size of a data transfer. Used in 15480d0c8d4bSnarayan * conjunction with physio(9F). 15490d0c8d4bSnarayan * 15500d0c8d4bSnarayan * Arguments: 15510d0c8d4bSnarayan * bp - pointer to the indicated buf(9S) struct. 
15520d0c8d4bSnarayan * 15530d0c8d4bSnarayan */ 15540d0c8d4bSnarayan static void 15550d0c8d4bSnarayan vdc_min(struct buf *bufp) 15560d0c8d4bSnarayan { 15570d0c8d4bSnarayan vdc_t *vdc = NULL; 15580d0c8d4bSnarayan int instance = VDCUNIT(bufp->b_edev); 15590d0c8d4bSnarayan 15600d0c8d4bSnarayan vdc = ddi_get_soft_state(vdc_state, instance); 15610d0c8d4bSnarayan VERIFY(vdc != NULL); 15620d0c8d4bSnarayan 156365908c77Syu, larry liu - Sun Microsystems - Beijing China if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->vdisk_bsize)) { 156465908c77Syu, larry liu - Sun Microsystems - Beijing China bufp->b_bcount = vdc->max_xfer_sz * vdc->vdisk_bsize; 15650d0c8d4bSnarayan } 15660d0c8d4bSnarayan } 15671ae08745Sheppo 15681ae08745Sheppo static int 15691ae08745Sheppo vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 15701ae08745Sheppo { 15711ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15721ae08745Sheppo 15730d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15740d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 15751ae08745Sheppo } 15761ae08745Sheppo 15771ae08745Sheppo static int 15781ae08745Sheppo vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 15791ae08745Sheppo { 15801ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15811ae08745Sheppo 15820d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15830d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 15841ae08745Sheppo } 15851ae08745Sheppo 15861ae08745Sheppo static int 15871ae08745Sheppo vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 15881ae08745Sheppo { 15891ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15901ae08745Sheppo 15910d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15920d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 15931ae08745Sheppo } 15941ae08745Sheppo 15951ae08745Sheppo static int 15961ae08745Sheppo vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 15971ae08745Sheppo { 15981ae08745Sheppo _NOTE(ARGUNUSED(cred)) 
15991ae08745Sheppo 16000d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 16010d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 16021ae08745Sheppo } 16031ae08745Sheppo 16041ae08745Sheppo 16051ae08745Sheppo /* -------------------------------------------------------------------------- */ 16061ae08745Sheppo 16071ae08745Sheppo /* 16081ae08745Sheppo * Handshake support 16091ae08745Sheppo */ 16101ae08745Sheppo 16111ae08745Sheppo 16120a55fbb7Slm66018 /* 16130a55fbb7Slm66018 * Function: 16140a55fbb7Slm66018 * vdc_init_ver_negotiation() 16150a55fbb7Slm66018 * 16160a55fbb7Slm66018 * Description: 16170a55fbb7Slm66018 * 16180a55fbb7Slm66018 * Arguments: 16190a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 16200a55fbb7Slm66018 * 16210a55fbb7Slm66018 * Return Code: 16220a55fbb7Slm66018 * 0 - Success 16230a55fbb7Slm66018 */ 16241ae08745Sheppo static int 16250a55fbb7Slm66018 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 16261ae08745Sheppo { 16271ae08745Sheppo vio_ver_msg_t pkt; 16281ae08745Sheppo size_t msglen = sizeof (pkt); 16291ae08745Sheppo int status = -1; 16301ae08745Sheppo 16311ae08745Sheppo ASSERT(vdc != NULL); 16321ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 16331ae08745Sheppo 16343af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1635e1ebb9ecSlm66018 16361ae08745Sheppo /* 16371ae08745Sheppo * set the Session ID to a unique value 16381ae08745Sheppo * (the lower 32 bits of the clock tick) 16391ae08745Sheppo */ 16401ae08745Sheppo vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 16413af08d82Slm66018 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 16421ae08745Sheppo 16431ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 16441ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 16451ae08745Sheppo pkt.tag.vio_subtype_env = VIO_VER_INFO; 16461ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 16471ae08745Sheppo pkt.dev_class = VDEV_DISK; 
16480a55fbb7Slm66018 pkt.ver_major = ver.major; 16490a55fbb7Slm66018 pkt.ver_minor = ver.minor; 16501ae08745Sheppo 16510a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 16523af08d82Slm66018 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 16533af08d82Slm66018 vdc->instance, status); 16541ae08745Sheppo if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 16553af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 16568cd10891Snarayan "id(%lx) rv(%d) size(%ld)", vdc->instance, 16578cd10891Snarayan vdc->curr_server->ldc_handle, status, msglen); 16581ae08745Sheppo if (msglen != sizeof (vio_ver_msg_t)) 16591ae08745Sheppo status = ENOMSG; 16601ae08745Sheppo } 16611ae08745Sheppo 16621ae08745Sheppo return (status); 16631ae08745Sheppo } 16641ae08745Sheppo 16650a55fbb7Slm66018 /* 16660a55fbb7Slm66018 * Function: 16673af08d82Slm66018 * vdc_ver_negotiation() 16683af08d82Slm66018 * 16693af08d82Slm66018 * Description: 16703af08d82Slm66018 * 16713af08d82Slm66018 * Arguments: 16723af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
16733af08d82Slm66018 * 16743af08d82Slm66018 * Return Code: 16753af08d82Slm66018 * 0 - Success 16763af08d82Slm66018 */ 16773af08d82Slm66018 static int 16783af08d82Slm66018 vdc_ver_negotiation(vdc_t *vdcp) 16793af08d82Slm66018 { 16803af08d82Slm66018 vio_msg_t vio_msg; 16813af08d82Slm66018 int status; 16823af08d82Slm66018 16833af08d82Slm66018 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 16843af08d82Slm66018 return (status); 16853af08d82Slm66018 16863af08d82Slm66018 /* release lock and wait for response */ 16873af08d82Slm66018 mutex_exit(&vdcp->lock); 16883af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 16893af08d82Slm66018 mutex_enter(&vdcp->lock); 16903af08d82Slm66018 if (status) { 16913af08d82Slm66018 DMSG(vdcp, 0, 16923af08d82Slm66018 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 16933af08d82Slm66018 vdcp->instance, status); 16943af08d82Slm66018 return (status); 16953af08d82Slm66018 } 16963af08d82Slm66018 16973af08d82Slm66018 /* check type and sub_type ... */ 16983af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 16993af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 17003af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 17013af08d82Slm66018 vdcp->instance); 17023af08d82Slm66018 return (EPROTO); 17033af08d82Slm66018 } 17043af08d82Slm66018 17053af08d82Slm66018 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 17063af08d82Slm66018 } 17073af08d82Slm66018 17083af08d82Slm66018 /* 17093af08d82Slm66018 * Function: 17100a55fbb7Slm66018 * vdc_init_attr_negotiation() 17110a55fbb7Slm66018 * 17120a55fbb7Slm66018 * Description: 17130a55fbb7Slm66018 * 17140a55fbb7Slm66018 * Arguments: 17150a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
17160a55fbb7Slm66018 * 17170a55fbb7Slm66018 * Return Code: 17180a55fbb7Slm66018 * 0 - Success 17190a55fbb7Slm66018 */ 17201ae08745Sheppo static int 17211ae08745Sheppo vdc_init_attr_negotiation(vdc_t *vdc) 17221ae08745Sheppo { 17231ae08745Sheppo vd_attr_msg_t pkt; 17241ae08745Sheppo size_t msglen = sizeof (pkt); 17251ae08745Sheppo int status; 17261ae08745Sheppo 17271ae08745Sheppo ASSERT(vdc != NULL); 17281ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 17291ae08745Sheppo 17303af08d82Slm66018 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 17311ae08745Sheppo 17321ae08745Sheppo /* fill in tag */ 17331ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 17341ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 17351ae08745Sheppo pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 17361ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 17371ae08745Sheppo /* fill in payload */ 17381ae08745Sheppo pkt.max_xfer_sz = vdc->max_xfer_sz; 173965908c77Syu, larry liu - Sun Microsystems - Beijing China pkt.vdisk_block_size = vdc->vdisk_bsize; 1740f0ca1d9aSsb155480 pkt.xfer_mode = VIO_DRING_MODE_V1_0; 17411ae08745Sheppo pkt.operations = 0; /* server will set bits of valid operations */ 17421ae08745Sheppo pkt.vdisk_type = 0; /* server will set to valid device type */ 174317cadca8Slm66018 pkt.vdisk_media = 0; /* server will set to valid media type */ 17441ae08745Sheppo pkt.vdisk_size = 0; /* server will set to valid size */ 17451ae08745Sheppo 17460a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 17473af08d82Slm66018 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 17481ae08745Sheppo 1749f3241e46Sanbui if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) { 17503af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 17518cd10891Snarayan "id(%lx) rv(%d) size(%ld)", vdc->instance, 17528cd10891Snarayan vdc->curr_server->ldc_handle, status, msglen); 1753f3241e46Sanbui if (msglen != sizeof (vd_attr_msg_t)) 17541ae08745Sheppo status = ENOMSG; 17551ae08745Sheppo } 
17561ae08745Sheppo 17571ae08745Sheppo return (status); 17581ae08745Sheppo } 17591ae08745Sheppo 17600a55fbb7Slm66018 /* 17610a55fbb7Slm66018 * Function: 17623af08d82Slm66018 * vdc_attr_negotiation() 17633af08d82Slm66018 * 17643af08d82Slm66018 * Description: 17653af08d82Slm66018 * 17663af08d82Slm66018 * Arguments: 17673af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 17683af08d82Slm66018 * 17693af08d82Slm66018 * Return Code: 17703af08d82Slm66018 * 0 - Success 17713af08d82Slm66018 */ 17723af08d82Slm66018 static int 17733af08d82Slm66018 vdc_attr_negotiation(vdc_t *vdcp) 17743af08d82Slm66018 { 17753af08d82Slm66018 int status; 17763af08d82Slm66018 vio_msg_t vio_msg; 17773af08d82Slm66018 17783af08d82Slm66018 if (status = vdc_init_attr_negotiation(vdcp)) 17793af08d82Slm66018 return (status); 17803af08d82Slm66018 17813af08d82Slm66018 /* release lock and wait for response */ 17823af08d82Slm66018 mutex_exit(&vdcp->lock); 17833af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 17843af08d82Slm66018 mutex_enter(&vdcp->lock); 17853af08d82Slm66018 if (status) { 17863af08d82Slm66018 DMSG(vdcp, 0, 17873af08d82Slm66018 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 17883af08d82Slm66018 vdcp->instance, status); 17893af08d82Slm66018 return (status); 17903af08d82Slm66018 } 17913af08d82Slm66018 17923af08d82Slm66018 /* check type and sub_type ... 
*/ 17933af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 17943af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 17953af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 17963af08d82Slm66018 vdcp->instance); 17973af08d82Slm66018 return (EPROTO); 17983af08d82Slm66018 } 17993af08d82Slm66018 18003af08d82Slm66018 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 18013af08d82Slm66018 } 18023af08d82Slm66018 18033af08d82Slm66018 18043af08d82Slm66018 /* 18053af08d82Slm66018 * Function: 18060a55fbb7Slm66018 * vdc_init_dring_negotiate() 18070a55fbb7Slm66018 * 18080a55fbb7Slm66018 * Description: 18090a55fbb7Slm66018 * 18100a55fbb7Slm66018 * Arguments: 18110a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 18120a55fbb7Slm66018 * 18130a55fbb7Slm66018 * Return Code: 18140a55fbb7Slm66018 * 0 - Success 18150a55fbb7Slm66018 */ 18161ae08745Sheppo static int 18171ae08745Sheppo vdc_init_dring_negotiate(vdc_t *vdc) 18181ae08745Sheppo { 18191ae08745Sheppo vio_dring_reg_msg_t pkt; 18201ae08745Sheppo size_t msglen = sizeof (pkt); 18211ae08745Sheppo int status = -1; 18223af08d82Slm66018 int retry; 18233af08d82Slm66018 int nretries = 10; 18241ae08745Sheppo 18251ae08745Sheppo ASSERT(vdc != NULL); 18261ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 18271ae08745Sheppo 18283af08d82Slm66018 for (retry = 0; retry < nretries; retry++) { 18291ae08745Sheppo status = vdc_init_descriptor_ring(vdc); 18303af08d82Slm66018 if (status != EAGAIN) 18313af08d82Slm66018 break; 18323af08d82Slm66018 drv_usecwait(vdc_min_timeout_ldc); 18333af08d82Slm66018 } 18343af08d82Slm66018 18351ae08745Sheppo if (status != 0) { 18363af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 18371ae08745Sheppo vdc->instance, status); 18381ae08745Sheppo return (status); 18391ae08745Sheppo } 18403af08d82Slm66018 18413af08d82Slm66018 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 
1842e1ebb9ecSlm66018 vdc->instance, status); 18431ae08745Sheppo 18441ae08745Sheppo /* fill in tag */ 18451ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 18461ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 18471ae08745Sheppo pkt.tag.vio_subtype_env = VIO_DRING_REG; 18481ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 18491ae08745Sheppo /* fill in payload */ 18501ae08745Sheppo pkt.dring_ident = 0; 1851e1ebb9ecSlm66018 pkt.num_descriptors = vdc->dring_len; 1852e1ebb9ecSlm66018 pkt.descriptor_size = vdc->dring_entry_size; 18531ae08745Sheppo pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 18541ae08745Sheppo pkt.ncookies = vdc->dring_cookie_count; 18551ae08745Sheppo pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 18561ae08745Sheppo 18570a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 18581ae08745Sheppo if (status != 0) { 18593af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1860e1ebb9ecSlm66018 vdc->instance, status); 18611ae08745Sheppo } 18621ae08745Sheppo 18631ae08745Sheppo return (status); 18641ae08745Sheppo } 18651ae08745Sheppo 18661ae08745Sheppo 18673af08d82Slm66018 /* 18683af08d82Slm66018 * Function: 18693af08d82Slm66018 * vdc_dring_negotiation() 18703af08d82Slm66018 * 18713af08d82Slm66018 * Description: 18723af08d82Slm66018 * 18733af08d82Slm66018 * Arguments: 18743af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
18753af08d82Slm66018 * 18763af08d82Slm66018 * Return Code: 18773af08d82Slm66018 * 0 - Success 18783af08d82Slm66018 */ 18793af08d82Slm66018 static int 18803af08d82Slm66018 vdc_dring_negotiation(vdc_t *vdcp) 18813af08d82Slm66018 { 18823af08d82Slm66018 int status; 18833af08d82Slm66018 vio_msg_t vio_msg; 18843af08d82Slm66018 18853af08d82Slm66018 if (status = vdc_init_dring_negotiate(vdcp)) 18863af08d82Slm66018 return (status); 18873af08d82Slm66018 18883af08d82Slm66018 /* release lock and wait for response */ 18893af08d82Slm66018 mutex_exit(&vdcp->lock); 18903af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 18913af08d82Slm66018 mutex_enter(&vdcp->lock); 18923af08d82Slm66018 if (status) { 18933af08d82Slm66018 DMSG(vdcp, 0, 18943af08d82Slm66018 "[%d] Failed waiting for Dring negotiation response," 18953af08d82Slm66018 " rv(%d)", vdcp->instance, status); 18963af08d82Slm66018 return (status); 18973af08d82Slm66018 } 18983af08d82Slm66018 18993af08d82Slm66018 /* check type and sub_type ... */ 19003af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 19013af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 19023af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 19033af08d82Slm66018 vdcp->instance); 19043af08d82Slm66018 return (EPROTO); 19053af08d82Slm66018 } 19063af08d82Slm66018 19073af08d82Slm66018 return (vdc_handle_dring_reg_msg(vdcp, 19083af08d82Slm66018 (vio_dring_reg_msg_t *)&vio_msg)); 19093af08d82Slm66018 } 19103af08d82Slm66018 19113af08d82Slm66018 19123af08d82Slm66018 /* 19133af08d82Slm66018 * Function: 19143af08d82Slm66018 * vdc_send_rdx() 19153af08d82Slm66018 * 19163af08d82Slm66018 * Description: 19173af08d82Slm66018 * 19183af08d82Slm66018 * Arguments: 19193af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
19203af08d82Slm66018 * 19213af08d82Slm66018 * Return Code: 19223af08d82Slm66018 * 0 - Success 19233af08d82Slm66018 */ 19243af08d82Slm66018 static int 19253af08d82Slm66018 vdc_send_rdx(vdc_t *vdcp) 19263af08d82Slm66018 { 19273af08d82Slm66018 vio_msg_t msg; 19283af08d82Slm66018 size_t msglen = sizeof (vio_msg_t); 19293af08d82Slm66018 int status; 19303af08d82Slm66018 19313af08d82Slm66018 /* 19323af08d82Slm66018 * Send an RDX message to vds to indicate we are ready 19333af08d82Slm66018 * to send data 19343af08d82Slm66018 */ 19353af08d82Slm66018 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 19363af08d82Slm66018 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 19373af08d82Slm66018 msg.tag.vio_subtype_env = VIO_RDX; 19383af08d82Slm66018 msg.tag.vio_sid = vdcp->session_id; 19393af08d82Slm66018 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 19403af08d82Slm66018 if (status != 0) { 19413af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 19423af08d82Slm66018 vdcp->instance, status); 19433af08d82Slm66018 } 19443af08d82Slm66018 19453af08d82Slm66018 return (status); 19463af08d82Slm66018 } 19473af08d82Slm66018 19483af08d82Slm66018 /* 19493af08d82Slm66018 * Function: 19503af08d82Slm66018 * vdc_handle_rdx() 19513af08d82Slm66018 * 19523af08d82Slm66018 * Description: 19533af08d82Slm66018 * 19543af08d82Slm66018 * Arguments: 19553af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
19563af08d82Slm66018 * msgp - received msg 19573af08d82Slm66018 * 19583af08d82Slm66018 * Return Code: 19593af08d82Slm66018 * 0 - Success 19603af08d82Slm66018 */ 19613af08d82Slm66018 static int 19623af08d82Slm66018 vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp) 19633af08d82Slm66018 { 19643af08d82Slm66018 _NOTE(ARGUNUSED(vdcp)) 19653af08d82Slm66018 _NOTE(ARGUNUSED(msgp)) 19663af08d82Slm66018 19673af08d82Slm66018 ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL); 19683af08d82Slm66018 ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK); 19693af08d82Slm66018 ASSERT(msgp->tag.vio_subtype_env == VIO_RDX); 19703af08d82Slm66018 19713af08d82Slm66018 DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance); 19723af08d82Slm66018 19733af08d82Slm66018 return (0); 19743af08d82Slm66018 } 19753af08d82Slm66018 19763af08d82Slm66018 /* 19773af08d82Slm66018 * Function: 19783af08d82Slm66018 * vdc_rdx_exchange() 19793af08d82Slm66018 * 19803af08d82Slm66018 * Description: 19813af08d82Slm66018 * 19823af08d82Slm66018 * Arguments: 19833af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
19843af08d82Slm66018 * 19853af08d82Slm66018 * Return Code: 19863af08d82Slm66018 * 0 - Success 19873af08d82Slm66018 */ 19883af08d82Slm66018 static int 19893af08d82Slm66018 vdc_rdx_exchange(vdc_t *vdcp) 19903af08d82Slm66018 { 19913af08d82Slm66018 int status; 19923af08d82Slm66018 vio_msg_t vio_msg; 19933af08d82Slm66018 19943af08d82Slm66018 if (status = vdc_send_rdx(vdcp)) 19953af08d82Slm66018 return (status); 19963af08d82Slm66018 19973af08d82Slm66018 /* release lock and wait for response */ 19983af08d82Slm66018 mutex_exit(&vdcp->lock); 19993af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 20003af08d82Slm66018 mutex_enter(&vdcp->lock); 20013af08d82Slm66018 if (status) { 200287a7269eSachartre DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)", 200387a7269eSachartre vdcp->instance, status); 20043af08d82Slm66018 return (status); 20053af08d82Slm66018 } 20063af08d82Slm66018 20073af08d82Slm66018 /* check type and sub_type ... */ 20083af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 20093af08d82Slm66018 vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) { 201087a7269eSachartre DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance); 20113af08d82Slm66018 return (EPROTO); 20123af08d82Slm66018 } 20133af08d82Slm66018 20143af08d82Slm66018 return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg)); 20153af08d82Slm66018 } 20163af08d82Slm66018 20173af08d82Slm66018 20181ae08745Sheppo /* -------------------------------------------------------------------------- */ 20191ae08745Sheppo 20201ae08745Sheppo /* 20211ae08745Sheppo * LDC helper routines 20221ae08745Sheppo */ 20231ae08745Sheppo 20243af08d82Slm66018 static int 20253af08d82Slm66018 vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp) 20263af08d82Slm66018 { 20273af08d82Slm66018 int status; 202817cadca8Slm66018 uint64_t delay_time; 20293af08d82Slm66018 size_t len; 20303af08d82Slm66018 2031ea43803bSAlexandre Chartre /* 2032ea43803bSAlexandre Chartre * Until we get a blocking ldc read we have to 
retry until the entire 2033ea43803bSAlexandre Chartre * LDC message has arrived before ldc_read() will return that message. 2034ea43803bSAlexandre Chartre * If ldc_read() succeed but returns a zero length message then that 2035ea43803bSAlexandre Chartre * means that the LDC queue is empty and we have to wait for a 2036ea43803bSAlexandre Chartre * notification from the LDC callback which will set the read_state to 2037ea43803bSAlexandre Chartre * VDC_READ_PENDING. Note we also bail out if the channel is reset or 2038ea43803bSAlexandre Chartre * goes away. 2039ea43803bSAlexandre Chartre */ 2040ea43803bSAlexandre Chartre delay_time = vdc_ldc_read_init_delay; 20413af08d82Slm66018 2042ea43803bSAlexandre Chartre for (;;) { 2043ea43803bSAlexandre Chartre 2044ea43803bSAlexandre Chartre len = *nbytesp; 2045ea43803bSAlexandre Chartre /* 2046ea43803bSAlexandre Chartre * vdc->curr_server is protected by vdc->lock but to avoid 2047ea43803bSAlexandre Chartre * contentions we don't take the lock here. We can do this 2048ea43803bSAlexandre Chartre * safely because vdc_recv() is only called from thread 2049ea43803bSAlexandre Chartre * process_msg_thread() which is also the only thread that 2050ea43803bSAlexandre Chartre * can change vdc->curr_server. 
2051ea43803bSAlexandre Chartre */ 2052ea43803bSAlexandre Chartre status = ldc_read(vdc->curr_server->ldc_handle, 2053ea43803bSAlexandre Chartre (caddr_t)msgp, &len); 2054ea43803bSAlexandre Chartre 2055ea43803bSAlexandre Chartre if (status == EAGAIN) { 2056ea43803bSAlexandre Chartre delay_time *= 2; 2057ea43803bSAlexandre Chartre if (delay_time >= vdc_ldc_read_max_delay) 2058ea43803bSAlexandre Chartre delay_time = vdc_ldc_read_max_delay; 2059ea43803bSAlexandre Chartre delay(delay_time); 2060ea43803bSAlexandre Chartre continue; 2061ea43803bSAlexandre Chartre } 2062ea43803bSAlexandre Chartre 2063ea43803bSAlexandre Chartre if (status != 0) { 2064ea43803bSAlexandre Chartre DMSG(vdc, 0, "ldc_read returned %d\n", status); 2065ea43803bSAlexandre Chartre break; 2066ea43803bSAlexandre Chartre } 2067ea43803bSAlexandre Chartre 2068ea43803bSAlexandre Chartre if (len != 0) { 2069ea43803bSAlexandre Chartre *nbytesp = len; 2070ea43803bSAlexandre Chartre break; 2071ea43803bSAlexandre Chartre } 2072ea43803bSAlexandre Chartre 2073ea43803bSAlexandre Chartre mutex_enter(&vdc->read_lock); 20743af08d82Slm66018 20753af08d82Slm66018 while (vdc->read_state != VDC_READ_PENDING) { 20763af08d82Slm66018 20773af08d82Slm66018 /* detect if the connection has been reset */ 20783af08d82Slm66018 if (vdc->read_state == VDC_READ_RESET) { 2079ea43803bSAlexandre Chartre mutex_exit(&vdc->read_lock); 2080ea43803bSAlexandre Chartre return (ECONNRESET); 20813af08d82Slm66018 } 20823af08d82Slm66018 2083ea43803bSAlexandre Chartre vdc->read_state = VDC_READ_WAITING; 20843af08d82Slm66018 cv_wait(&vdc->read_cv, &vdc->read_lock); 20853af08d82Slm66018 } 20863af08d82Slm66018 20873af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 20883af08d82Slm66018 mutex_exit(&vdc->read_lock); 20893af08d82Slm66018 2090ea43803bSAlexandre Chartre delay_time = vdc_ldc_read_init_delay; 2091ea43803bSAlexandre Chartre } 2092ea43803bSAlexandre Chartre 20933af08d82Slm66018 return (status); 20943af08d82Slm66018 } 20953af08d82Slm66018 
20963af08d82Slm66018 20973af08d82Slm66018 20983af08d82Slm66018 #ifdef DEBUG 20993af08d82Slm66018 void 21003af08d82Slm66018 vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg) 21013af08d82Slm66018 { 21023af08d82Slm66018 char *ms, *ss, *ses; 21033af08d82Slm66018 switch (msg->tag.vio_msgtype) { 21043af08d82Slm66018 #define Q(_s) case _s : ms = #_s; break; 21053af08d82Slm66018 Q(VIO_TYPE_CTRL) 21063af08d82Slm66018 Q(VIO_TYPE_DATA) 21073af08d82Slm66018 Q(VIO_TYPE_ERR) 21083af08d82Slm66018 #undef Q 21093af08d82Slm66018 default: ms = "unknown"; break; 21103af08d82Slm66018 } 21113af08d82Slm66018 21123af08d82Slm66018 switch (msg->tag.vio_subtype) { 21133af08d82Slm66018 #define Q(_s) case _s : ss = #_s; break; 21143af08d82Slm66018 Q(VIO_SUBTYPE_INFO) 21153af08d82Slm66018 Q(VIO_SUBTYPE_ACK) 21163af08d82Slm66018 Q(VIO_SUBTYPE_NACK) 21173af08d82Slm66018 #undef Q 21183af08d82Slm66018 default: ss = "unknown"; break; 21193af08d82Slm66018 } 21203af08d82Slm66018 21213af08d82Slm66018 switch (msg->tag.vio_subtype_env) { 21223af08d82Slm66018 #define Q(_s) case _s : ses = #_s; break; 21233af08d82Slm66018 Q(VIO_VER_INFO) 21243af08d82Slm66018 Q(VIO_ATTR_INFO) 21253af08d82Slm66018 Q(VIO_DRING_REG) 21263af08d82Slm66018 Q(VIO_DRING_UNREG) 21273af08d82Slm66018 Q(VIO_RDX) 21283af08d82Slm66018 Q(VIO_PKT_DATA) 21293af08d82Slm66018 Q(VIO_DESC_DATA) 21303af08d82Slm66018 Q(VIO_DRING_DATA) 21313af08d82Slm66018 #undef Q 21323af08d82Slm66018 default: ses = "unknown"; break; 21333af08d82Slm66018 } 21343af08d82Slm66018 21353af08d82Slm66018 DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n", 21363af08d82Slm66018 msg->tag.vio_msgtype, msg->tag.vio_subtype, 21373af08d82Slm66018 msg->tag.vio_subtype_env, ms, ss, ses); 21383af08d82Slm66018 } 21393af08d82Slm66018 #endif 21403af08d82Slm66018 21411ae08745Sheppo /* 21421ae08745Sheppo * Function: 21431ae08745Sheppo * vdc_send() 21441ae08745Sheppo * 21451ae08745Sheppo * Description: 21461ae08745Sheppo * The function encapsulates the call to write a message using LDC. 
21471ae08745Sheppo * If LDC indicates that the call failed due to the queue being full, 214817cadca8Slm66018 * we retry the ldc_write(), otherwise we return the error returned by LDC. 21491ae08745Sheppo * 21501ae08745Sheppo * Arguments: 21511ae08745Sheppo * ldc_handle - LDC handle for the channel this instance of vdc uses 21521ae08745Sheppo * pkt - address of LDC message to be sent 21531ae08745Sheppo * msglen - the size of the message being sent. When the function 21541ae08745Sheppo * returns, this contains the number of bytes written. 21551ae08745Sheppo * 21561ae08745Sheppo * Return Code: 21571ae08745Sheppo * 0 - Success. 21581ae08745Sheppo * EINVAL - pkt or msglen were NULL 21591ae08745Sheppo * ECONNRESET - The connection was not up. 21601ae08745Sheppo * EWOULDBLOCK - LDC queue is full 21611ae08745Sheppo * xxx - other error codes returned by ldc_write 21621ae08745Sheppo */ 21631ae08745Sheppo static int 21640a55fbb7Slm66018 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 21651ae08745Sheppo { 21661ae08745Sheppo size_t size = 0; 21671ae08745Sheppo int status = 0; 21683af08d82Slm66018 clock_t delay_ticks; 21691ae08745Sheppo 21700a55fbb7Slm66018 ASSERT(vdc != NULL); 21710a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 21721ae08745Sheppo ASSERT(msglen != NULL); 21731ae08745Sheppo ASSERT(*msglen != 0); 21741ae08745Sheppo 21753af08d82Slm66018 #ifdef DEBUG 217617cadca8Slm66018 vdc_decode_tag(vdc, (vio_msg_t *)(uintptr_t)pkt); 21773af08d82Slm66018 #endif 21783af08d82Slm66018 /* 21793af08d82Slm66018 * Wait indefinitely to send if channel 21803af08d82Slm66018 * is busy, but bail out if we succeed or 21813af08d82Slm66018 * if the channel closes or is reset. 
21823af08d82Slm66018 */ 21833af08d82Slm66018 delay_ticks = vdc_hz_min_ldc_delay; 21841ae08745Sheppo do { 21851ae08745Sheppo size = *msglen; 21868cd10891Snarayan status = ldc_write(vdc->curr_server->ldc_handle, pkt, &size); 21873af08d82Slm66018 if (status == EWOULDBLOCK) { 21883af08d82Slm66018 delay(delay_ticks); 21893af08d82Slm66018 /* geometric backoff */ 21903af08d82Slm66018 delay_ticks *= 2; 21913af08d82Slm66018 if (delay_ticks > vdc_hz_max_ldc_delay) 21923af08d82Slm66018 delay_ticks = vdc_hz_max_ldc_delay; 21933af08d82Slm66018 } 21943af08d82Slm66018 } while (status == EWOULDBLOCK); 21951ae08745Sheppo 21960a55fbb7Slm66018 /* if LDC had serious issues --- reset vdc state */ 21970a55fbb7Slm66018 if (status == EIO || status == ECONNRESET) { 21983af08d82Slm66018 /* LDC had serious issues --- reset vdc state */ 21993af08d82Slm66018 mutex_enter(&vdc->read_lock); 22003af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 22013af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 22023af08d82Slm66018 cv_signal(&vdc->read_cv); 22033af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 22043af08d82Slm66018 mutex_exit(&vdc->read_lock); 22053af08d82Slm66018 22063af08d82Slm66018 /* wake up any waiters in the reset thread */ 22073af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 22083af08d82Slm66018 DMSG(vdc, 0, "[%d] write reset - " 22093af08d82Slm66018 "vdc is resetting ..\n", vdc->instance); 22103af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 22113af08d82Slm66018 cv_signal(&vdc->initwait_cv); 22123af08d82Slm66018 } 22133af08d82Slm66018 22143af08d82Slm66018 return (ECONNRESET); 22150a55fbb7Slm66018 } 22160a55fbb7Slm66018 22171ae08745Sheppo /* return the last size written */ 22181ae08745Sheppo *msglen = size; 22191ae08745Sheppo 22201ae08745Sheppo return (status); 22211ae08745Sheppo } 22221ae08745Sheppo 22231ae08745Sheppo /* 22241ae08745Sheppo * Function: 2225655fd6a9Sachartre * vdc_get_md_node 22261ae08745Sheppo * 22271ae08745Sheppo * Description: 
22288cd10891Snarayan * Get the MD, the device node for the given disk instance. The 22298cd10891Snarayan * caller is responsible for cleaning up the reference to the 22308cd10891Snarayan * returned MD (mdpp) by calling md_fini_handle(). 22311ae08745Sheppo * 22321ae08745Sheppo * Arguments: 22331ae08745Sheppo * dip - dev info pointer for this instance of the device driver. 2234655fd6a9Sachartre * mdpp - the returned MD. 2235655fd6a9Sachartre * vd_nodep - the returned device node. 22361ae08745Sheppo * 22371ae08745Sheppo * Return Code: 22381ae08745Sheppo * 0 - Success. 22391ae08745Sheppo * ENOENT - Expected node or property did not exist. 22401ae08745Sheppo * ENXIO - Unexpected error communicating with MD framework 22411ae08745Sheppo */ 22421ae08745Sheppo static int 22438cd10891Snarayan vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep) 22441ae08745Sheppo { 22451ae08745Sheppo int status = ENOENT; 22461ae08745Sheppo char *node_name = NULL; 22471ae08745Sheppo md_t *mdp = NULL; 22481ae08745Sheppo int num_nodes; 22491ae08745Sheppo int num_vdevs; 22501ae08745Sheppo mde_cookie_t rootnode; 22511ae08745Sheppo mde_cookie_t *listp = NULL; 22521ae08745Sheppo boolean_t found_inst = B_FALSE; 22531ae08745Sheppo int listsz; 22541ae08745Sheppo int idx; 22551ae08745Sheppo uint64_t md_inst; 22561ae08745Sheppo int obp_inst; 22571ae08745Sheppo int instance = ddi_get_instance(dip); 22581ae08745Sheppo 22591ae08745Sheppo /* 22601ae08745Sheppo * Get the OBP instance number for comparison with the MD instance 22611ae08745Sheppo * 22621ae08745Sheppo * The "cfg-handle" property of a vdc node in an MD contains the MD's 22631ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 22641ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 22651ae08745Sheppo * the "reg" property on the node in the device tree it builds from 22661ae08745Sheppo * the MD and passes to Solaris. 
Thus, we look up the devinfo node's 22671ae08745Sheppo * "reg" property value to uniquely identify this device instance. 22681ae08745Sheppo * If the "reg" property cannot be found, the device tree state is 22691ae08745Sheppo * presumably so broken that there is no point in continuing. 22701ae08745Sheppo */ 22711ae08745Sheppo if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 22721ae08745Sheppo cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 22731ae08745Sheppo return (ENOENT); 22741ae08745Sheppo } 22751ae08745Sheppo obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 22761ae08745Sheppo OBP_REG, -1); 22773af08d82Slm66018 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 22781ae08745Sheppo 22791ae08745Sheppo /* 2280655fd6a9Sachartre * We now walk the MD nodes to find the node for this vdisk. 22811ae08745Sheppo */ 22821ae08745Sheppo if ((mdp = md_get_handle()) == NULL) { 22831ae08745Sheppo cmn_err(CE_WARN, "unable to init machine description"); 22841ae08745Sheppo return (ENXIO); 22851ae08745Sheppo } 22861ae08745Sheppo 22871ae08745Sheppo num_nodes = md_node_count(mdp); 22881ae08745Sheppo ASSERT(num_nodes > 0); 22891ae08745Sheppo 22901ae08745Sheppo listsz = num_nodes * sizeof (mde_cookie_t); 22911ae08745Sheppo 22921ae08745Sheppo /* allocate memory for nodes */ 22931ae08745Sheppo listp = kmem_zalloc(listsz, KM_SLEEP); 22941ae08745Sheppo 22951ae08745Sheppo rootnode = md_root_node(mdp); 22961ae08745Sheppo ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 22971ae08745Sheppo 22981ae08745Sheppo /* 22991ae08745Sheppo * Search for all the virtual devices, we will then check to see which 23001ae08745Sheppo * ones are disk nodes. 
23011ae08745Sheppo */ 23021ae08745Sheppo num_vdevs = md_scan_dag(mdp, rootnode, 23031ae08745Sheppo md_find_name(mdp, VDC_MD_VDEV_NAME), 23041ae08745Sheppo md_find_name(mdp, "fwd"), listp); 23051ae08745Sheppo 23061ae08745Sheppo if (num_vdevs <= 0) { 23071ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 23081ae08745Sheppo status = ENOENT; 23091ae08745Sheppo goto done; 23101ae08745Sheppo } 23111ae08745Sheppo 23123af08d82Slm66018 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 23131ae08745Sheppo for (idx = 0; idx < num_vdevs; idx++) { 23141ae08745Sheppo status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 23151ae08745Sheppo if ((status != 0) || (node_name == NULL)) { 23161ae08745Sheppo cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 23171ae08745Sheppo ": err %d", VDC_MD_VDEV_NAME, status); 23181ae08745Sheppo continue; 23191ae08745Sheppo } 23201ae08745Sheppo 23213af08d82Slm66018 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 23221ae08745Sheppo if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 23231ae08745Sheppo status = md_get_prop_val(mdp, listp[idx], 23241ae08745Sheppo VDC_MD_CFG_HDL, &md_inst); 23253af08d82Slm66018 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 23263af08d82Slm66018 instance, md_inst); 23271ae08745Sheppo if ((status == 0) && (md_inst == obp_inst)) { 23281ae08745Sheppo found_inst = B_TRUE; 23291ae08745Sheppo break; 23301ae08745Sheppo } 23311ae08745Sheppo } 23321ae08745Sheppo } 23331ae08745Sheppo 23340a55fbb7Slm66018 if (!found_inst) { 23353af08d82Slm66018 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 23361ae08745Sheppo status = ENOENT; 23371ae08745Sheppo goto done; 23381ae08745Sheppo } 23393af08d82Slm66018 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 23401ae08745Sheppo 2341655fd6a9Sachartre *vd_nodep = listp[idx]; 2342655fd6a9Sachartre *mdpp = mdp; 2343655fd6a9Sachartre done: 2344655fd6a9Sachartre kmem_free(listp, listsz); 2345655fd6a9Sachartre return (status); 2346655fd6a9Sachartre 
} 2347655fd6a9Sachartre 2348655fd6a9Sachartre /* 2349655fd6a9Sachartre * Function: 23508cd10891Snarayan * vdc_init_ports 2351655fd6a9Sachartre * 2352655fd6a9Sachartre * Description: 23538cd10891Snarayan * Initialize all the ports for this vdisk instance. 2354655fd6a9Sachartre * 2355655fd6a9Sachartre * Arguments: 23568cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 23578cd10891Snarayan * mdp - md pointer 23588cd10891Snarayan * vd_nodep - device md node. 2359655fd6a9Sachartre * 2360655fd6a9Sachartre * Return Code: 2361655fd6a9Sachartre * 0 - Success. 2362655fd6a9Sachartre * ENOENT - Expected node or property did not exist. 2363655fd6a9Sachartre */ 2364655fd6a9Sachartre static int 23658cd10891Snarayan vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep) 2366655fd6a9Sachartre { 2367655fd6a9Sachartre int status = 0; 23688cd10891Snarayan int idx; 23698cd10891Snarayan int num_nodes; 23708cd10891Snarayan int num_vports; 23718cd10891Snarayan int num_chans; 23728cd10891Snarayan int listsz; 23738cd10891Snarayan mde_cookie_t vd_port; 23748cd10891Snarayan mde_cookie_t *chanp = NULL; 23758cd10891Snarayan mde_cookie_t *portp = NULL; 23768cd10891Snarayan vdc_server_t *srvr; 23778cd10891Snarayan vdc_server_t *prev_srvr = NULL; 2378655fd6a9Sachartre 23798cd10891Snarayan /* 23808cd10891Snarayan * We now walk the MD nodes to find the port nodes for this vdisk. 
23818cd10891Snarayan */ 2382655fd6a9Sachartre num_nodes = md_node_count(mdp); 2383655fd6a9Sachartre ASSERT(num_nodes > 0); 2384655fd6a9Sachartre 2385655fd6a9Sachartre listsz = num_nodes * sizeof (mde_cookie_t); 2386655fd6a9Sachartre 2387655fd6a9Sachartre /* allocate memory for nodes */ 23888cd10891Snarayan portp = kmem_zalloc(listsz, KM_SLEEP); 2389655fd6a9Sachartre chanp = kmem_zalloc(listsz, KM_SLEEP); 2390655fd6a9Sachartre 23918cd10891Snarayan num_vports = md_scan_dag(mdp, vd_nodep, 23928cd10891Snarayan md_find_name(mdp, VDC_MD_PORT_NAME), 23938cd10891Snarayan md_find_name(mdp, "fwd"), portp); 23948cd10891Snarayan if (num_vports == 0) { 23958cd10891Snarayan DMSGX(0, "Found no '%s' node for '%s' port\n", 23968cd10891Snarayan VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 23978cd10891Snarayan status = ENOENT; 23988cd10891Snarayan goto done; 23998cd10891Snarayan } 24008cd10891Snarayan 24018cd10891Snarayan DMSGX(1, "Found %d '%s' node(s) for '%s' port\n", 24028cd10891Snarayan num_vports, VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 24038cd10891Snarayan 24048cd10891Snarayan vdc->num_servers = 0; 24058cd10891Snarayan for (idx = 0; idx < num_vports; idx++) { 24068cd10891Snarayan 24078cd10891Snarayan /* initialize this port */ 24088cd10891Snarayan vd_port = portp[idx]; 24098cd10891Snarayan srvr = kmem_zalloc(sizeof (vdc_server_t), KM_SLEEP); 24108cd10891Snarayan srvr->vdcp = vdc; 24116ace3c90SAlexandre Chartre srvr->svc_state = VDC_SERVICE_OFFLINE; 24126ace3c90SAlexandre Chartre srvr->log_state = VDC_SERVICE_NONE; 24138cd10891Snarayan 24148cd10891Snarayan /* get port id */ 24158cd10891Snarayan if (md_get_prop_val(mdp, vd_port, VDC_MD_ID, &srvr->id) != 0) { 24168cd10891Snarayan cmn_err(CE_NOTE, "vDisk port '%s' property not found", 24178cd10891Snarayan VDC_MD_ID); 24188cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 24198cd10891Snarayan continue; 24208cd10891Snarayan } 24218cd10891Snarayan 24228cd10891Snarayan /* set the connection timeout */ 24238cd10891Snarayan if 
(md_get_prop_val(mdp, vd_port, VDC_MD_TIMEOUT, 24248cd10891Snarayan &srvr->ctimeout) != 0) { 24258cd10891Snarayan srvr->ctimeout = 0; 24268cd10891Snarayan } 24278cd10891Snarayan 24288cd10891Snarayan /* get the ldc id */ 24298cd10891Snarayan num_chans = md_scan_dag(mdp, vd_port, 24301ae08745Sheppo md_find_name(mdp, VDC_MD_CHAN_NAME), 24311ae08745Sheppo md_find_name(mdp, "fwd"), chanp); 24321ae08745Sheppo 24331ae08745Sheppo /* expecting at least one channel */ 24341ae08745Sheppo if (num_chans <= 0) { 24351ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node for '%s' port", 24361ae08745Sheppo VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 24378cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 24388cd10891Snarayan continue; 24391ae08745Sheppo } else if (num_chans != 1) { 24408cd10891Snarayan DMSGX(0, "Expected 1 '%s' node for '%s' port, " 24418cd10891Snarayan "found %d\n", VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 24428cd10891Snarayan num_chans); 24431ae08745Sheppo } 24441ae08745Sheppo 24451ae08745Sheppo /* 24461ae08745Sheppo * We use the first channel found (index 0), irrespective of how 24471ae08745Sheppo * many are there in total. 
24481ae08745Sheppo */ 24498cd10891Snarayan if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, 24508cd10891Snarayan &srvr->ldc_id) != 0) { 24518cd10891Snarayan cmn_err(CE_NOTE, "Channel '%s' property not found", 24528cd10891Snarayan VDC_MD_ID); 24538cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 24548cd10891Snarayan continue; 24558cd10891Snarayan } 24568cd10891Snarayan 24578cd10891Snarayan /* 24588cd10891Snarayan * now initialise LDC channel which will be used to 24598cd10891Snarayan * communicate with this server 24608cd10891Snarayan */ 24618cd10891Snarayan if (vdc_do_ldc_init(vdc, srvr) != 0) { 24628cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 24638cd10891Snarayan continue; 24648cd10891Snarayan } 24658cd10891Snarayan 24668cd10891Snarayan /* add server to list */ 2467d7400d00Sachartre if (prev_srvr) 24688cd10891Snarayan prev_srvr->next = srvr; 2469d7400d00Sachartre else 24708cd10891Snarayan vdc->server_list = srvr; 2471d7400d00Sachartre 24728cd10891Snarayan prev_srvr = srvr; 24738cd10891Snarayan 24748cd10891Snarayan /* inc numbers of servers */ 24758cd10891Snarayan vdc->num_servers++; 24768cd10891Snarayan } 24778cd10891Snarayan 24788cd10891Snarayan /* pick first server as current server */ 24798cd10891Snarayan if (vdc->server_list != NULL) { 24808cd10891Snarayan vdc->curr_server = vdc->server_list; 24818cd10891Snarayan status = 0; 24828cd10891Snarayan } else { 24831ae08745Sheppo status = ENOENT; 24841ae08745Sheppo } 24851ae08745Sheppo 24861ae08745Sheppo done: 24871ae08745Sheppo kmem_free(chanp, listsz); 24888cd10891Snarayan kmem_free(portp, listsz); 24891ae08745Sheppo return (status); 24901ae08745Sheppo } 24911ae08745Sheppo 24928cd10891Snarayan 24938cd10891Snarayan /* 24948cd10891Snarayan * Function: 24958cd10891Snarayan * vdc_do_ldc_up 24968cd10891Snarayan * 24978cd10891Snarayan * Description: 24988cd10891Snarayan * Bring the channel for the current server up. 
24998cd10891Snarayan * 25008cd10891Snarayan * Arguments: 25018cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 25028cd10891Snarayan * 25038cd10891Snarayan * Return Code: 25048cd10891Snarayan * 0 - Success. 25058cd10891Snarayan * EINVAL - Driver is detaching / LDC error 25068cd10891Snarayan * ECONNREFUSED - Other end is not listening 25078cd10891Snarayan */ 25080a55fbb7Slm66018 static int 25090a55fbb7Slm66018 vdc_do_ldc_up(vdc_t *vdc) 25100a55fbb7Slm66018 { 25110a55fbb7Slm66018 int status; 25123af08d82Slm66018 ldc_status_t ldc_state; 25130a55fbb7Slm66018 25148cd10891Snarayan ASSERT(MUTEX_HELD(&vdc->lock)); 25158cd10891Snarayan 25163af08d82Slm66018 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 25178cd10891Snarayan vdc->instance, vdc->curr_server->ldc_id); 25183af08d82Slm66018 25193af08d82Slm66018 if (vdc->lifecycle == VDC_LC_DETACHING) 25203af08d82Slm66018 return (EINVAL); 25210a55fbb7Slm66018 25228cd10891Snarayan if ((status = ldc_up(vdc->curr_server->ldc_handle)) != 0) { 25230a55fbb7Slm66018 switch (status) { 25240a55fbb7Slm66018 case ECONNREFUSED: /* listener not ready at other end */ 25253af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) 
return %d\n", 25268cd10891Snarayan vdc->instance, vdc->curr_server->ldc_id, status); 25270a55fbb7Slm66018 status = 0; 25280a55fbb7Slm66018 break; 25290a55fbb7Slm66018 default: 25303af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 25318cd10891Snarayan "channel=%ld, err=%d", vdc->instance, 25328cd10891Snarayan vdc->curr_server->ldc_id, status); 25333af08d82Slm66018 break; 25343af08d82Slm66018 } 25353af08d82Slm66018 } 25363af08d82Slm66018 25378cd10891Snarayan if (ldc_status(vdc->curr_server->ldc_handle, &ldc_state) == 0) { 25388cd10891Snarayan vdc->curr_server->ldc_state = ldc_state; 25393af08d82Slm66018 if (ldc_state == LDC_UP) { 25403af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC channel already up\n", 25413af08d82Slm66018 vdc->instance); 25423af08d82Slm66018 vdc->seq_num = 1; 25433af08d82Slm66018 vdc->seq_num_reply = 0; 25440a55fbb7Slm66018 } 25450a55fbb7Slm66018 } 25460a55fbb7Slm66018 25470a55fbb7Slm66018 return (status); 25480a55fbb7Slm66018 } 25490a55fbb7Slm66018 25500a55fbb7Slm66018 /* 25510a55fbb7Slm66018 * Function: 25520a55fbb7Slm66018 * vdc_terminate_ldc() 25530a55fbb7Slm66018 * 25540a55fbb7Slm66018 * Description: 25550a55fbb7Slm66018 * 25560a55fbb7Slm66018 * Arguments: 25570a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
25588cd10891Snarayan * srvr - vdc per-server info structure 25590a55fbb7Slm66018 * 25600a55fbb7Slm66018 * Return Code: 25610a55fbb7Slm66018 * None 25620a55fbb7Slm66018 */ 25631ae08745Sheppo static void 25648cd10891Snarayan vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr) 25651ae08745Sheppo { 25661ae08745Sheppo int instance = ddi_get_instance(vdc->dip); 25671ae08745Sheppo 25688cd10891Snarayan if (srvr->state & VDC_LDC_OPEN) { 25698cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 25708cd10891Snarayan (void) ldc_close(srvr->ldc_handle); 25718cd10891Snarayan } 25728cd10891Snarayan if (srvr->state & VDC_LDC_CB) { 25738cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 25748cd10891Snarayan (void) ldc_unreg_callback(srvr->ldc_handle); 25758cd10891Snarayan } 25768cd10891Snarayan if (srvr->state & VDC_LDC_INIT) { 25778cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 25788cd10891Snarayan (void) ldc_fini(srvr->ldc_handle); 25798cd10891Snarayan srvr->ldc_handle = NULL; 25808cd10891Snarayan } 25818cd10891Snarayan 25828cd10891Snarayan srvr->state &= ~(VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN); 25838cd10891Snarayan } 25848cd10891Snarayan 25858cd10891Snarayan /* 25868cd10891Snarayan * Function: 25878cd10891Snarayan * vdc_fini_ports() 25888cd10891Snarayan * 25898cd10891Snarayan * Description: 25908cd10891Snarayan * Finalize all ports by closing the channel associated with each 25918cd10891Snarayan * port and also freeing the server structure. 25928cd10891Snarayan * 25938cd10891Snarayan * Arguments: 25948cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 
25958cd10891Snarayan * 25968cd10891Snarayan * Return Code: 25978cd10891Snarayan * None 25988cd10891Snarayan */ 25998cd10891Snarayan static void 26008cd10891Snarayan vdc_fini_ports(vdc_t *vdc) 26018cd10891Snarayan { 26028cd10891Snarayan int instance = ddi_get_instance(vdc->dip); 26038cd10891Snarayan vdc_server_t *srvr, *prev_srvr; 26048cd10891Snarayan 26051ae08745Sheppo ASSERT(vdc != NULL); 26061ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 26071ae08745Sheppo 26083af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 26091ae08745Sheppo 26108cd10891Snarayan srvr = vdc->server_list; 26118cd10891Snarayan 26128cd10891Snarayan while (srvr) { 26138cd10891Snarayan 26148cd10891Snarayan vdc_terminate_ldc(vdc, srvr); 26158cd10891Snarayan 26168cd10891Snarayan /* next server */ 26178cd10891Snarayan prev_srvr = srvr; 26188cd10891Snarayan srvr = srvr->next; 26198cd10891Snarayan 26208cd10891Snarayan /* free server */ 26218cd10891Snarayan kmem_free(prev_srvr, sizeof (vdc_server_t)); 26221ae08745Sheppo } 26231ae08745Sheppo 26248cd10891Snarayan vdc->server_list = NULL; 26256ace3c90SAlexandre Chartre vdc->num_servers = 0; 26261ae08745Sheppo } 26271ae08745Sheppo 26281ae08745Sheppo /* -------------------------------------------------------------------------- */ 26291ae08745Sheppo 26301ae08745Sheppo /* 26311ae08745Sheppo * Descriptor Ring helper routines 26321ae08745Sheppo */ 26331ae08745Sheppo 26340a55fbb7Slm66018 /* 26350a55fbb7Slm66018 * Function: 26360a55fbb7Slm66018 * vdc_init_descriptor_ring() 26370a55fbb7Slm66018 * 26380a55fbb7Slm66018 * Description: 26390a55fbb7Slm66018 * 26400a55fbb7Slm66018 * Arguments: 26410a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
26420a55fbb7Slm66018 * 26430a55fbb7Slm66018 * Return Code: 26440a55fbb7Slm66018 * 0 - Success 26450a55fbb7Slm66018 */ 26461ae08745Sheppo static int 26471ae08745Sheppo vdc_init_descriptor_ring(vdc_t *vdc) 26481ae08745Sheppo { 26491ae08745Sheppo vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 26500a55fbb7Slm66018 int status = 0; 26511ae08745Sheppo int i; 26521ae08745Sheppo 26533af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 26541ae08745Sheppo 26551ae08745Sheppo ASSERT(vdc != NULL); 26561ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 26571ae08745Sheppo 2658e1ebb9ecSlm66018 /* ensure we have enough room to store max sized block */ 2659e1ebb9ecSlm66018 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2660e1ebb9ecSlm66018 26610a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 26623af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2663e1ebb9ecSlm66018 /* 2664e1ebb9ecSlm66018 * Calculate the maximum block size we can transmit using one 2665e1ebb9ecSlm66018 * Descriptor Ring entry from the attributes returned by the 2666e1ebb9ecSlm66018 * vDisk server. This is subject to a minimum of 'maxphys' 2667e1ebb9ecSlm66018 * as we do not have the capability to split requests over 2668e1ebb9ecSlm66018 * multiple DRing entries. 
2669e1ebb9ecSlm66018 */ 267065908c77Syu, larry liu - Sun Microsystems - Beijing China if ((vdc->max_xfer_sz * vdc->vdisk_bsize) < maxphys) { 26713af08d82Slm66018 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2672e1ebb9ecSlm66018 vdc->instance); 2673e1ebb9ecSlm66018 vdc->dring_max_cookies = maxphys / PAGESIZE; 2674e1ebb9ecSlm66018 } else { 2675e1ebb9ecSlm66018 vdc->dring_max_cookies = 267665908c77Syu, larry liu - Sun Microsystems - Beijing China (vdc->max_xfer_sz * vdc->vdisk_bsize) / PAGESIZE; 2677e1ebb9ecSlm66018 } 2678e1ebb9ecSlm66018 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2679e1ebb9ecSlm66018 (sizeof (ldc_mem_cookie_t) * 2680e1ebb9ecSlm66018 (vdc->dring_max_cookies - 1))); 2681e1ebb9ecSlm66018 vdc->dring_len = VD_DRING_LEN; 2682e1ebb9ecSlm66018 2683e1ebb9ecSlm66018 status = ldc_mem_dring_create(vdc->dring_len, 26848cd10891Snarayan vdc->dring_entry_size, &vdc->dring_hdl); 26858cd10891Snarayan if ((vdc->dring_hdl == NULL) || (status != 0)) { 26863af08d82Slm66018 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2687e1ebb9ecSlm66018 vdc->instance); 26881ae08745Sheppo return (status); 26891ae08745Sheppo } 26900a55fbb7Slm66018 vdc->initialized |= VDC_DRING_INIT; 26910a55fbb7Slm66018 } 26921ae08745Sheppo 26930a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 26943af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 26950a55fbb7Slm66018 vdc->dring_cookie = 26960a55fbb7Slm66018 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 26971ae08745Sheppo 26988cd10891Snarayan status = ldc_mem_dring_bind(vdc->curr_server->ldc_handle, 26998cd10891Snarayan vdc->dring_hdl, 27004bac2208Snarayan LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 27010a55fbb7Slm66018 &vdc->dring_cookie[0], 27021ae08745Sheppo &vdc->dring_cookie_count); 27031ae08745Sheppo if (status != 0) { 27043af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 27053af08d82Slm66018 "(%lx) to channel (%lx) status=%d\n", 27068cd10891Snarayan vdc->instance, 
vdc->dring_hdl, 27078cd10891Snarayan vdc->curr_server->ldc_handle, status); 27081ae08745Sheppo return (status); 27091ae08745Sheppo } 27101ae08745Sheppo ASSERT(vdc->dring_cookie_count == 1); 27111ae08745Sheppo vdc->initialized |= VDC_DRING_BOUND; 27120a55fbb7Slm66018 } 27131ae08745Sheppo 27148cd10891Snarayan status = ldc_mem_dring_info(vdc->dring_hdl, &vdc->dring_mem_info); 27151ae08745Sheppo if (status != 0) { 27163af08d82Slm66018 DMSG(vdc, 0, 27173af08d82Slm66018 "[%d] Failed to get info for descriptor ring (%lx)\n", 27188cd10891Snarayan vdc->instance, vdc->dring_hdl); 27191ae08745Sheppo return (status); 27201ae08745Sheppo } 27211ae08745Sheppo 27220a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 27233af08d82Slm66018 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 27240a55fbb7Slm66018 27251ae08745Sheppo /* Allocate the local copy of this dring */ 27260a55fbb7Slm66018 vdc->local_dring = 2727e1ebb9ecSlm66018 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 27281ae08745Sheppo KM_SLEEP); 27291ae08745Sheppo vdc->initialized |= VDC_DRING_LOCAL; 27300a55fbb7Slm66018 } 27311ae08745Sheppo 27321ae08745Sheppo /* 27330a55fbb7Slm66018 * Mark all DRing entries as free and initialize the private 27340a55fbb7Slm66018 * descriptor's memory handles. If any entry is initialized, 27350a55fbb7Slm66018 * we need to free it later so we set the bit in 'initialized' 27360a55fbb7Slm66018 * at the start. 
27371ae08745Sheppo */ 27381ae08745Sheppo vdc->initialized |= VDC_DRING_ENTRY; 2739e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 27401ae08745Sheppo dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 27411ae08745Sheppo dep->hdr.dstate = VIO_DESC_FREE; 27421ae08745Sheppo 27438cd10891Snarayan status = ldc_mem_alloc_handle(vdc->curr_server->ldc_handle, 27441ae08745Sheppo &vdc->local_dring[i].desc_mhdl); 27451ae08745Sheppo if (status != 0) { 27463af08d82Slm66018 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 27471ae08745Sheppo " descriptor %d", vdc->instance, i); 27481ae08745Sheppo return (status); 27491ae08745Sheppo } 27503af08d82Slm66018 vdc->local_dring[i].is_free = B_TRUE; 27511ae08745Sheppo vdc->local_dring[i].dep = dep; 27521ae08745Sheppo } 27531ae08745Sheppo 27543af08d82Slm66018 /* Initialize the starting index */ 2755007a3653SAlexandre Chartre vdc->dring_curr_idx = VDC_DRING_FIRST_ENTRY; 27561ae08745Sheppo 27571ae08745Sheppo return (status); 27581ae08745Sheppo } 27591ae08745Sheppo 27600a55fbb7Slm66018 /* 27610a55fbb7Slm66018 * Function: 27620a55fbb7Slm66018 * vdc_destroy_descriptor_ring() 27630a55fbb7Slm66018 * 27640a55fbb7Slm66018 * Description: 27650a55fbb7Slm66018 * 27660a55fbb7Slm66018 * Arguments: 27670a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
27680a55fbb7Slm66018 * 27690a55fbb7Slm66018 * Return Code: 27700a55fbb7Slm66018 * None 27710a55fbb7Slm66018 */ 27721ae08745Sheppo static void 27731ae08745Sheppo vdc_destroy_descriptor_ring(vdc_t *vdc) 27741ae08745Sheppo { 27750a55fbb7Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 27761ae08745Sheppo ldc_mem_handle_t mhdl = NULL; 27773af08d82Slm66018 ldc_mem_info_t minfo; 27781ae08745Sheppo int status = -1; 27791ae08745Sheppo int i; /* loop */ 27801ae08745Sheppo 27811ae08745Sheppo ASSERT(vdc != NULL); 27821ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 27831ae08745Sheppo 27843af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 27851ae08745Sheppo 27861ae08745Sheppo if (vdc->initialized & VDC_DRING_ENTRY) { 27873af08d82Slm66018 DMSG(vdc, 0, 27883af08d82Slm66018 "[%d] Removing Local DRing entries\n", vdc->instance); 2789e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 27900a55fbb7Slm66018 ldep = &vdc->local_dring[i]; 27910a55fbb7Slm66018 mhdl = ldep->desc_mhdl; 27921ae08745Sheppo 27930a55fbb7Slm66018 if (mhdl == NULL) 27940a55fbb7Slm66018 continue; 27950a55fbb7Slm66018 27963af08d82Slm66018 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 27973af08d82Slm66018 DMSG(vdc, 0, 27983af08d82Slm66018 "ldc_mem_info returned an error: %d\n", 27993af08d82Slm66018 status); 28003af08d82Slm66018 28013af08d82Slm66018 /* 28023af08d82Slm66018 * This must mean that the mem handle 28033af08d82Slm66018 * is not valid. Clear it out so that 28043af08d82Slm66018 * no one tries to use it. 
28053af08d82Slm66018 */ 28063af08d82Slm66018 ldep->desc_mhdl = NULL; 28073af08d82Slm66018 continue; 28083af08d82Slm66018 } 28093af08d82Slm66018 28103af08d82Slm66018 if (minfo.status == LDC_BOUND) { 28113af08d82Slm66018 (void) ldc_mem_unbind_handle(mhdl); 28123af08d82Slm66018 } 28133af08d82Slm66018 28141ae08745Sheppo (void) ldc_mem_free_handle(mhdl); 28153af08d82Slm66018 28163af08d82Slm66018 ldep->desc_mhdl = NULL; 28171ae08745Sheppo } 28181ae08745Sheppo vdc->initialized &= ~VDC_DRING_ENTRY; 28191ae08745Sheppo } 28201ae08745Sheppo 28211ae08745Sheppo if (vdc->initialized & VDC_DRING_LOCAL) { 28223af08d82Slm66018 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 28231ae08745Sheppo kmem_free(vdc->local_dring, 2824e1ebb9ecSlm66018 vdc->dring_len * sizeof (vdc_local_desc_t)); 28251ae08745Sheppo vdc->initialized &= ~VDC_DRING_LOCAL; 28261ae08745Sheppo } 28271ae08745Sheppo 28281ae08745Sheppo if (vdc->initialized & VDC_DRING_BOUND) { 28293af08d82Slm66018 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 28308cd10891Snarayan status = ldc_mem_dring_unbind(vdc->dring_hdl); 28311ae08745Sheppo if (status == 0) { 28321ae08745Sheppo vdc->initialized &= ~VDC_DRING_BOUND; 28331ae08745Sheppo } else { 28343af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 28358cd10891Snarayan vdc->instance, status, vdc->dring_hdl); 28361ae08745Sheppo } 28373af08d82Slm66018 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 28381ae08745Sheppo } 28391ae08745Sheppo 28401ae08745Sheppo if (vdc->initialized & VDC_DRING_INIT) { 28413af08d82Slm66018 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 28428cd10891Snarayan status = ldc_mem_dring_destroy(vdc->dring_hdl); 28431ae08745Sheppo if (status == 0) { 28448cd10891Snarayan vdc->dring_hdl = NULL; 28451ae08745Sheppo bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 28461ae08745Sheppo vdc->initialized &= ~VDC_DRING_INIT; 28471ae08745Sheppo } else { 28483af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d destroying DRing 
(%lx)", 28498cd10891Snarayan vdc->instance, status, vdc->dring_hdl); 28501ae08745Sheppo } 28511ae08745Sheppo } 28521ae08745Sheppo } 28531ae08745Sheppo 28541ae08745Sheppo /* 28553af08d82Slm66018 * Function: 285690e2f9dcSlm66018 * vdc_map_to_shared_dring() 28571ae08745Sheppo * 28581ae08745Sheppo * Description: 28593af08d82Slm66018 * Copy contents of the local descriptor to the shared 28603af08d82Slm66018 * memory descriptor. 28611ae08745Sheppo * 28623af08d82Slm66018 * Arguments: 28633af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 28643af08d82Slm66018 * idx - descriptor ring index 28653af08d82Slm66018 * 28663af08d82Slm66018 * Return Code: 28673af08d82Slm66018 * None 28681ae08745Sheppo */ 28691ae08745Sheppo static int 28703af08d82Slm66018 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 28711ae08745Sheppo { 28723af08d82Slm66018 vdc_local_desc_t *ldep; 28733af08d82Slm66018 vd_dring_entry_t *dep; 28743af08d82Slm66018 int rv; 28751ae08745Sheppo 28763af08d82Slm66018 ldep = &(vdcp->local_dring[idx]); 28771ae08745Sheppo 28783af08d82Slm66018 /* for now leave in the old pop_mem_hdl stuff */ 28793af08d82Slm66018 if (ldep->nbytes > 0) { 28803af08d82Slm66018 rv = vdc_populate_mem_hdl(vdcp, ldep); 28813af08d82Slm66018 if (rv) { 28823af08d82Slm66018 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 28833af08d82Slm66018 vdcp->instance); 28843af08d82Slm66018 return (rv); 28853af08d82Slm66018 } 28863af08d82Slm66018 } 28871ae08745Sheppo 28883af08d82Slm66018 /* 28893af08d82Slm66018 * fill in the data details into the DRing 28903af08d82Slm66018 */ 2891d10e4ef2Snarayan dep = ldep->dep; 28921ae08745Sheppo ASSERT(dep != NULL); 28931ae08745Sheppo 28943af08d82Slm66018 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 28953af08d82Slm66018 dep->payload.operation = ldep->operation; 28963af08d82Slm66018 dep->payload.addr = ldep->offset; 28973af08d82Slm66018 dep->payload.nbytes = ldep->nbytes; 2898055d7c80Scarlsonj dep->payload.status = (uint32_t)-1; /* vds will 
set valid value */ 28993af08d82Slm66018 dep->payload.slice = ldep->slice; 29003af08d82Slm66018 dep->hdr.dstate = VIO_DESC_READY; 29013af08d82Slm66018 dep->hdr.ack = 1; /* request an ACK for every message */ 29021ae08745Sheppo 29033af08d82Slm66018 return (0); 29041ae08745Sheppo } 29051ae08745Sheppo 29061ae08745Sheppo /* 29071ae08745Sheppo * Function: 29083af08d82Slm66018 * vdc_send_request 29093af08d82Slm66018 * 29103af08d82Slm66018 * Description: 29113af08d82Slm66018 * This routine writes the data to be transmitted to vds into the 29123af08d82Slm66018 * descriptor, notifies vds that the ring has been updated and 29133af08d82Slm66018 * then waits for the request to be processed. 29143af08d82Slm66018 * 29153af08d82Slm66018 * Arguments: 29163af08d82Slm66018 * vdcp - the soft state pointer 29173af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 29183af08d82Slm66018 * addr - address of data buf to be read/written. 29193af08d82Slm66018 * nbytes - number of bytes to read/write 29203af08d82Slm66018 * slice - the disk slice this request is for 29213af08d82Slm66018 * offset - relative disk offset 29226ace3c90SAlexandre Chartre * bufp - buf of operation 29233af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 29243af08d82Slm66018 * 29253af08d82Slm66018 * Return Codes: 29263af08d82Slm66018 * 0 29273af08d82Slm66018 * ENXIO 29283af08d82Slm66018 */ 29293af08d82Slm66018 static int 29303af08d82Slm66018 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 29316ace3c90SAlexandre Chartre size_t nbytes, int slice, diskaddr_t offset, buf_t *bufp, 29326ace3c90SAlexandre Chartre vio_desc_direction_t dir, int flags) 29333af08d82Slm66018 { 2934366a92acSlm66018 int rv = 0; 2935366a92acSlm66018 29363af08d82Slm66018 ASSERT(vdcp != NULL); 293787a7269eSachartre ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 29383af08d82Slm66018 29393af08d82Slm66018 mutex_enter(&vdcp->lock); 29403af08d82Slm66018 2941366a92acSlm66018 /* 2942366a92acSlm66018 * If this 
is a block read/write operation we update the I/O statistics 2943366a92acSlm66018 * to indicate that the request is being put on the waitq to be 2944*ca6d1280SAlexandre Chartre * serviced. Operations which are resubmitted are already in the waitq. 2945366a92acSlm66018 * 2946366a92acSlm66018 * We do it here (a common routine for both synchronous and strategy 2947366a92acSlm66018 * calls) for performance reasons - we are already holding vdc->lock 2948366a92acSlm66018 * so there is no extra locking overhead. We would have to explicitly 2949366a92acSlm66018 * grab the 'lock' mutex to update the stats if we were to do this 2950366a92acSlm66018 * higher up the stack in vdc_strategy() et. al. 2951366a92acSlm66018 */ 2952*ca6d1280SAlexandre Chartre if (((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) && 2953*ca6d1280SAlexandre Chartre !(flags & VDC_OP_RESUBMIT)) { 29546ace3c90SAlexandre Chartre DTRACE_IO1(start, buf_t *, bufp); 295590e2f9dcSlm66018 VD_KSTAT_WAITQ_ENTER(vdcp); 2956366a92acSlm66018 } 2957366a92acSlm66018 29586ace3c90SAlexandre Chartre /* 29596ace3c90SAlexandre Chartre * If the request does not expect the state to be VDC_STATE_RUNNING 29606ace3c90SAlexandre Chartre * then we just try to populate the descriptor ring once. 
29616ace3c90SAlexandre Chartre */ 29626ace3c90SAlexandre Chartre if (!(flags & VDC_OP_STATE_RUNNING)) { 29636ace3c90SAlexandre Chartre rv = vdc_populate_descriptor(vdcp, operation, addr, 29646ace3c90SAlexandre Chartre nbytes, slice, offset, bufp, dir, flags); 29656ace3c90SAlexandre Chartre goto done; 29666ace3c90SAlexandre Chartre } 29676ace3c90SAlexandre Chartre 29683af08d82Slm66018 do { 29693c96341aSnarayan while (vdcp->state != VDC_STATE_RUNNING) { 29703af08d82Slm66018 29713c96341aSnarayan /* return error if detaching */ 29723c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 2973366a92acSlm66018 rv = ENXIO; 2974366a92acSlm66018 goto done; 29753c96341aSnarayan } 2976655fd6a9Sachartre 29772f5224aeSachartre /* 29782f5224aeSachartre * If we are panicking and the disk is not ready then 29792f5224aeSachartre * we can't send any request because we can't complete 29802f5224aeSachartre * the handshake now. 29812f5224aeSachartre */ 29822f5224aeSachartre if (ddi_in_panic()) { 2983366a92acSlm66018 rv = EIO; 2984366a92acSlm66018 goto done; 29852f5224aeSachartre } 29862f5224aeSachartre 29876ace3c90SAlexandre Chartre /* 29886ace3c90SAlexandre Chartre * If the state is faulted, notify that a new I/O is 29896ace3c90SAlexandre Chartre * being submitted to force the system to check if any 29906ace3c90SAlexandre Chartre * server has recovered. 
29916ace3c90SAlexandre Chartre */ 29926ace3c90SAlexandre Chartre if (vdcp->state == VDC_STATE_FAILED) { 29936ace3c90SAlexandre Chartre vdcp->io_pending = B_TRUE; 29946ace3c90SAlexandre Chartre cv_signal(&vdcp->io_pending_cv); 29956ace3c90SAlexandre Chartre } 29966ace3c90SAlexandre Chartre 2997655fd6a9Sachartre cv_wait(&vdcp->running_cv, &vdcp->lock); 29986ace3c90SAlexandre Chartre 29996ace3c90SAlexandre Chartre /* if service is still faulted then fail the request */ 30006ace3c90SAlexandre Chartre if (vdcp->state == VDC_STATE_FAILED) { 30016ace3c90SAlexandre Chartre rv = EIO; 30026ace3c90SAlexandre Chartre goto done; 30036ace3c90SAlexandre Chartre } 30043c96341aSnarayan } 30053c96341aSnarayan 30063af08d82Slm66018 } while (vdc_populate_descriptor(vdcp, operation, addr, 3007*ca6d1280SAlexandre Chartre nbytes, slice, offset, bufp, dir, flags & ~VDC_OP_RESUBMIT)); 30083af08d82Slm66018 3009366a92acSlm66018 done: 3010366a92acSlm66018 /* 3011366a92acSlm66018 * If this is a block read/write we update the I/O statistics kstat 3012366a92acSlm66018 * to indicate that this request has been placed on the queue for 3013366a92acSlm66018 * processing (i.e sent to the vDisk server) - iostat(1M) will 3014366a92acSlm66018 * report the time waiting for the vDisk server under the %b column 3015*ca6d1280SAlexandre Chartre * 3016*ca6d1280SAlexandre Chartre * In the case of an error we take it off the wait queue only if 3017*ca6d1280SAlexandre Chartre * the I/O was not resubmited. 
3018366a92acSlm66018 */ 3019366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 3020366a92acSlm66018 if (rv == 0) { 302190e2f9dcSlm66018 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 30226ace3c90SAlexandre Chartre DTRACE_PROBE1(send, buf_t *, bufp); 3023366a92acSlm66018 } else { 3024366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 3025*ca6d1280SAlexandre Chartre if (!(flags & VDC_OP_RESUBMIT)) { 302690e2f9dcSlm66018 VD_KSTAT_WAITQ_EXIT(vdcp); 30276ace3c90SAlexandre Chartre DTRACE_IO1(done, buf_t *, bufp); 3028366a92acSlm66018 } 3029366a92acSlm66018 } 3030*ca6d1280SAlexandre Chartre } 3031366a92acSlm66018 30323af08d82Slm66018 mutex_exit(&vdcp->lock); 3033366a92acSlm66018 3034366a92acSlm66018 return (rv); 30353af08d82Slm66018 } 30363af08d82Slm66018 30373af08d82Slm66018 30383af08d82Slm66018 /* 30393af08d82Slm66018 * Function: 30401ae08745Sheppo * vdc_populate_descriptor 30411ae08745Sheppo * 30421ae08745Sheppo * Description: 30431ae08745Sheppo * This routine writes the data to be transmitted to vds into the 30441ae08745Sheppo * descriptor, notifies vds that the ring has been updated and 30451ae08745Sheppo * then waits for the request to be processed. 30461ae08745Sheppo * 30471ae08745Sheppo * Arguments: 30483af08d82Slm66018 * vdcp - the soft state pointer 30491ae08745Sheppo * operation - operation we want vds to perform (VD_OP_XXX) 30503af08d82Slm66018 * addr - address of data buf to be read/written. 
30513af08d82Slm66018 * nbytes - number of bytes to read/write 30523af08d82Slm66018 * slice - the disk slice this request is for 30533af08d82Slm66018 * offset - relative disk offset 30546ace3c90SAlexandre Chartre * bufp - buf of operation 30553af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 30561ae08745Sheppo * 30571ae08745Sheppo * Return Codes: 30581ae08745Sheppo * 0 30591ae08745Sheppo * EAGAIN 306017cadca8Slm66018 * ECONNRESET 30611ae08745Sheppo * ENXIO 30621ae08745Sheppo */ 30631ae08745Sheppo static int 30643af08d82Slm66018 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 30656ace3c90SAlexandre Chartre size_t nbytes, int slice, diskaddr_t offset, 30666ace3c90SAlexandre Chartre buf_t *bufp, vio_desc_direction_t dir, int flags) 30671ae08745Sheppo { 30683af08d82Slm66018 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 30693af08d82Slm66018 int idx; /* Index of DRing entry used */ 30703af08d82Slm66018 int next_idx; 30711ae08745Sheppo vio_dring_msg_t dmsg; 30723af08d82Slm66018 size_t msglen; 30738e6a2a04Slm66018 int rv; 30741ae08745Sheppo 30753af08d82Slm66018 ASSERT(MUTEX_HELD(&vdcp->lock)); 30763af08d82Slm66018 vdcp->threads_pending++; 30773af08d82Slm66018 loop: 30783af08d82Slm66018 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 30791ae08745Sheppo 3080007a3653SAlexandre Chartre if (flags & VDC_OP_DRING_RESERVED) { 3081007a3653SAlexandre Chartre /* use D-Ring reserved entry */ 3082007a3653SAlexandre Chartre idx = VDC_DRING_FIRST_RESV; 3083007a3653SAlexandre Chartre local_dep = &(vdcp->local_dring[idx]); 3084007a3653SAlexandre Chartre } else { 30853af08d82Slm66018 /* Get next available D-Ring entry */ 30863af08d82Slm66018 idx = vdcp->dring_curr_idx; 30873af08d82Slm66018 local_dep = &(vdcp->local_dring[idx]); 30881ae08745Sheppo 30893af08d82Slm66018 if (!local_dep->is_free) { 30903af08d82Slm66018 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 30913af08d82Slm66018 vdcp->instance); 30923af08d82Slm66018 
cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 30933af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 30943af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 30953af08d82Slm66018 goto loop; 30963af08d82Slm66018 } 30973af08d82Slm66018 vdcp->threads_pending--; 30983af08d82Slm66018 return (ECONNRESET); 30991ae08745Sheppo } 31001ae08745Sheppo 31013af08d82Slm66018 next_idx = idx + 1; 31023af08d82Slm66018 if (next_idx >= vdcp->dring_len) 3103007a3653SAlexandre Chartre next_idx = VDC_DRING_FIRST_ENTRY; 31043af08d82Slm66018 vdcp->dring_curr_idx = next_idx; 3105007a3653SAlexandre Chartre } 31061ae08745Sheppo 31073af08d82Slm66018 ASSERT(local_dep->is_free); 31081ae08745Sheppo 31093af08d82Slm66018 local_dep->operation = operation; 3110d10e4ef2Snarayan local_dep->addr = addr; 31113af08d82Slm66018 local_dep->nbytes = nbytes; 31123af08d82Slm66018 local_dep->slice = slice; 31133af08d82Slm66018 local_dep->offset = offset; 31146ace3c90SAlexandre Chartre local_dep->buf = bufp; 31153af08d82Slm66018 local_dep->dir = dir; 31166ace3c90SAlexandre Chartre local_dep->flags = flags; 31173af08d82Slm66018 31183af08d82Slm66018 local_dep->is_free = B_FALSE; 31193af08d82Slm66018 31203af08d82Slm66018 rv = vdc_map_to_shared_dring(vdcp, idx); 31213af08d82Slm66018 if (rv) { 3122007a3653SAlexandre Chartre if (flags & VDC_OP_DRING_RESERVED) { 3123007a3653SAlexandre Chartre DMSG(vdcp, 0, "[%d]: cannot bind memory - error\n", 3124007a3653SAlexandre Chartre vdcp->instance); 3125007a3653SAlexandre Chartre /* 3126007a3653SAlexandre Chartre * We can't wait if we are using reserved slot. 3127007a3653SAlexandre Chartre * Free the descriptor and return. 
3128007a3653SAlexandre Chartre */ 3129007a3653SAlexandre Chartre local_dep->is_free = B_TRUE; 3130007a3653SAlexandre Chartre vdcp->threads_pending--; 3131007a3653SAlexandre Chartre return (rv); 3132007a3653SAlexandre Chartre } 31333af08d82Slm66018 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 31343af08d82Slm66018 vdcp->instance); 31353af08d82Slm66018 /* free the descriptor */ 31363af08d82Slm66018 local_dep->is_free = B_TRUE; 31373af08d82Slm66018 vdcp->dring_curr_idx = idx; 31383af08d82Slm66018 cv_wait(&vdcp->membind_cv, &vdcp->lock); 31393af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 31403af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 31413af08d82Slm66018 goto loop; 31421ae08745Sheppo } 31433af08d82Slm66018 vdcp->threads_pending--; 31443af08d82Slm66018 return (ECONNRESET); 31451ae08745Sheppo } 31461ae08745Sheppo 31471ae08745Sheppo /* 31481ae08745Sheppo * Send a msg with the DRing details to vds 31491ae08745Sheppo */ 31501ae08745Sheppo VIO_INIT_DRING_DATA_TAG(dmsg); 31513af08d82Slm66018 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 31523af08d82Slm66018 dmsg.dring_ident = vdcp->dring_ident; 31531ae08745Sheppo dmsg.start_idx = idx; 31541ae08745Sheppo dmsg.end_idx = idx; 31553af08d82Slm66018 vdcp->seq_num++; 31561ae08745Sheppo 3157366a92acSlm66018 DTRACE_PROBE2(populate, int, vdcp->instance, 3158366a92acSlm66018 vdc_local_desc_t *, local_dep); 31593af08d82Slm66018 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 31603af08d82Slm66018 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 31611ae08745Sheppo 31623af08d82Slm66018 /* 31633af08d82Slm66018 * note we're still holding the lock here to 31643af08d82Slm66018 * make sure the message goes out in order !!!... 
31653af08d82Slm66018 */ 31663af08d82Slm66018 msglen = sizeof (dmsg); 31673af08d82Slm66018 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 31683af08d82Slm66018 switch (rv) { 31693af08d82Slm66018 case ECONNRESET: 31703af08d82Slm66018 /* 31713af08d82Slm66018 * vdc_send initiates the reset on failure. 31723af08d82Slm66018 * Since the transaction has already been put 31733af08d82Slm66018 * on the local dring, it will automatically get 31743af08d82Slm66018 * retried when the channel is reset. Given that, 31753af08d82Slm66018 * it is ok to just return success even though the 31763af08d82Slm66018 * send failed. 31773af08d82Slm66018 */ 31783af08d82Slm66018 rv = 0; 31793af08d82Slm66018 break; 3180d10e4ef2Snarayan 31813af08d82Slm66018 case 0: /* EOK */ 31823af08d82Slm66018 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 31833af08d82Slm66018 break; 3184d10e4ef2Snarayan 31853af08d82Slm66018 default: 3186007a3653SAlexandre Chartre DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 3187007a3653SAlexandre Chartre rv = ENXIO; 3188007a3653SAlexandre Chartre break; 31893af08d82Slm66018 } 3190e1ebb9ecSlm66018 31913af08d82Slm66018 vdcp->threads_pending--; 31923af08d82Slm66018 return (rv); 31931ae08745Sheppo } 31941ae08745Sheppo 31951ae08745Sheppo /* 31963af08d82Slm66018 * Function: 31976ace3c90SAlexandre Chartre * vdc_do_op 31986ace3c90SAlexandre Chartre * 31996ace3c90SAlexandre Chartre * Description: 32006ace3c90SAlexandre Chartre * Wrapper around vdc_submit_request(). Each request is associated with a 32016ace3c90SAlexandre Chartre * buf structure. If a buf structure is provided (bufp != NULL) then the 32026ace3c90SAlexandre Chartre * request will be submitted with that buf, and the caller can wait for 32036ace3c90SAlexandre Chartre * completion of the request with biowait(). If a buf structure is not 32046ace3c90SAlexandre Chartre * provided (bufp == NULL) then a buf structure is created and the function 32056ace3c90SAlexandre Chartre * waits for the completion of the request. 
32066ace3c90SAlexandre Chartre * 32076ace3c90SAlexandre Chartre * If the flag VD_OP_STATE_RUNNING is set then vdc_submit_request() will 32086ace3c90SAlexandre Chartre * submit the request only when the vdisk is in state VD_STATE_RUNNING. 32096ace3c90SAlexandre Chartre * If the vdisk is not in that state then the vdc_submit_request() will 32106ace3c90SAlexandre Chartre * wait for that state to be reached. After the request is submitted, the 32116ace3c90SAlexandre Chartre * reply will be processed asynchronously by the vdc_process_msg_thread() 32126ace3c90SAlexandre Chartre * thread. 32136ace3c90SAlexandre Chartre * 32146ace3c90SAlexandre Chartre * If the flag VD_OP_STATE_RUNNING is not set then vdc_submit_request() 32156ace3c90SAlexandre Chartre * submit the request whatever the state of the vdisk is. Then vdc_do_op() 32166ace3c90SAlexandre Chartre * will wait for a reply message, process the reply and complete the 32176ace3c90SAlexandre Chartre * request. 32186ace3c90SAlexandre Chartre * 32196ace3c90SAlexandre Chartre * Arguments: 32206ace3c90SAlexandre Chartre * vdc - the soft state pointer 32216ace3c90SAlexandre Chartre * op - operation we want vds to perform (VD_OP_XXX) 32226ace3c90SAlexandre Chartre * addr - address of data buf to be read/written. 32236ace3c90SAlexandre Chartre * nbytes - number of bytes to read/write 32246ace3c90SAlexandre Chartre * slice - the disk slice this request is for 32256ace3c90SAlexandre Chartre * offset - relative disk offset 32266ace3c90SAlexandre Chartre * bufp - buf structure associated with the request (can be NULL). 32276ace3c90SAlexandre Chartre * dir - direction of operation (READ/WRITE/BOTH) 32286ace3c90SAlexandre Chartre * flags - flags for the request. 32296ace3c90SAlexandre Chartre * 32306ace3c90SAlexandre Chartre * Return Codes: 32316ace3c90SAlexandre Chartre * 0 - the request has been succesfully submitted and completed. 32326ace3c90SAlexandre Chartre * != 0 - the request has failed. 
In that case, if a buf structure 32336ace3c90SAlexandre Chartre * was provided (bufp != NULL) then the B_ERROR flag is set 32346ace3c90SAlexandre Chartre * and the b_error field of the buf structure is set to EIO. 32356ace3c90SAlexandre Chartre */ 32366ace3c90SAlexandre Chartre static int 32376ace3c90SAlexandre Chartre vdc_do_op(vdc_t *vdc, int op, caddr_t addr, size_t nbytes, int slice, 32386ace3c90SAlexandre Chartre diskaddr_t offset, struct buf *bufp, vio_desc_direction_t dir, int flags) 32396ace3c90SAlexandre Chartre { 32406ace3c90SAlexandre Chartre vio_msg_t vio_msg; 32416ace3c90SAlexandre Chartre struct buf buf; 32426ace3c90SAlexandre Chartre int rv; 32436ace3c90SAlexandre Chartre 32446ace3c90SAlexandre Chartre if (bufp == NULL) { 32456ace3c90SAlexandre Chartre /* 32466ace3c90SAlexandre Chartre * We use buf just as a convenient way to get a notification 32476ace3c90SAlexandre Chartre * that the request is completed, so we initialize buf to the 32486ace3c90SAlexandre Chartre * minimum we need. 32496ace3c90SAlexandre Chartre */ 32506ace3c90SAlexandre Chartre bioinit(&buf); 32516ace3c90SAlexandre Chartre buf.b_bcount = nbytes; 32526ace3c90SAlexandre Chartre buf.b_flags = B_BUSY; 32536ace3c90SAlexandre Chartre bufp = &buf; 32546ace3c90SAlexandre Chartre } 32556ace3c90SAlexandre Chartre 32566ace3c90SAlexandre Chartre rv = vdc_send_request(vdc, op, addr, nbytes, slice, offset, bufp, 32576ace3c90SAlexandre Chartre dir, flags); 32586ace3c90SAlexandre Chartre 32596ace3c90SAlexandre Chartre if (rv != 0) 32606ace3c90SAlexandre Chartre goto done; 32616ace3c90SAlexandre Chartre 32626ace3c90SAlexandre Chartre /* 32636ace3c90SAlexandre Chartre * If the request should be done in VDC_STATE_RUNNING state then the 32646ace3c90SAlexandre Chartre * reply will be received and processed by vdc_process_msg_thread() 32656ace3c90SAlexandre Chartre * and we just have to handle the panic case. 
Otherwise we have to 32666ace3c90SAlexandre Chartre * wait for the reply message and process it. 32676ace3c90SAlexandre Chartre */ 32686ace3c90SAlexandre Chartre if (flags & VDC_OP_STATE_RUNNING) { 32696ace3c90SAlexandre Chartre 32706ace3c90SAlexandre Chartre if (ddi_in_panic()) { 32716ace3c90SAlexandre Chartre rv = vdc_drain_response(vdc, bufp); 32726ace3c90SAlexandre Chartre goto done; 32736ace3c90SAlexandre Chartre } 32746ace3c90SAlexandre Chartre 32756ace3c90SAlexandre Chartre } else { 32766ace3c90SAlexandre Chartre /* wait for the response message */ 32776ace3c90SAlexandre Chartre rv = vdc_wait_for_response(vdc, &vio_msg); 3278*ca6d1280SAlexandre Chartre 3279*ca6d1280SAlexandre Chartre if (rv == 0) 3280*ca6d1280SAlexandre Chartre rv = vdc_process_data_msg(vdc, &vio_msg); 3281*ca6d1280SAlexandre Chartre 32826ace3c90SAlexandre Chartre if (rv) { 32836ace3c90SAlexandre Chartre /* 32846ace3c90SAlexandre Chartre * If this is a block read/write we update the I/O 32856ace3c90SAlexandre Chartre * statistics kstat to take it off the run queue. 3286*ca6d1280SAlexandre Chartre * If it is a resubmit then it needs to stay in 3287*ca6d1280SAlexandre Chartre * in the waitq, and it will be removed when the 3288*ca6d1280SAlexandre Chartre * I/O is eventually completed or cancelled. 
32896ace3c90SAlexandre Chartre */ 32906ace3c90SAlexandre Chartre mutex_enter(&vdc->lock); 32916ace3c90SAlexandre Chartre if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 3292*ca6d1280SAlexandre Chartre if (flags & VDC_OP_RESUBMIT) { 3293*ca6d1280SAlexandre Chartre VD_KSTAT_RUNQ_BACK_TO_WAITQ(vdc); 3294*ca6d1280SAlexandre Chartre } else { 32956ace3c90SAlexandre Chartre VD_KSTAT_RUNQ_EXIT(vdc); 32966ace3c90SAlexandre Chartre DTRACE_IO1(done, buf_t *, bufp); 32976ace3c90SAlexandre Chartre } 3298*ca6d1280SAlexandre Chartre } 32996ace3c90SAlexandre Chartre mutex_exit(&vdc->lock); 33006ace3c90SAlexandre Chartre goto done; 33016ace3c90SAlexandre Chartre } 33026ace3c90SAlexandre Chartre 33036ace3c90SAlexandre Chartre } 33046ace3c90SAlexandre Chartre 33056ace3c90SAlexandre Chartre if (bufp == &buf) 33066ace3c90SAlexandre Chartre rv = biowait(bufp); 33076ace3c90SAlexandre Chartre 33086ace3c90SAlexandre Chartre done: 33096ace3c90SAlexandre Chartre if (bufp == &buf) { 33106ace3c90SAlexandre Chartre biofini(bufp); 33116ace3c90SAlexandre Chartre } else if (rv != 0) { 33126ace3c90SAlexandre Chartre bioerror(bufp, EIO); 33136ace3c90SAlexandre Chartre biodone(bufp); 33146ace3c90SAlexandre Chartre } 33156ace3c90SAlexandre Chartre 33166ace3c90SAlexandre Chartre return (rv); 33176ace3c90SAlexandre Chartre } 33186ace3c90SAlexandre Chartre 33196ace3c90SAlexandre Chartre /* 33206ace3c90SAlexandre Chartre * Function: 33213af08d82Slm66018 * vdc_do_sync_op 33223af08d82Slm66018 * 33233af08d82Slm66018 * Description: 33246ace3c90SAlexandre Chartre * Wrapper around vdc_do_op that serializes requests. 33253af08d82Slm66018 * 33263af08d82Slm66018 * Arguments: 33273af08d82Slm66018 * vdcp - the soft state pointer 33283af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 33293af08d82Slm66018 * addr - address of data buf to be read/written. 
33303af08d82Slm66018 * nbytes - number of bytes to read/write 33313af08d82Slm66018 * slice - the disk slice this request is for 33323af08d82Slm66018 * offset - relative disk offset 33333af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 33342f5224aeSachartre * rconflict - check for reservation conflict in case of failure 33352f5224aeSachartre * 33362f5224aeSachartre * rconflict should be set to B_TRUE by most callers. Callers invoking the 33372f5224aeSachartre * VD_OP_SCSICMD operation can set rconflict to B_FALSE if they check the 33386ace3c90SAlexandre Chartre * result of a successful operation with vdc_scsi_status(). 33393af08d82Slm66018 * 33403af08d82Slm66018 * Return Codes: 33413af08d82Slm66018 * 0 33423af08d82Slm66018 * EAGAIN 33433af08d82Slm66018 * EFAULT 33443af08d82Slm66018 * ENXIO 33453af08d82Slm66018 * EIO 33460a55fbb7Slm66018 */ 33473af08d82Slm66018 static int 33483af08d82Slm66018 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 33496ace3c90SAlexandre Chartre int slice, diskaddr_t offset, vio_desc_direction_t dir, boolean_t rconflict) 33503af08d82Slm66018 { 33513af08d82Slm66018 int status; 33526ace3c90SAlexandre Chartre int flags = VDC_OP_NORMAL; 33531ae08745Sheppo 33541ae08745Sheppo /* 33553af08d82Slm66018 * Grab the lock, if blocked wait until the server 33563af08d82Slm66018 * response causes us to wake up again. 
33573af08d82Slm66018 */ 33583af08d82Slm66018 mutex_enter(&vdcp->lock); 33593af08d82Slm66018 vdcp->sync_op_cnt++; 336011f54b6eSAlexandre Chartre while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) { 336111f54b6eSAlexandre Chartre if (ddi_in_panic()) { 336211f54b6eSAlexandre Chartre /* don't block if we are panicking */ 336311f54b6eSAlexandre Chartre vdcp->sync_op_cnt--; 336411f54b6eSAlexandre Chartre mutex_exit(&vdcp->lock); 336511f54b6eSAlexandre Chartre return (EIO); 336611f54b6eSAlexandre Chartre } else { 33673af08d82Slm66018 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 336811f54b6eSAlexandre Chartre } 336911f54b6eSAlexandre Chartre } 33703af08d82Slm66018 33713af08d82Slm66018 if (vdcp->state == VDC_STATE_DETACH) { 33723af08d82Slm66018 cv_broadcast(&vdcp->sync_blocked_cv); 33733af08d82Slm66018 vdcp->sync_op_cnt--; 33743af08d82Slm66018 mutex_exit(&vdcp->lock); 33753af08d82Slm66018 return (ENXIO); 33763af08d82Slm66018 } 33773af08d82Slm66018 33783af08d82Slm66018 /* now block anyone other thread entering after us */ 33793af08d82Slm66018 vdcp->sync_op_blocked = B_TRUE; 33806ace3c90SAlexandre Chartre 33813af08d82Slm66018 mutex_exit(&vdcp->lock); 33823af08d82Slm66018 33836ace3c90SAlexandre Chartre if (!rconflict) 33846ace3c90SAlexandre Chartre flags &= ~VDC_OP_ERRCHK_CONFLICT; 33856ace3c90SAlexandre Chartre 33866ace3c90SAlexandre Chartre status = vdc_do_op(vdcp, operation, addr, nbytes, slice, offset, 33876ace3c90SAlexandre Chartre NULL, dir, flags); 33883af08d82Slm66018 3389655fd6a9Sachartre mutex_enter(&vdcp->lock); 3390655fd6a9Sachartre 33916ace3c90SAlexandre Chartre DMSG(vdcp, 2, ": operation returned %d\n", status); 33923af08d82Slm66018 33933c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 33943af08d82Slm66018 status = ENXIO; 33952f5224aeSachartre } 33962f5224aeSachartre 33973af08d82Slm66018 vdcp->sync_op_blocked = B_FALSE; 33983af08d82Slm66018 vdcp->sync_op_cnt--; 33993af08d82Slm66018 34003af08d82Slm66018 /* signal the next waiting thread */ 
34013af08d82Slm66018 cv_signal(&vdcp->sync_blocked_cv); 34022f5224aeSachartre 34033af08d82Slm66018 mutex_exit(&vdcp->lock); 34043af08d82Slm66018 34053af08d82Slm66018 return (status); 34063af08d82Slm66018 } 34073af08d82Slm66018 34083af08d82Slm66018 34093af08d82Slm66018 /* 34103af08d82Slm66018 * Function: 34113af08d82Slm66018 * vdc_drain_response() 34123af08d82Slm66018 * 34133af08d82Slm66018 * Description: 34141ae08745Sheppo * When a guest is panicking, the completion of requests needs to be 34151ae08745Sheppo * handled differently because interrupts are disabled and vdc 34161ae08745Sheppo * will not get messages. We have to poll for the messages instead. 34173af08d82Slm66018 * 34183c2ebf09Sachartre * Note: since we are panicking we don't implement the io:::done 34193c2ebf09Sachartre * DTrace probe or update the I/O statistics kstats. 3420366a92acSlm66018 * 34213af08d82Slm66018 * Arguments: 34223af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 34236ace3c90SAlexandre Chartre * buf - if buf is NULL then we drain all responses, otherwise we 34243c2ebf09Sachartre * poll until we receive a ACK/NACK for the specific I/O 34253c2ebf09Sachartre * described by buf. 34263af08d82Slm66018 * 34273af08d82Slm66018 * Return Code: 342811f54b6eSAlexandre Chartre * 0 - Success. If we were expecting a response to a particular 34296ace3c90SAlexandre Chartre * request then this means that a response has been received. 
34301ae08745Sheppo */ 34313af08d82Slm66018 static int 34326ace3c90SAlexandre Chartre vdc_drain_response(vdc_t *vdc, struct buf *buf) 34333af08d82Slm66018 { 34343af08d82Slm66018 int rv, idx, retries; 34353af08d82Slm66018 size_t msglen; 34363af08d82Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 34373af08d82Slm66018 vio_dring_msg_t dmsg; 34383c2ebf09Sachartre struct buf *mbuf; 343911f54b6eSAlexandre Chartre boolean_t ack; 344011f54b6eSAlexandre Chartre 34413af08d82Slm66018 mutex_enter(&vdc->lock); 34423af08d82Slm66018 34431ae08745Sheppo retries = 0; 34441ae08745Sheppo for (;;) { 34451ae08745Sheppo msglen = sizeof (dmsg); 34468cd10891Snarayan rv = ldc_read(vdc->curr_server->ldc_handle, (caddr_t)&dmsg, 34478cd10891Snarayan &msglen); 34488e6a2a04Slm66018 if (rv) { 34498e6a2a04Slm66018 rv = EINVAL; 34501ae08745Sheppo break; 34511ae08745Sheppo } 34521ae08745Sheppo 34531ae08745Sheppo /* 34541ae08745Sheppo * if there are no packets wait and check again 34551ae08745Sheppo */ 34568e6a2a04Slm66018 if ((rv == 0) && (msglen == 0)) { 34571ae08745Sheppo if (retries++ > vdc_dump_retries) { 34588e6a2a04Slm66018 rv = EAGAIN; 34591ae08745Sheppo break; 34601ae08745Sheppo } 34611ae08745Sheppo 3462d10e4ef2Snarayan drv_usecwait(vdc_usec_timeout_dump); 34631ae08745Sheppo continue; 34641ae08745Sheppo } 34651ae08745Sheppo 34661ae08745Sheppo /* 34671ae08745Sheppo * Ignore all messages that are not ACKs/NACKs to 34681ae08745Sheppo * DRing requests. 34691ae08745Sheppo */ 34701ae08745Sheppo if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 34711ae08745Sheppo (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 34723af08d82Slm66018 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 34731ae08745Sheppo dmsg.tag.vio_msgtype, 34741ae08745Sheppo dmsg.tag.vio_subtype, 34751ae08745Sheppo dmsg.tag.vio_subtype_env); 34761ae08745Sheppo continue; 34771ae08745Sheppo } 34781ae08745Sheppo 34791ae08745Sheppo /* 348011f54b6eSAlexandre Chartre * Record if the packet was ACK'ed or not. 
If the packet was not 348111f54b6eSAlexandre Chartre * ACK'ed then we will just mark the request as failed; we don't 348211f54b6eSAlexandre Chartre * want to reset the connection at this point. 34831ae08745Sheppo */ 34841ae08745Sheppo switch (dmsg.tag.vio_subtype) { 34851ae08745Sheppo case VIO_SUBTYPE_ACK: 348611f54b6eSAlexandre Chartre ack = B_TRUE; 34871ae08745Sheppo break; 34881ae08745Sheppo case VIO_SUBTYPE_NACK: 348911f54b6eSAlexandre Chartre ack = B_FALSE; 34901ae08745Sheppo break; 34911ae08745Sheppo default: 34921ae08745Sheppo continue; 34931ae08745Sheppo } 34941ae08745Sheppo 34953af08d82Slm66018 idx = dmsg.start_idx; 34963af08d82Slm66018 if (idx >= vdc->dring_len) { 34973af08d82Slm66018 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 3498e1ebb9ecSlm66018 vdc->instance, idx); 34993af08d82Slm66018 continue; 35001ae08745Sheppo } 35013af08d82Slm66018 ldep = &vdc->local_dring[idx]; 35023af08d82Slm66018 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 35033af08d82Slm66018 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 35043af08d82Slm66018 vdc->instance, idx, ldep->dep->hdr.dstate); 35051ae08745Sheppo continue; 35061ae08745Sheppo } 35071ae08745Sheppo 35086ace3c90SAlexandre Chartre mbuf = ldep->buf; 35096ace3c90SAlexandre Chartre ASSERT(mbuf != NULL); 35106ace3c90SAlexandre Chartre mbuf->b_resid = mbuf->b_bcount - ldep->dep->payload.nbytes; 35116ace3c90SAlexandre Chartre bioerror(mbuf, ack ? 
ldep->dep->payload.status : EIO); 3512e8dc8350Sjmcp biodone(mbuf); 35136ace3c90SAlexandre Chartre 35143af08d82Slm66018 rv = vdc_depopulate_descriptor(vdc, idx); 35153c2ebf09Sachartre if (buf != NULL && buf == mbuf) { 35163c2ebf09Sachartre rv = 0; 351711f54b6eSAlexandre Chartre goto done; 351811f54b6eSAlexandre Chartre } 35193af08d82Slm66018 35203c2ebf09Sachartre /* if this is the last descriptor - break out of loop */ 35213c2ebf09Sachartre if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) { 35223c2ebf09Sachartre /* 352311f54b6eSAlexandre Chartre * If we were expecting a response for a particular 352411f54b6eSAlexandre Chartre * request then we return with an error otherwise we 352511f54b6eSAlexandre Chartre * have successfully completed the drain. 35263c2ebf09Sachartre */ 35276ace3c90SAlexandre Chartre rv = (buf != NULL)? ESRCH: 0; 35283c2ebf09Sachartre break; 35293c2ebf09Sachartre } 35303c2ebf09Sachartre } 35313c2ebf09Sachartre 353211f54b6eSAlexandre Chartre done: 35333af08d82Slm66018 mutex_exit(&vdc->lock); 35343af08d82Slm66018 DMSG(vdc, 0, "End idx=%d\n", idx); 35353af08d82Slm66018 35363af08d82Slm66018 return (rv); 35371ae08745Sheppo } 35381ae08745Sheppo 35391ae08745Sheppo 35400a55fbb7Slm66018 /* 35410a55fbb7Slm66018 * Function: 35420a55fbb7Slm66018 * vdc_depopulate_descriptor() 35430a55fbb7Slm66018 * 35440a55fbb7Slm66018 * Description: 35450a55fbb7Slm66018 * 35460a55fbb7Slm66018 * Arguments: 35470a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
35480a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 35490a55fbb7Slm66018 * 35500a55fbb7Slm66018 * Return Code: 35510a55fbb7Slm66018 * 0 - Success 35520a55fbb7Slm66018 */ 35531ae08745Sheppo static int 35541ae08745Sheppo vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 35551ae08745Sheppo { 35561ae08745Sheppo vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 35571ae08745Sheppo vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 35581ae08745Sheppo int status = ENXIO; 35598e6a2a04Slm66018 int rv = 0; 35601ae08745Sheppo 35611ae08745Sheppo ASSERT(vdc != NULL); 3562e1ebb9ecSlm66018 ASSERT(idx < vdc->dring_len); 35631ae08745Sheppo ldep = &vdc->local_dring[idx]; 35641ae08745Sheppo ASSERT(ldep != NULL); 35653af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 35663af08d82Slm66018 3567366a92acSlm66018 DTRACE_PROBE2(depopulate, int, vdc->instance, vdc_local_desc_t *, ldep); 35683af08d82Slm66018 DMSG(vdc, 2, ": idx = %d\n", idx); 3569366a92acSlm66018 35701ae08745Sheppo dep = ldep->dep; 35711ae08745Sheppo ASSERT(dep != NULL); 3572e1ebb9ecSlm66018 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 3573e1ebb9ecSlm66018 (dep->payload.status == ECANCELED)); 35741ae08745Sheppo 3575e1ebb9ecSlm66018 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 35763af08d82Slm66018 35773af08d82Slm66018 ldep->is_free = B_TRUE; 35781ae08745Sheppo status = dep->payload.status; 3579205eeb1aSlm66018 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 35801ae08745Sheppo 3581eff7243fSlm66018 /* 3582eff7243fSlm66018 * If no buffers were used to transfer information to the server when 3583eff7243fSlm66018 * populating the descriptor then no memory handles need to be unbound 3584eff7243fSlm66018 * and we can return now. 
3585eff7243fSlm66018 */ 3586eff7243fSlm66018 if (ldep->nbytes == 0) { 3587eff7243fSlm66018 cv_signal(&vdc->dring_free_cv); 35888e6a2a04Slm66018 return (status); 3589eff7243fSlm66018 } 35908e6a2a04Slm66018 35911ae08745Sheppo /* 35921ae08745Sheppo * If the upper layer passed in a misaligned address we copied the 35931ae08745Sheppo * data into an aligned buffer before sending it to LDC - we now 35941ae08745Sheppo * copy it back to the original buffer. 35951ae08745Sheppo */ 35961ae08745Sheppo if (ldep->align_addr) { 35971ae08745Sheppo ASSERT(ldep->addr != NULL); 35981ae08745Sheppo 35993c96341aSnarayan if (dep->payload.nbytes > 0) 36003c96341aSnarayan bcopy(ldep->align_addr, ldep->addr, 36013c96341aSnarayan dep->payload.nbytes); 36021ae08745Sheppo kmem_free(ldep->align_addr, 36033c96341aSnarayan sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 36041ae08745Sheppo ldep->align_addr = NULL; 36051ae08745Sheppo } 36061ae08745Sheppo 36078e6a2a04Slm66018 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 36088e6a2a04Slm66018 if (rv != 0) { 36093af08d82Slm66018 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 36108e6a2a04Slm66018 vdc->instance, ldep->desc_mhdl, idx, rv); 36118e6a2a04Slm66018 /* 36128e6a2a04Slm66018 * The error returned by the vDisk server is more informative 36138e6a2a04Slm66018 * and thus has a higher priority but if it isn't set we ensure 36148e6a2a04Slm66018 * that this function returns an error. 
36158e6a2a04Slm66018 */ 36168e6a2a04Slm66018 if (status == 0) 36178e6a2a04Slm66018 status = EINVAL; 36181ae08745Sheppo } 36191ae08745Sheppo 36203af08d82Slm66018 cv_signal(&vdc->membind_cv); 36213af08d82Slm66018 cv_signal(&vdc->dring_free_cv); 36223af08d82Slm66018 36231ae08745Sheppo return (status); 36241ae08745Sheppo } 36251ae08745Sheppo 36260a55fbb7Slm66018 /* 36270a55fbb7Slm66018 * Function: 36280a55fbb7Slm66018 * vdc_populate_mem_hdl() 36290a55fbb7Slm66018 * 36300a55fbb7Slm66018 * Description: 36310a55fbb7Slm66018 * 36320a55fbb7Slm66018 * Arguments: 36330a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 36340a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 36350a55fbb7Slm66018 * addr - virtual address being mapped in 36360a55fbb7Slm66018 * nybtes - number of bytes in 'addr' 36370a55fbb7Slm66018 * operation - the vDisk operation being performed (VD_OP_xxx) 36380a55fbb7Slm66018 * 36390a55fbb7Slm66018 * Return Code: 36400a55fbb7Slm66018 * 0 - Success 36410a55fbb7Slm66018 */ 36421ae08745Sheppo static int 36433af08d82Slm66018 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 36441ae08745Sheppo { 36451ae08745Sheppo vd_dring_entry_t *dep = NULL; 36461ae08745Sheppo ldc_mem_handle_t mhdl; 36471ae08745Sheppo caddr_t vaddr; 36483af08d82Slm66018 size_t nbytes; 36494bac2208Snarayan uint8_t perm = LDC_MEM_RW; 36504bac2208Snarayan uint8_t maptype; 36511ae08745Sheppo int rv = 0; 36521ae08745Sheppo int i; 36531ae08745Sheppo 36543af08d82Slm66018 ASSERT(vdcp != NULL); 36551ae08745Sheppo 36563af08d82Slm66018 dep = ldep->dep; 36571ae08745Sheppo mhdl = ldep->desc_mhdl; 36581ae08745Sheppo 36593af08d82Slm66018 switch (ldep->dir) { 36603af08d82Slm66018 case VIO_read_dir: 36611ae08745Sheppo perm = LDC_MEM_W; 36621ae08745Sheppo break; 36631ae08745Sheppo 36643af08d82Slm66018 case VIO_write_dir: 36651ae08745Sheppo perm = LDC_MEM_R; 36661ae08745Sheppo break; 36671ae08745Sheppo 36683af08d82Slm66018 case VIO_both_dir: 
36691ae08745Sheppo perm = LDC_MEM_RW; 36701ae08745Sheppo break; 36711ae08745Sheppo 36721ae08745Sheppo default: 36731ae08745Sheppo ASSERT(0); /* catch bad programming in vdc */ 36741ae08745Sheppo } 36751ae08745Sheppo 36761ae08745Sheppo /* 36771ae08745Sheppo * LDC expects any addresses passed in to be 8-byte aligned. We need 36781ae08745Sheppo * to copy the contents of any misaligned buffers to a newly allocated 36791ae08745Sheppo * buffer and bind it instead (and copy the the contents back to the 36801ae08745Sheppo * original buffer passed in when depopulating the descriptor) 36811ae08745Sheppo */ 36823af08d82Slm66018 vaddr = ldep->addr; 36833af08d82Slm66018 nbytes = ldep->nbytes; 36843af08d82Slm66018 if (((uint64_t)vaddr & 0x7) != 0) { 3685d10e4ef2Snarayan ASSERT(ldep->align_addr == NULL); 36861ae08745Sheppo ldep->align_addr = 36873af08d82Slm66018 kmem_alloc(sizeof (caddr_t) * 36883af08d82Slm66018 P2ROUNDUP(nbytes, 8), KM_SLEEP); 36893af08d82Slm66018 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 36903af08d82Slm66018 "(buf=%p nb=%ld op=%d)\n", 36913af08d82Slm66018 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 36923af08d82Slm66018 nbytes, ldep->operation); 36933af08d82Slm66018 if (perm != LDC_MEM_W) 36943af08d82Slm66018 bcopy(vaddr, ldep->align_addr, nbytes); 36951ae08745Sheppo vaddr = ldep->align_addr; 36961ae08745Sheppo } 36971ae08745Sheppo 36987bd3a2e2SSriharsha Basavapatna maptype = LDC_IO_MAP|LDC_SHADOW_MAP; 36991ae08745Sheppo rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 370087a7269eSachartre maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 37013af08d82Slm66018 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 37023af08d82Slm66018 vdcp->instance, dep->payload.ncookies); 37031ae08745Sheppo if (rv != 0) { 37043af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 37053af08d82Slm66018 "(mhdl=%p, buf=%p, err=%d)\n", 37063af08d82Slm66018 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 
37071ae08745Sheppo if (ldep->align_addr) { 37081ae08745Sheppo kmem_free(ldep->align_addr, 3709d10e4ef2Snarayan sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 37101ae08745Sheppo ldep->align_addr = NULL; 37111ae08745Sheppo } 37121ae08745Sheppo return (EAGAIN); 37131ae08745Sheppo } 37141ae08745Sheppo 37151ae08745Sheppo /* 37161ae08745Sheppo * Get the other cookies (if any). 37171ae08745Sheppo */ 37181ae08745Sheppo for (i = 1; i < dep->payload.ncookies; i++) { 37191ae08745Sheppo rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 37201ae08745Sheppo if (rv != 0) { 37211ae08745Sheppo (void) ldc_mem_unbind_handle(mhdl); 37223af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3723e1ebb9ecSlm66018 "(mhdl=%lx cnum=%d), err=%d", 37243af08d82Slm66018 vdcp->instance, mhdl, i, rv); 37251ae08745Sheppo if (ldep->align_addr) { 37261ae08745Sheppo kmem_free(ldep->align_addr, 37273c96341aSnarayan sizeof (caddr_t) * ldep->nbytes); 37281ae08745Sheppo ldep->align_addr = NULL; 37291ae08745Sheppo } 37301ae08745Sheppo return (EAGAIN); 37311ae08745Sheppo } 37321ae08745Sheppo } 37331ae08745Sheppo 37341ae08745Sheppo return (rv); 37351ae08745Sheppo } 37361ae08745Sheppo 37371ae08745Sheppo /* 37381ae08745Sheppo * Interrupt handlers for messages from LDC 37391ae08745Sheppo */ 37401ae08745Sheppo 37410a55fbb7Slm66018 /* 37420a55fbb7Slm66018 * Function: 37430a55fbb7Slm66018 * vdc_handle_cb() 37440a55fbb7Slm66018 * 37450a55fbb7Slm66018 * Description: 37460a55fbb7Slm66018 * 37470a55fbb7Slm66018 * Arguments: 37480a55fbb7Slm66018 * event - Type of event (LDC_EVT_xxx) that triggered the callback 37490a55fbb7Slm66018 * arg - soft state pointer for this instance of the device driver. 
37500a55fbb7Slm66018 * 37510a55fbb7Slm66018 * Return Code: 37520a55fbb7Slm66018 * 0 - Success 37530a55fbb7Slm66018 */ 37541ae08745Sheppo static uint_t 37551ae08745Sheppo vdc_handle_cb(uint64_t event, caddr_t arg) 37561ae08745Sheppo { 37571ae08745Sheppo ldc_status_t ldc_state; 37581ae08745Sheppo int rv = 0; 37598cd10891Snarayan vdc_server_t *srvr = (vdc_server_t *)(void *)arg; 37608cd10891Snarayan vdc_t *vdc = srvr->vdcp; 37611ae08745Sheppo 37621ae08745Sheppo ASSERT(vdc != NULL); 37631ae08745Sheppo 37643af08d82Slm66018 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 37651ae08745Sheppo 37668cd10891Snarayan /* If callback is not for the current server, ignore it */ 37678cd10891Snarayan mutex_enter(&vdc->lock); 37688cd10891Snarayan 37698cd10891Snarayan if (vdc->curr_server != srvr) { 37708cd10891Snarayan DMSG(vdc, 0, "[%d] Ignoring event 0x%lx for port@%ld\n", 37718cd10891Snarayan vdc->instance, event, srvr->id); 37728cd10891Snarayan mutex_exit(&vdc->lock); 37738cd10891Snarayan return (LDC_SUCCESS); 37748cd10891Snarayan } 37758cd10891Snarayan 37761ae08745Sheppo /* 37771ae08745Sheppo * Depending on the type of event that triggered this callback, 37783af08d82Slm66018 * we modify the handshake state or read the data. 37791ae08745Sheppo * 37801ae08745Sheppo * NOTE: not done as a switch() as event could be triggered by 37811ae08745Sheppo * a state change and a read request. Also the ordering of the 37821ae08745Sheppo * check for the event types is deliberate. 
37831ae08745Sheppo */ 37841ae08745Sheppo if (event & LDC_EVT_UP) { 37853af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 37863af08d82Slm66018 37871ae08745Sheppo /* get LDC state */ 37888cd10891Snarayan rv = ldc_status(srvr->ldc_handle, &ldc_state); 37891ae08745Sheppo if (rv != 0) { 37903af08d82Slm66018 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 37911ae08745Sheppo vdc->instance, rv); 37928cd10891Snarayan mutex_exit(&vdc->lock); 37931ae08745Sheppo return (LDC_SUCCESS); 37941ae08745Sheppo } 37958cd10891Snarayan if (srvr->ldc_state != LDC_UP && 37968cd10891Snarayan ldc_state == LDC_UP) { 37971ae08745Sheppo /* 37983af08d82Slm66018 * Reset the transaction sequence numbers when 37993af08d82Slm66018 * LDC comes up. We then kick off the handshake 38003af08d82Slm66018 * negotiation with the vDisk server. 38011ae08745Sheppo */ 38020a55fbb7Slm66018 vdc->seq_num = 1; 38031ae08745Sheppo vdc->seq_num_reply = 0; 38046ace3c90SAlexandre Chartre vdc->io_pending = B_TRUE; 38058cd10891Snarayan srvr->ldc_state = ldc_state; 38063af08d82Slm66018 cv_signal(&vdc->initwait_cv); 38076ace3c90SAlexandre Chartre cv_signal(&vdc->io_pending_cv); 38083af08d82Slm66018 } 38091ae08745Sheppo } 38101ae08745Sheppo 38111ae08745Sheppo if (event & LDC_EVT_READ) { 381217cadca8Slm66018 DMSG(vdc, 1, "[%d] Received LDC_EVT_READ\n", vdc->instance); 38133af08d82Slm66018 mutex_enter(&vdc->read_lock); 38143af08d82Slm66018 cv_signal(&vdc->read_cv); 38153af08d82Slm66018 vdc->read_state = VDC_READ_PENDING; 38163af08d82Slm66018 mutex_exit(&vdc->read_lock); 38178cd10891Snarayan mutex_exit(&vdc->lock); 38181ae08745Sheppo 38191ae08745Sheppo /* that's all we have to do - no need to handle DOWN/RESET */ 38201ae08745Sheppo return (LDC_SUCCESS); 38211ae08745Sheppo } 38221ae08745Sheppo 38233af08d82Slm66018 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 38240a55fbb7Slm66018 38253af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 38263af08d82Slm66018 38273af08d82Slm66018 
/* 38283af08d82Slm66018 * Need to wake up any readers so they will 38293af08d82Slm66018 * detect that a reset has occurred. 38303af08d82Slm66018 */ 38313af08d82Slm66018 mutex_enter(&vdc->read_lock); 38323af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 38333af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 38343af08d82Slm66018 cv_signal(&vdc->read_cv); 38353af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 38363af08d82Slm66018 mutex_exit(&vdc->read_lock); 38370a55fbb7Slm66018 38383af08d82Slm66018 /* wake up any threads waiting for connection to come up */ 38393af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 38403af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 38413af08d82Slm66018 cv_signal(&vdc->initwait_cv); 38426ace3c90SAlexandre Chartre } else if (vdc->state == VDC_STATE_FAILED) { 38436ace3c90SAlexandre Chartre vdc->io_pending = B_TRUE; 38446ace3c90SAlexandre Chartre cv_signal(&vdc->io_pending_cv); 38451ae08745Sheppo } 38461ae08745Sheppo 38471ae08745Sheppo } 38481ae08745Sheppo 38498cd10891Snarayan mutex_exit(&vdc->lock); 38508cd10891Snarayan 38511ae08745Sheppo if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 38523af08d82Slm66018 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 38531ae08745Sheppo vdc->instance, event); 38541ae08745Sheppo 38551ae08745Sheppo return (LDC_SUCCESS); 38561ae08745Sheppo } 38571ae08745Sheppo 38583af08d82Slm66018 /* 38593af08d82Slm66018 * Function: 38603af08d82Slm66018 * vdc_wait_for_response() 38613af08d82Slm66018 * 38623af08d82Slm66018 * Description: 38633af08d82Slm66018 * Block waiting for a response from the server. If there is 38643af08d82Slm66018 * no data the thread block on the read_cv that is signalled 38653af08d82Slm66018 * by the callback when an EVT_READ occurs. 38663af08d82Slm66018 * 38673af08d82Slm66018 * Arguments: 38683af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
38693af08d82Slm66018 * 38703af08d82Slm66018 * Return Code: 38713af08d82Slm66018 * 0 - Success 38723af08d82Slm66018 */ 38733af08d82Slm66018 static int 38743af08d82Slm66018 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 38753af08d82Slm66018 { 38763af08d82Slm66018 size_t nbytes = sizeof (*msgp); 38773af08d82Slm66018 int status; 38783af08d82Slm66018 38793af08d82Slm66018 ASSERT(vdcp != NULL); 38803af08d82Slm66018 38813af08d82Slm66018 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 38823af08d82Slm66018 38833af08d82Slm66018 status = vdc_recv(vdcp, msgp, &nbytes); 38843af08d82Slm66018 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 38853af08d82Slm66018 status, (int)nbytes); 38863af08d82Slm66018 if (status) { 38873af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 38883af08d82Slm66018 vdcp->instance, status); 38893af08d82Slm66018 return (status); 38903af08d82Slm66018 } 38913af08d82Slm66018 38923af08d82Slm66018 if (nbytes < sizeof (vio_msg_tag_t)) { 38933af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 38943af08d82Slm66018 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 38953af08d82Slm66018 return (ENOMSG); 38963af08d82Slm66018 } 38973af08d82Slm66018 38983af08d82Slm66018 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 38993af08d82Slm66018 msgp->tag.vio_msgtype, 39003af08d82Slm66018 msgp->tag.vio_subtype, 39013af08d82Slm66018 msgp->tag.vio_subtype_env); 39023af08d82Slm66018 39033af08d82Slm66018 /* 39043af08d82Slm66018 * Verify the Session ID of the message 39053af08d82Slm66018 * 39063af08d82Slm66018 * Every message after the Version has been negotiated should 39073af08d82Slm66018 * have the correct session ID set. 
39083af08d82Slm66018 */ 39093af08d82Slm66018 if ((msgp->tag.vio_sid != vdcp->session_id) && 39103af08d82Slm66018 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 39113af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 39123af08d82Slm66018 "expected 0x%lx [seq num %lx @ %d]", 39133af08d82Slm66018 vdcp->instance, msgp->tag.vio_sid, 39143af08d82Slm66018 vdcp->session_id, 39153af08d82Slm66018 ((vio_dring_msg_t *)msgp)->seq_num, 39163af08d82Slm66018 ((vio_dring_msg_t *)msgp)->start_idx); 39173af08d82Slm66018 return (ENOMSG); 39183af08d82Slm66018 } 39193af08d82Slm66018 return (0); 39203af08d82Slm66018 } 39213af08d82Slm66018 39223af08d82Slm66018 39233af08d82Slm66018 /* 39243af08d82Slm66018 * Function: 39253af08d82Slm66018 * vdc_resubmit_backup_dring() 39263af08d82Slm66018 * 39273af08d82Slm66018 * Description: 39283af08d82Slm66018 * Resubmit each descriptor in the backed up dring to 39293af08d82Slm66018 * vDisk server. The Dring was backed up during connection 39303af08d82Slm66018 * reset. 39313af08d82Slm66018 * 39323af08d82Slm66018 * Arguments: 39333af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
39343af08d82Slm66018 * 39353af08d82Slm66018 * Return Code: 39363af08d82Slm66018 * 0 - Success 39373af08d82Slm66018 */ 39383af08d82Slm66018 static int 39393af08d82Slm66018 vdc_resubmit_backup_dring(vdc_t *vdcp) 39403af08d82Slm66018 { 394190e2f9dcSlm66018 int processed = 0; 39423af08d82Slm66018 int count; 39433af08d82Slm66018 int b_idx; 394490e2f9dcSlm66018 int rv = 0; 39453af08d82Slm66018 int dring_size; 39463af08d82Slm66018 vdc_local_desc_t *curr_ldep; 39473af08d82Slm66018 39483af08d82Slm66018 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 39493af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 39503af08d82Slm66018 3951655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3952655fd6a9Sachartre /* the pending requests have already been processed */ 3953655fd6a9Sachartre return (0); 3954655fd6a9Sachartre } 3955655fd6a9Sachartre 39563af08d82Slm66018 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 39573af08d82Slm66018 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 39583af08d82Slm66018 39593af08d82Slm66018 /* 39603af08d82Slm66018 * Walk the backup copy of the local descriptor ring and 39613af08d82Slm66018 * resubmit all the outstanding transactions. 
39623af08d82Slm66018 */ 39633af08d82Slm66018 b_idx = vdcp->local_dring_backup_tail; 39643af08d82Slm66018 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 39653af08d82Slm66018 39663af08d82Slm66018 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 39673af08d82Slm66018 3968eff7243fSlm66018 /* only resubmit outstanding transactions */ 39693af08d82Slm66018 if (!curr_ldep->is_free) { 39703af08d82Slm66018 39713af08d82Slm66018 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 39726ace3c90SAlexandre Chartre 39736ace3c90SAlexandre Chartre rv = vdc_do_op(vdcp, curr_ldep->operation, 39743af08d82Slm66018 curr_ldep->addr, curr_ldep->nbytes, 39753af08d82Slm66018 curr_ldep->slice, curr_ldep->offset, 39766ace3c90SAlexandre Chartre curr_ldep->buf, curr_ldep->dir, 3977*ca6d1280SAlexandre Chartre (curr_ldep->flags & ~VDC_OP_STATE_RUNNING) | 3978*ca6d1280SAlexandre Chartre VDC_OP_RESUBMIT); 397990e2f9dcSlm66018 39803af08d82Slm66018 if (rv) { 39816ace3c90SAlexandre Chartre DMSG(vdcp, 1, "[%d] resubmit entry %d failed\n", 39823af08d82Slm66018 vdcp->instance, b_idx); 398390e2f9dcSlm66018 goto done; 39843af08d82Slm66018 } 39853af08d82Slm66018 398690e2f9dcSlm66018 /* 3987630f014dSrameshc * Mark this entry as free so that we will not resubmit 3988630f014dSrameshc * this "done" request again, if we were to use the same 3989630f014dSrameshc * backup_dring again in future. This could happen when 3990630f014dSrameshc * a reset happens while processing the backup_dring. 
3991630f014dSrameshc */ 3992630f014dSrameshc curr_ldep->is_free = B_TRUE; 399390e2f9dcSlm66018 processed++; 39943af08d82Slm66018 } 39953af08d82Slm66018 39963af08d82Slm66018 /* get the next element to submit */ 39973af08d82Slm66018 if (++b_idx >= vdcp->local_dring_backup_len) 39983af08d82Slm66018 b_idx = 0; 39993af08d82Slm66018 } 40003af08d82Slm66018 40013af08d82Slm66018 /* all done - now clear up pending dring copy */ 40023af08d82Slm66018 dring_size = vdcp->local_dring_backup_len * 40033af08d82Slm66018 sizeof (vdcp->local_dring_backup[0]); 40043af08d82Slm66018 40053af08d82Slm66018 (void) kmem_free(vdcp->local_dring_backup, dring_size); 40063af08d82Slm66018 40073af08d82Slm66018 vdcp->local_dring_backup = NULL; 40083af08d82Slm66018 400990e2f9dcSlm66018 done: 401090e2f9dcSlm66018 DTRACE_PROBE2(processed, int, processed, vdc_t *, vdcp); 401190e2f9dcSlm66018 401290e2f9dcSlm66018 return (rv); 40133af08d82Slm66018 } 40143af08d82Slm66018 40153af08d82Slm66018 /* 40163af08d82Slm66018 * Function: 4017655fd6a9Sachartre * vdc_cancel_backup_dring 4018655fd6a9Sachartre * 4019655fd6a9Sachartre * Description: 4020655fd6a9Sachartre * Cancel each descriptor in the backed up dring to vDisk server. 4021655fd6a9Sachartre * The Dring was backed up during connection reset. 4022655fd6a9Sachartre * 4023655fd6a9Sachartre * Arguments: 4024655fd6a9Sachartre * vdcp - soft state pointer for this instance of the device driver. 
4025655fd6a9Sachartre * 4026655fd6a9Sachartre * Return Code: 4027655fd6a9Sachartre * None 4028655fd6a9Sachartre */ 4029655fd6a9Sachartre void 403090e2f9dcSlm66018 vdc_cancel_backup_dring(vdc_t *vdcp) 4031655fd6a9Sachartre { 4032655fd6a9Sachartre vdc_local_desc_t *ldep; 4033655fd6a9Sachartre struct buf *bufp; 4034655fd6a9Sachartre int count; 4035655fd6a9Sachartre int b_idx; 4036655fd6a9Sachartre int dring_size; 403790e2f9dcSlm66018 int cancelled = 0; 4038655fd6a9Sachartre 4039655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 40406ace3c90SAlexandre Chartre ASSERT(vdcp->state == VDC_STATE_FAILED); 4041655fd6a9Sachartre 4042655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 4043655fd6a9Sachartre /* the pending requests have already been processed */ 4044655fd6a9Sachartre return; 4045655fd6a9Sachartre } 4046655fd6a9Sachartre 4047655fd6a9Sachartre DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 4048655fd6a9Sachartre vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 4049655fd6a9Sachartre 4050655fd6a9Sachartre /* 4051655fd6a9Sachartre * Walk the backup copy of the local descriptor ring and 4052655fd6a9Sachartre * cancel all the outstanding transactions. 
4053655fd6a9Sachartre */ 4054655fd6a9Sachartre b_idx = vdcp->local_dring_backup_tail; 4055655fd6a9Sachartre for (count = 0; count < vdcp->local_dring_backup_len; count++) { 4056655fd6a9Sachartre 4057655fd6a9Sachartre ldep = &(vdcp->local_dring_backup[b_idx]); 4058655fd6a9Sachartre 4059655fd6a9Sachartre /* only cancel outstanding transactions */ 4060655fd6a9Sachartre if (!ldep->is_free) { 4061655fd6a9Sachartre 4062655fd6a9Sachartre DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 406390e2f9dcSlm66018 cancelled++; 4064655fd6a9Sachartre 4065655fd6a9Sachartre /* 4066655fd6a9Sachartre * All requests have already been cleared from the 4067655fd6a9Sachartre * local descriptor ring and the LDC channel has been 4068655fd6a9Sachartre * reset so we will never get any reply for these 4069655fd6a9Sachartre * requests. Now we just have to notify threads waiting 4070655fd6a9Sachartre * for replies that the request has failed. 4071655fd6a9Sachartre */ 40726ace3c90SAlexandre Chartre bufp = ldep->buf; 4073655fd6a9Sachartre ASSERT(bufp != NULL); 4074655fd6a9Sachartre bufp->b_resid = bufp->b_bcount; 40756ace3c90SAlexandre Chartre if (ldep->operation == VD_OP_BREAD || 40766ace3c90SAlexandre Chartre ldep->operation == VD_OP_BWRITE) { 4077366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 4078*ca6d1280SAlexandre Chartre VD_KSTAT_WAITQ_EXIT(vdcp); 4079366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 40806ace3c90SAlexandre Chartre } 4081655fd6a9Sachartre bioerror(bufp, EIO); 4082655fd6a9Sachartre biodone(bufp); 4083655fd6a9Sachartre } 4084655fd6a9Sachartre 4085655fd6a9Sachartre /* get the next element to cancel */ 4086655fd6a9Sachartre if (++b_idx >= vdcp->local_dring_backup_len) 4087655fd6a9Sachartre b_idx = 0; 4088655fd6a9Sachartre } 4089655fd6a9Sachartre 4090655fd6a9Sachartre /* all done - now clear up pending dring copy */ 4091655fd6a9Sachartre dring_size = vdcp->local_dring_backup_len * 4092655fd6a9Sachartre sizeof (vdcp->local_dring_backup[0]); 4093655fd6a9Sachartre 
4094655fd6a9Sachartre (void) kmem_free(vdcp->local_dring_backup, dring_size); 4095655fd6a9Sachartre 4096655fd6a9Sachartre vdcp->local_dring_backup = NULL; 4097655fd6a9Sachartre 409890e2f9dcSlm66018 DTRACE_PROBE2(cancelled, int, cancelled, vdc_t *, vdcp); 4099655fd6a9Sachartre } 4100655fd6a9Sachartre 4101655fd6a9Sachartre /* 4102655fd6a9Sachartre * Function: 4103655fd6a9Sachartre * vdc_connection_timeout 4104655fd6a9Sachartre * 4105655fd6a9Sachartre * Description: 4106655fd6a9Sachartre * This function is invoked if the timeout set to establish the connection 4107655fd6a9Sachartre * with vds expires. This will happen if we spend too much time in the 4108*ca6d1280SAlexandre Chartre * VDC_STATE_INIT_WAITING, VDC_STATE_NEGOTIATE or VDC_STATE_HANDLE_PENDING 4109*ca6d1280SAlexandre Chartre * states. 4110655fd6a9Sachartre * 4111655fd6a9Sachartre * Arguments: 4112655fd6a9Sachartre * arg - argument of the timeout function actually a soft state 4113655fd6a9Sachartre * pointer for the instance of the device driver. 4114655fd6a9Sachartre * 4115655fd6a9Sachartre * Return Code: 4116655fd6a9Sachartre * None 4117655fd6a9Sachartre */ 4118655fd6a9Sachartre void 4119655fd6a9Sachartre vdc_connection_timeout(void *arg) 4120655fd6a9Sachartre { 4121655fd6a9Sachartre vdc_t *vdcp = (vdc_t *)arg; 4122655fd6a9Sachartre 4123655fd6a9Sachartre mutex_enter(&vdcp->lock); 4124655fd6a9Sachartre 41256ace3c90SAlexandre Chartre vdcp->ctimeout_reached = B_TRUE; 4126655fd6a9Sachartre 4127655fd6a9Sachartre mutex_exit(&vdcp->lock); 4128655fd6a9Sachartre } 4129655fd6a9Sachartre 4130655fd6a9Sachartre /* 4131655fd6a9Sachartre * Function: 41323af08d82Slm66018 * vdc_backup_local_dring() 41333af08d82Slm66018 * 41343af08d82Slm66018 * Description: 41353af08d82Slm66018 * Backup the current dring in the event of a reset. The Dring 41363af08d82Slm66018 * transactions will be resubmitted to the server when the 41373af08d82Slm66018 * connection is restored. 
41383af08d82Slm66018 * 41393af08d82Slm66018 * Arguments: 41403af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 41413af08d82Slm66018 * 41423af08d82Slm66018 * Return Code: 41433af08d82Slm66018 * NONE 41443af08d82Slm66018 */ 41453af08d82Slm66018 static void 41463af08d82Slm66018 vdc_backup_local_dring(vdc_t *vdcp) 41473af08d82Slm66018 { 4148*ca6d1280SAlexandre Chartre int b_idx, count, dring_size; 4149*ca6d1280SAlexandre Chartre vdc_local_desc_t *curr_ldep; 41503af08d82Slm66018 4151655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 41523af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_RESETTING); 41533af08d82Slm66018 41543af08d82Slm66018 /* 41553af08d82Slm66018 * If the backup dring is stil around, it means 41563af08d82Slm66018 * that the last restore did not complete. However, 41573af08d82Slm66018 * since we never got back into the running state, 41583af08d82Slm66018 * the backup copy we have is still valid. 41593af08d82Slm66018 */ 41603af08d82Slm66018 if (vdcp->local_dring_backup != NULL) { 41613af08d82Slm66018 DMSG(vdcp, 1, "reusing local descriptor ring backup " 41623af08d82Slm66018 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 41633af08d82Slm66018 vdcp->local_dring_backup_tail); 41643af08d82Slm66018 return; 41653af08d82Slm66018 } 41663af08d82Slm66018 4167655fd6a9Sachartre /* 4168655fd6a9Sachartre * The backup dring can be NULL and the local dring may not be 4169655fd6a9Sachartre * initialized. This can happen if we had a reset while establishing 4170655fd6a9Sachartre * a new connection but after the connection has timed out. In that 4171655fd6a9Sachartre * case the backup dring is NULL because the requests have been 4172655fd6a9Sachartre * cancelled and the request occured before the local dring is 4173655fd6a9Sachartre * initialized. 
4174655fd6a9Sachartre */ 4175655fd6a9Sachartre if (!(vdcp->initialized & VDC_DRING_LOCAL)) 4176655fd6a9Sachartre return; 4177655fd6a9Sachartre 41783af08d82Slm66018 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 41793af08d82Slm66018 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 41803af08d82Slm66018 41813af08d82Slm66018 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 41823af08d82Slm66018 41833af08d82Slm66018 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 41843af08d82Slm66018 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 41853af08d82Slm66018 41863af08d82Slm66018 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 41873af08d82Slm66018 vdcp->local_dring_backup_len = vdcp->dring_len; 4188*ca6d1280SAlexandre Chartre 4189*ca6d1280SAlexandre Chartre /* 4190*ca6d1280SAlexandre Chartre * At this point, pending read or write I/Os are recorded in the 4191*ca6d1280SAlexandre Chartre * runq. We update the I/O statistics to indicate that they are now 4192*ca6d1280SAlexandre Chartre * back in the waitq. 
4193*ca6d1280SAlexandre Chartre */ 4194*ca6d1280SAlexandre Chartre b_idx = vdcp->local_dring_backup_tail; 4195*ca6d1280SAlexandre Chartre for (count = 0; count < vdcp->local_dring_backup_len; count++) { 4196*ca6d1280SAlexandre Chartre 4197*ca6d1280SAlexandre Chartre curr_ldep = &(vdcp->local_dring_backup[b_idx]); 4198*ca6d1280SAlexandre Chartre 4199*ca6d1280SAlexandre Chartre if (!curr_ldep->is_free && 4200*ca6d1280SAlexandre Chartre (curr_ldep->operation == VD_OP_BREAD || 4201*ca6d1280SAlexandre Chartre curr_ldep->operation == VD_OP_BWRITE)) { 4202*ca6d1280SAlexandre Chartre VD_KSTAT_RUNQ_BACK_TO_WAITQ(vdcp); 4203*ca6d1280SAlexandre Chartre } 4204*ca6d1280SAlexandre Chartre 4205*ca6d1280SAlexandre Chartre /* get the next element */ 4206*ca6d1280SAlexandre Chartre if (++b_idx >= vdcp->local_dring_backup_len) 4207*ca6d1280SAlexandre Chartre b_idx = 0; 4208*ca6d1280SAlexandre Chartre } 4209*ca6d1280SAlexandre Chartre 42103af08d82Slm66018 } 42113af08d82Slm66018 42128cd10891Snarayan static void 42138cd10891Snarayan vdc_switch_server(vdc_t *vdcp) 42148cd10891Snarayan { 42158cd10891Snarayan int rv; 42168cd10891Snarayan vdc_server_t *curr_server, *new_server; 42178cd10891Snarayan 42188cd10891Snarayan ASSERT(MUTEX_HELD(&vdcp->lock)); 42198cd10891Snarayan 42208cd10891Snarayan /* if there is only one server return back */ 42218cd10891Snarayan if (vdcp->num_servers == 1) { 42228cd10891Snarayan return; 42238cd10891Snarayan } 42248cd10891Snarayan 42258cd10891Snarayan /* Get current and next server */ 42268cd10891Snarayan curr_server = vdcp->curr_server; 42278cd10891Snarayan new_server = 42288cd10891Snarayan (curr_server->next) ? 
curr_server->next : vdcp->server_list; 42298cd10891Snarayan ASSERT(curr_server != new_server); 42308cd10891Snarayan 42318cd10891Snarayan /* bring current server's channel down */ 42328cd10891Snarayan rv = ldc_down(curr_server->ldc_handle); 42338cd10891Snarayan if (rv) { 42348cd10891Snarayan DMSG(vdcp, 0, "[%d] Cannot bring channel down, port %ld\n", 42358cd10891Snarayan vdcp->instance, curr_server->id); 42368cd10891Snarayan return; 42378cd10891Snarayan } 42388cd10891Snarayan 42398cd10891Snarayan /* switch the server */ 42408cd10891Snarayan vdcp->curr_server = new_server; 42418cd10891Snarayan 42428cd10891Snarayan DMSG(vdcp, 0, "[%d] Switched to next vdisk server, port@%ld, ldc@%ld\n", 42438cd10891Snarayan vdcp->instance, vdcp->curr_server->id, vdcp->curr_server->ldc_id); 42448cd10891Snarayan } 42458cd10891Snarayan 42466ace3c90SAlexandre Chartre static void 42476ace3c90SAlexandre Chartre vdc_print_svc_status(vdc_t *vdcp) 42486ace3c90SAlexandre Chartre { 42496ace3c90SAlexandre Chartre int instance; 42506ace3c90SAlexandre Chartre uint64_t ldc_id, port_id; 42516ace3c90SAlexandre Chartre vdc_service_state_t svc_state; 42526ace3c90SAlexandre Chartre 42536ace3c90SAlexandre Chartre ASSERT(mutex_owned(&vdcp->lock)); 42546ace3c90SAlexandre Chartre 42556ace3c90SAlexandre Chartre svc_state = vdcp->curr_server->svc_state; 42566ace3c90SAlexandre Chartre 42576ace3c90SAlexandre Chartre if (vdcp->curr_server->log_state == svc_state) 42586ace3c90SAlexandre Chartre return; 42596ace3c90SAlexandre Chartre 42606ace3c90SAlexandre Chartre instance = vdcp->instance; 42616ace3c90SAlexandre Chartre ldc_id = vdcp->curr_server->ldc_id; 42626ace3c90SAlexandre Chartre port_id = vdcp->curr_server->id; 42636ace3c90SAlexandre Chartre 42646ace3c90SAlexandre Chartre switch (svc_state) { 42656ace3c90SAlexandre Chartre 42666ace3c90SAlexandre Chartre case VDC_SERVICE_OFFLINE: 42676ace3c90SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d is offline\n", instance); 42686ace3c90SAlexandre Chartre break; 
42696ace3c90SAlexandre Chartre 42706ace3c90SAlexandre Chartre case VDC_SERVICE_CONNECTED: 42716ace3c90SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d is connected using ldc@%ld,%ld\n", 42726ace3c90SAlexandre Chartre instance, ldc_id, port_id); 42736ace3c90SAlexandre Chartre break; 42746ace3c90SAlexandre Chartre 42756ace3c90SAlexandre Chartre case VDC_SERVICE_ONLINE: 42766ace3c90SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d is online using ldc@%ld,%ld\n", 42776ace3c90SAlexandre Chartre instance, ldc_id, port_id); 42786ace3c90SAlexandre Chartre break; 42796ace3c90SAlexandre Chartre 42806ace3c90SAlexandre Chartre case VDC_SERVICE_FAILED: 42816ace3c90SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d access to service failed " 42826ace3c90SAlexandre Chartre "using ldc@%ld,%ld\n", instance, ldc_id, port_id); 42836ace3c90SAlexandre Chartre break; 42846ace3c90SAlexandre Chartre 42856ace3c90SAlexandre Chartre case VDC_SERVICE_FAULTED: 42866ace3c90SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d access to backend failed " 42876ace3c90SAlexandre Chartre "using ldc@%ld,%ld\n", instance, ldc_id, port_id); 42886ace3c90SAlexandre Chartre break; 42896ace3c90SAlexandre Chartre 42906ace3c90SAlexandre Chartre default: 42916ace3c90SAlexandre Chartre ASSERT(0); 42926ace3c90SAlexandre Chartre break; 42936ace3c90SAlexandre Chartre } 42946ace3c90SAlexandre Chartre 42956ace3c90SAlexandre Chartre vdcp->curr_server->log_state = svc_state; 42966ace3c90SAlexandre Chartre } 42976ace3c90SAlexandre Chartre 4298*ca6d1280SAlexandre Chartre /* 4299*ca6d1280SAlexandre Chartre * Function: 4300*ca6d1280SAlexandre Chartre * vdc_handshake_retry 4301*ca6d1280SAlexandre Chartre * 4302*ca6d1280SAlexandre Chartre * Description: 4303*ca6d1280SAlexandre Chartre * This function indicates if the handshake should be retried or not. 
4304*ca6d1280SAlexandre Chartre * This depends on the lifecycle of the driver: 4305*ca6d1280SAlexandre Chartre * 4306*ca6d1280SAlexandre Chartre * VDC_LC_ATTACHING: the handshake is retried until we have tried 4307*ca6d1280SAlexandre Chartre * a handshake with each server. We don't care how far each handshake 4308*ca6d1280SAlexandre Chartre * went, the goal is just to try the handshake. We want to minimize the 4309*ca6d1280SAlexandre Chartre * the time spent doing the attach because this is locking the device 4310*ca6d1280SAlexandre Chartre * tree. 4311*ca6d1280SAlexandre Chartre * 4312*ca6d1280SAlexandre Chartre * VDC_LC_ONLINE_PENDING: the handshake is retried while we haven't done 4313*ca6d1280SAlexandre Chartre * consecutive attribute negotiations with each server, and we haven't 4314*ca6d1280SAlexandre Chartre * reached a minimum total of consecutive negotiations (hattr_min). The 4315*ca6d1280SAlexandre Chartre * number of attribution negotiations determines the time spent before 4316*ca6d1280SAlexandre Chartre * failing pending I/Os if the handshake is not successful. 4317*ca6d1280SAlexandre Chartre * 4318*ca6d1280SAlexandre Chartre * VDC_LC_ONLINE: the handshake is always retried, until we have a 4319*ca6d1280SAlexandre Chartre * successful handshake with a server. 
4320*ca6d1280SAlexandre Chartre * 4321*ca6d1280SAlexandre Chartre * VDC_LC_DETACHING: N/A 4322*ca6d1280SAlexandre Chartre * 4323*ca6d1280SAlexandre Chartre * Arguments: 4324*ca6d1280SAlexandre Chartre * hshake_cnt - number of handshake attempts 4325*ca6d1280SAlexandre Chartre * hattr_cnt - number of attribute negotiation attempts 4326*ca6d1280SAlexandre Chartre * 4327*ca6d1280SAlexandre Chartre * Return Code: 4328*ca6d1280SAlexandre Chartre * B_TRUE - handshake should be retried 4329*ca6d1280SAlexandre Chartre * B_FALSE - handshake should not be retried 4330*ca6d1280SAlexandre Chartre */ 4331*ca6d1280SAlexandre Chartre static boolean_t 4332*ca6d1280SAlexandre Chartre vdc_handshake_retry(vdc_t *vdcp, int hshake_cnt, int hattr_cnt) 4333*ca6d1280SAlexandre Chartre { 4334*ca6d1280SAlexandre Chartre int hattr_total = 0; 4335*ca6d1280SAlexandre Chartre vdc_server_t *srvr; 4336*ca6d1280SAlexandre Chartre 4337*ca6d1280SAlexandre Chartre ASSERT(vdcp->lifecycle != VDC_LC_DETACHING); 4338*ca6d1280SAlexandre Chartre 4339*ca6d1280SAlexandre Chartre /* update handshake counters */ 4340*ca6d1280SAlexandre Chartre vdcp->curr_server->hshake_cnt = hshake_cnt; 4341*ca6d1280SAlexandre Chartre vdcp->curr_server->hattr_cnt = hattr_cnt; 4342*ca6d1280SAlexandre Chartre 4343*ca6d1280SAlexandre Chartre /* 4344*ca6d1280SAlexandre Chartre * If no attribute negotiation was done then we reset the total 4345*ca6d1280SAlexandre Chartre * number otherwise we cumulate the number. 4346*ca6d1280SAlexandre Chartre */ 4347*ca6d1280SAlexandre Chartre if (hattr_cnt == 0) 4348*ca6d1280SAlexandre Chartre vdcp->curr_server->hattr_total = 0; 4349*ca6d1280SAlexandre Chartre else 4350*ca6d1280SAlexandre Chartre vdcp->curr_server->hattr_total += hattr_cnt; 4351*ca6d1280SAlexandre Chartre 4352*ca6d1280SAlexandre Chartre /* 4353*ca6d1280SAlexandre Chartre * If we are online (i.e. at least one handshake was successfully 4354*ca6d1280SAlexandre Chartre * completed) then we always retry the handshake. 
4355*ca6d1280SAlexandre Chartre */ 4356*ca6d1280SAlexandre Chartre if (vdcp->lifecycle == VDC_LC_ONLINE) 4357*ca6d1280SAlexandre Chartre return (B_TRUE); 4358*ca6d1280SAlexandre Chartre 4359*ca6d1280SAlexandre Chartre /* 4360*ca6d1280SAlexandre Chartre * If we are attaching then we retry the handshake only if we haven't 4361*ca6d1280SAlexandre Chartre * tried with all servers. 4362*ca6d1280SAlexandre Chartre */ 4363*ca6d1280SAlexandre Chartre if (vdcp->lifecycle == VDC_LC_ATTACHING) { 4364*ca6d1280SAlexandre Chartre 4365*ca6d1280SAlexandre Chartre for (srvr = vdcp->server_list; srvr != NULL; 4366*ca6d1280SAlexandre Chartre srvr = srvr->next) { 4367*ca6d1280SAlexandre Chartre if (srvr->hshake_cnt == 0) { 4368*ca6d1280SAlexandre Chartre return (B_TRUE); 4369*ca6d1280SAlexandre Chartre } 4370*ca6d1280SAlexandre Chartre } 4371*ca6d1280SAlexandre Chartre 4372*ca6d1280SAlexandre Chartre return (B_FALSE); 4373*ca6d1280SAlexandre Chartre } 4374*ca6d1280SAlexandre Chartre 4375*ca6d1280SAlexandre Chartre /* 4376*ca6d1280SAlexandre Chartre * Here we are in the case where we haven't completed any handshake 4377*ca6d1280SAlexandre Chartre * successfully yet. 4378*ca6d1280SAlexandre Chartre */ 4379*ca6d1280SAlexandre Chartre ASSERT(vdcp->lifecycle == VDC_LC_ONLINE_PENDING); 4380*ca6d1280SAlexandre Chartre 4381*ca6d1280SAlexandre Chartre /* 4382*ca6d1280SAlexandre Chartre * We retry the handshake if we haven't done an attribute negotiation 4383*ca6d1280SAlexandre Chartre * with each server. This is to handle the case where one service domain 4384*ca6d1280SAlexandre Chartre * is down. 
4385*ca6d1280SAlexandre Chartre */ 4386*ca6d1280SAlexandre Chartre for (srvr = vdcp->server_list; srvr != NULL; srvr = srvr->next) { 4387*ca6d1280SAlexandre Chartre if (srvr->hattr_cnt == 0) { 4388*ca6d1280SAlexandre Chartre return (B_TRUE); 4389*ca6d1280SAlexandre Chartre } 4390*ca6d1280SAlexandre Chartre hattr_total += srvr->hattr_total; 4391*ca6d1280SAlexandre Chartre } 4392*ca6d1280SAlexandre Chartre 4393*ca6d1280SAlexandre Chartre /* 4394*ca6d1280SAlexandre Chartre * We retry the handshake if we haven't reached the minimum number of 4395*ca6d1280SAlexandre Chartre * attribute negotiation. 4396*ca6d1280SAlexandre Chartre */ 4397*ca6d1280SAlexandre Chartre return (hattr_total < vdcp->hattr_min); 4398*ca6d1280SAlexandre Chartre } 4399*ca6d1280SAlexandre Chartre 44001ae08745Sheppo /* -------------------------------------------------------------------------- */ 44011ae08745Sheppo 44021ae08745Sheppo /* 44031ae08745Sheppo * The following functions process the incoming messages from vds 44041ae08745Sheppo */ 44051ae08745Sheppo 44060a55fbb7Slm66018 /* 44070a55fbb7Slm66018 * Function: 44080a55fbb7Slm66018 * vdc_process_msg_thread() 44090a55fbb7Slm66018 * 44100a55fbb7Slm66018 * Description: 44110a55fbb7Slm66018 * 44123af08d82Slm66018 * Main VDC message processing thread. Each vDisk instance 44133af08d82Slm66018 * consists of a copy of this thread. This thread triggers 44143af08d82Slm66018 * all the handshakes and data exchange with the server. It 44153af08d82Slm66018 * also handles all channel resets 44163af08d82Slm66018 * 44170a55fbb7Slm66018 * Arguments: 44180a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
44190a55fbb7Slm66018 * 44200a55fbb7Slm66018 * Return Code: 44210a55fbb7Slm66018 * None 44220a55fbb7Slm66018 */ 44231ae08745Sheppo static void 44243af08d82Slm66018 vdc_process_msg_thread(vdc_t *vdcp) 44251ae08745Sheppo { 4426*ca6d1280SAlexandre Chartre boolean_t failure_msg = B_FALSE; 44271ae08745Sheppo int status; 4428655fd6a9Sachartre int ctimeout; 4429655fd6a9Sachartre timeout_id_t tmid = 0; 44308cd10891Snarayan clock_t ldcup_timeout = 0; 44316ace3c90SAlexandre Chartre vdc_server_t *srvr; 44326ace3c90SAlexandre Chartre vdc_service_state_t svc_state; 4433*ca6d1280SAlexandre Chartre int hshake_cnt = 0; 4434*ca6d1280SAlexandre Chartre int hattr_cnt = 0; 44351ae08745Sheppo 44363af08d82Slm66018 mutex_enter(&vdcp->lock); 44371ae08745Sheppo 4438*ca6d1280SAlexandre Chartre ASSERT(vdcp->lifecycle == VDC_LC_ATTACHING); 4439*ca6d1280SAlexandre Chartre 44401ae08745Sheppo for (;;) { 44411ae08745Sheppo 44423af08d82Slm66018 #define Q(_s) (vdcp->state == _s) ? #_s : 44433af08d82Slm66018 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 44443af08d82Slm66018 Q(VDC_STATE_INIT) 44453af08d82Slm66018 Q(VDC_STATE_INIT_WAITING) 44463af08d82Slm66018 Q(VDC_STATE_NEGOTIATE) 44473af08d82Slm66018 Q(VDC_STATE_HANDLE_PENDING) 44486ace3c90SAlexandre Chartre Q(VDC_STATE_FAULTED) 44496ace3c90SAlexandre Chartre Q(VDC_STATE_FAILED) 44503af08d82Slm66018 Q(VDC_STATE_RUNNING) 44513af08d82Slm66018 Q(VDC_STATE_RESETTING) 44523af08d82Slm66018 Q(VDC_STATE_DETACH) 44533af08d82Slm66018 "UNKNOWN"); 4454*ca6d1280SAlexandre Chartre #undef Q 44551ae08745Sheppo 44563af08d82Slm66018 switch (vdcp->state) { 44573af08d82Slm66018 case VDC_STATE_INIT: 44583af08d82Slm66018 4459655fd6a9Sachartre /* 4460655fd6a9Sachartre * If requested, start a timeout to check if the 4461655fd6a9Sachartre * connection with vds is established in the 4462655fd6a9Sachartre * specified delay. If the timeout expires, we 4463655fd6a9Sachartre * will cancel any pending request. 
4464655fd6a9Sachartre * 4465655fd6a9Sachartre * If some reset have occurred while establishing 4466655fd6a9Sachartre * the connection, we already have a timeout armed 4467655fd6a9Sachartre * and in that case we don't need to arm a new one. 44688cd10891Snarayan * 44698cd10891Snarayan * The same rule applies when there are multiple vds'. 44708cd10891Snarayan * If either a connection cannot be established or 44718cd10891Snarayan * the handshake times out, the connection thread will 44728cd10891Snarayan * try another server. The 'ctimeout' will report 44738cd10891Snarayan * back an error after it expires irrespective of 44748cd10891Snarayan * whether the vdisk is trying to connect to just 44758cd10891Snarayan * one or multiple servers. 4476655fd6a9Sachartre */ 4477655fd6a9Sachartre ctimeout = (vdc_timeout != 0)? 44788cd10891Snarayan vdc_timeout : vdcp->curr_server->ctimeout; 4479655fd6a9Sachartre 4480655fd6a9Sachartre if (ctimeout != 0 && tmid == 0) { 4481655fd6a9Sachartre tmid = timeout(vdc_connection_timeout, vdcp, 44828cd10891Snarayan ctimeout * drv_usectohz(MICROSEC)); 4483655fd6a9Sachartre } 4484655fd6a9Sachartre 44856ace3c90SAlexandre Chartre /* Switch to STATE_DETACH if drv is detaching */ 44866ace3c90SAlexandre Chartre if (vdcp->lifecycle == VDC_LC_DETACHING) { 44876ace3c90SAlexandre Chartre vdcp->state = VDC_STATE_DETACH; 44886ace3c90SAlexandre Chartre break; 44896ace3c90SAlexandre Chartre } 44906ace3c90SAlexandre Chartre 44916ace3c90SAlexandre Chartre /* Check if the timeout has been reached */ 44926ace3c90SAlexandre Chartre if (vdcp->ctimeout_reached) { 44936ace3c90SAlexandre Chartre ASSERT(tmid != 0); 44946ace3c90SAlexandre Chartre tmid = 0; 44956ace3c90SAlexandre Chartre vdcp->state = VDC_STATE_FAILED; 44966ace3c90SAlexandre Chartre break; 44976ace3c90SAlexandre Chartre } 44986ace3c90SAlexandre Chartre 4499*ca6d1280SAlexandre Chartre /* 4500*ca6d1280SAlexandre Chartre * Switch to another server when we reach the limit of 4501*ca6d1280SAlexandre Chartre * 
the number of handshake per server or if we have done 4502*ca6d1280SAlexandre Chartre * an attribute negotiation. 4503*ca6d1280SAlexandre Chartre */ 4504*ca6d1280SAlexandre Chartre if (hshake_cnt >= vdc_hshake_retries || hattr_cnt > 0) { 45058cd10891Snarayan 4506*ca6d1280SAlexandre Chartre if (!vdc_handshake_retry(vdcp, hshake_cnt, 4507*ca6d1280SAlexandre Chartre hattr_cnt)) { 4508*ca6d1280SAlexandre Chartre DMSG(vdcp, 0, "[%d] too many " 4509*ca6d1280SAlexandre Chartre "handshakes", vdcp->instance); 45106ace3c90SAlexandre Chartre vdcp->state = VDC_STATE_FAILED; 45118cd10891Snarayan break; 45128cd10891Snarayan } 45138cd10891Snarayan 45148cd10891Snarayan vdc_switch_server(vdcp); 4515*ca6d1280SAlexandre Chartre 4516*ca6d1280SAlexandre Chartre hshake_cnt = 0; 4517*ca6d1280SAlexandre Chartre hattr_cnt = 0; 4518*ca6d1280SAlexandre Chartre } 4519*ca6d1280SAlexandre Chartre 4520*ca6d1280SAlexandre Chartre hshake_cnt++; 45218cd10891Snarayan 45223af08d82Slm66018 /* Bring up connection with vds via LDC */ 45233af08d82Slm66018 status = vdc_start_ldc_connection(vdcp); 45248cd10891Snarayan if (status != EINVAL) { 45253af08d82Slm66018 vdcp->state = VDC_STATE_INIT_WAITING; 45266ace3c90SAlexandre Chartre } else { 45276ace3c90SAlexandre Chartre vdcp->curr_server->svc_state = 45286ace3c90SAlexandre Chartre VDC_SERVICE_FAILED; 45296ace3c90SAlexandre Chartre vdc_print_svc_status(vdcp); 45303af08d82Slm66018 } 45313af08d82Slm66018 break; 45323af08d82Slm66018 45333af08d82Slm66018 case VDC_STATE_INIT_WAITING: 45343af08d82Slm66018 45358cd10891Snarayan /* if channel is UP, start negotiation */ 45368cd10891Snarayan if (vdcp->curr_server->ldc_state == LDC_UP) { 45378cd10891Snarayan vdcp->state = VDC_STATE_NEGOTIATE; 45388cd10891Snarayan break; 45398cd10891Snarayan } 45408cd10891Snarayan 45418cd10891Snarayan /* 45426ace3c90SAlexandre Chartre * Wait for LDC_UP. If it times out and we have multiple 45436ace3c90SAlexandre Chartre * servers then we will retry using a different server. 
45448cd10891Snarayan */ 45456ace3c90SAlexandre Chartre ldcup_timeout = ddi_get_lbolt() + (vdc_ldcup_timeout * 45468cd10891Snarayan drv_usectohz(MICROSEC)); 45476ace3c90SAlexandre Chartre status = cv_timedwait(&vdcp->initwait_cv, &vdcp->lock, 45486ace3c90SAlexandre Chartre ldcup_timeout); 45498cd10891Snarayan if (status == -1 && 45508cd10891Snarayan vdcp->state == VDC_STATE_INIT_WAITING && 45518cd10891Snarayan vdcp->curr_server->ldc_state != LDC_UP) { 45528cd10891Snarayan /* timed out & still waiting */ 45536ace3c90SAlexandre Chartre vdcp->curr_server->svc_state = 45546ace3c90SAlexandre Chartre VDC_SERVICE_FAILED; 45556ace3c90SAlexandre Chartre vdc_print_svc_status(vdcp); 45568cd10891Snarayan vdcp->state = VDC_STATE_INIT; 45578cd10891Snarayan break; 45588cd10891Snarayan } 45598cd10891Snarayan 45603af08d82Slm66018 if (vdcp->state != VDC_STATE_INIT_WAITING) { 45613af08d82Slm66018 DMSG(vdcp, 0, 45623af08d82Slm66018 "state moved to %d out from under us...\n", 45633af08d82Slm66018 vdcp->state); 45643af08d82Slm66018 } 45653af08d82Slm66018 break; 45663af08d82Slm66018 45673af08d82Slm66018 case VDC_STATE_NEGOTIATE: 45683af08d82Slm66018 switch (status = vdc_ver_negotiation(vdcp)) { 45693af08d82Slm66018 case 0: 45703af08d82Slm66018 break; 45713af08d82Slm66018 default: 45723af08d82Slm66018 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 45733af08d82Slm66018 status); 45743af08d82Slm66018 goto reset; 45753af08d82Slm66018 } 45763af08d82Slm66018 4577*ca6d1280SAlexandre Chartre hattr_cnt++; 4578*ca6d1280SAlexandre Chartre 45793af08d82Slm66018 switch (status = vdc_attr_negotiation(vdcp)) { 45803af08d82Slm66018 case 0: 45813af08d82Slm66018 break; 45823af08d82Slm66018 default: 45833af08d82Slm66018 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 45843af08d82Slm66018 status); 45853af08d82Slm66018 goto reset; 45863af08d82Slm66018 } 45873af08d82Slm66018 45883af08d82Slm66018 switch (status = vdc_dring_negotiation(vdcp)) { 45893af08d82Slm66018 case 0: 45903af08d82Slm66018 break; 
45913af08d82Slm66018 default: 45923af08d82Slm66018 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 45933af08d82Slm66018 status); 45943af08d82Slm66018 goto reset; 45953af08d82Slm66018 } 45963af08d82Slm66018 45973af08d82Slm66018 switch (status = vdc_rdx_exchange(vdcp)) { 45983af08d82Slm66018 case 0: 45993af08d82Slm66018 vdcp->state = VDC_STATE_HANDLE_PENDING; 46003af08d82Slm66018 goto done; 46013af08d82Slm66018 default: 46023af08d82Slm66018 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 46033af08d82Slm66018 status); 46043af08d82Slm66018 goto reset; 46053af08d82Slm66018 } 46063af08d82Slm66018 reset: 46073af08d82Slm66018 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 46083af08d82Slm66018 status); 46093af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4610655fd6a9Sachartre vdcp->self_reset = B_TRUE; 46116ace3c90SAlexandre Chartre vdcp->curr_server->svc_state = VDC_SERVICE_FAILED; 46126ace3c90SAlexandre Chartre vdc_print_svc_status(vdcp); 46133af08d82Slm66018 done: 46143af08d82Slm66018 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 46153af08d82Slm66018 vdcp->state); 46163af08d82Slm66018 break; 46173af08d82Slm66018 46183af08d82Slm66018 case VDC_STATE_HANDLE_PENDING: 46193af08d82Slm66018 46206ace3c90SAlexandre Chartre DMSG(vdcp, 0, "[%d] connection to service domain is up", 46216ace3c90SAlexandre Chartre vdcp->instance); 46226ace3c90SAlexandre Chartre vdcp->curr_server->svc_state = VDC_SERVICE_CONNECTED; 46236ace3c90SAlexandre Chartre 46246ace3c90SAlexandre Chartre mutex_exit(&vdcp->lock); 46256ace3c90SAlexandre Chartre 4626e8dc8350Sjmcp /* 46276ace3c90SAlexandre Chartre * If we have multiple servers, check that the backend 46286ace3c90SAlexandre Chartre * is effectively available before resubmitting any IO. 
4629e8dc8350Sjmcp */ 46306ace3c90SAlexandre Chartre if (vdcp->num_servers > 1 && 46316ace3c90SAlexandre Chartre vdc_eio_check(vdcp, 0) != 0) { 46326ace3c90SAlexandre Chartre mutex_enter(&vdcp->lock); 46336ace3c90SAlexandre Chartre vdcp->curr_server->svc_state = 46346ace3c90SAlexandre Chartre VDC_SERVICE_FAULTED; 46356ace3c90SAlexandre Chartre vdcp->state = VDC_STATE_FAULTED; 4636e8dc8350Sjmcp break; 4637e8dc8350Sjmcp } 463800e3a3e9SAlexandre Chartre 46396ace3c90SAlexandre Chartre if (tmid != 0) { 46406ace3c90SAlexandre Chartre (void) untimeout(tmid); 46416ace3c90SAlexandre Chartre tmid = 0; 46426ace3c90SAlexandre Chartre vdcp->ctimeout_reached = B_FALSE; 46436ace3c90SAlexandre Chartre } 46446ace3c90SAlexandre Chartre 46456ace3c90SAlexandre Chartre /* 46466ace3c90SAlexandre Chartre * Setup devid 46476ace3c90SAlexandre Chartre */ 46486ace3c90SAlexandre Chartre (void) vdc_setup_devid(vdcp); 46496ace3c90SAlexandre Chartre 46506ace3c90SAlexandre Chartre status = vdc_resubmit_backup_dring(vdcp); 46516ace3c90SAlexandre Chartre 46526ace3c90SAlexandre Chartre mutex_enter(&vdcp->lock); 46536ace3c90SAlexandre Chartre 46546ace3c90SAlexandre Chartre if (status) { 46556ace3c90SAlexandre Chartre vdcp->state = VDC_STATE_RESETTING; 46566ace3c90SAlexandre Chartre vdcp->self_reset = B_TRUE; 46576ace3c90SAlexandre Chartre vdcp->curr_server->svc_state = 46586ace3c90SAlexandre Chartre VDC_SERVICE_FAILED; 46596ace3c90SAlexandre Chartre vdc_print_svc_status(vdcp); 46606ace3c90SAlexandre Chartre } else { 46616ace3c90SAlexandre Chartre vdcp->state = VDC_STATE_RUNNING; 46626ace3c90SAlexandre Chartre } 46636ace3c90SAlexandre Chartre break; 46646ace3c90SAlexandre Chartre 46656ace3c90SAlexandre Chartre case VDC_STATE_FAULTED: 46666ace3c90SAlexandre Chartre /* 46676ace3c90SAlexandre Chartre * Server is faulted because the backend is unavailable. 
46686ace3c90SAlexandre Chartre * If all servers are faulted then we mark the service 46696ace3c90SAlexandre Chartre * as failed, otherwise we reset to switch to another 46706ace3c90SAlexandre Chartre * server. 46716ace3c90SAlexandre Chartre */ 46726ace3c90SAlexandre Chartre vdc_print_svc_status(vdcp); 46736ace3c90SAlexandre Chartre 46746ace3c90SAlexandre Chartre /* check if all servers are faulted */ 46756ace3c90SAlexandre Chartre for (srvr = vdcp->server_list; srvr != NULL; 46766ace3c90SAlexandre Chartre srvr = srvr->next) { 46776ace3c90SAlexandre Chartre svc_state = srvr->svc_state; 46786ace3c90SAlexandre Chartre if (svc_state != VDC_SERVICE_FAULTED) 46796ace3c90SAlexandre Chartre break; 46806ace3c90SAlexandre Chartre } 46816ace3c90SAlexandre Chartre 46826ace3c90SAlexandre Chartre if (srvr != NULL) { 46836ace3c90SAlexandre Chartre vdcp->state = VDC_STATE_RESETTING; 46846ace3c90SAlexandre Chartre vdcp->self_reset = B_TRUE; 46856ace3c90SAlexandre Chartre } else { 46866ace3c90SAlexandre Chartre vdcp->state = VDC_STATE_FAILED; 46876ace3c90SAlexandre Chartre } 46886ace3c90SAlexandre Chartre break; 46896ace3c90SAlexandre Chartre 46906ace3c90SAlexandre Chartre case VDC_STATE_FAILED: 46916ace3c90SAlexandre Chartre /* 46926ace3c90SAlexandre Chartre * We reach this state when we are unable to access the 46936ace3c90SAlexandre Chartre * backend from any server, either because of a maximum 46946ace3c90SAlexandre Chartre * connection retries or timeout, or because the backend 46956ace3c90SAlexandre Chartre * is unavailable. 46966ace3c90SAlexandre Chartre * 46976ace3c90SAlexandre Chartre * Then we cancel the backup DRing so that errors get 46986ace3c90SAlexandre Chartre * reported and we wait for a new I/O before attempting 46996ace3c90SAlexandre Chartre * another connection. 
47006ace3c90SAlexandre Chartre */ 4701*ca6d1280SAlexandre Chartre 47026ace3c90SAlexandre Chartre cmn_err(CE_NOTE, "vdisk@%d disk access failed", 47036ace3c90SAlexandre Chartre vdcp->instance); 4704*ca6d1280SAlexandre Chartre failure_msg = B_TRUE; 4705*ca6d1280SAlexandre Chartre 4706*ca6d1280SAlexandre Chartre if (vdcp->lifecycle == VDC_LC_ATTACHING) { 4707*ca6d1280SAlexandre Chartre vdcp->lifecycle = VDC_LC_ONLINE_PENDING; 4708*ca6d1280SAlexandre Chartre vdcp->hattr_min = vdc_hattr_min_initial; 4709*ca6d1280SAlexandre Chartre } else { 4710*ca6d1280SAlexandre Chartre vdcp->hattr_min = vdc_hattr_min; 4711*ca6d1280SAlexandre Chartre } 47126ace3c90SAlexandre Chartre 47136ace3c90SAlexandre Chartre /* cancel any timeout */ 471400e3a3e9SAlexandre Chartre if (tmid != 0) { 471500e3a3e9SAlexandre Chartre (void) untimeout(tmid); 471600e3a3e9SAlexandre Chartre tmid = 0; 471700e3a3e9SAlexandre Chartre } 471800e3a3e9SAlexandre Chartre 47196ace3c90SAlexandre Chartre /* cancel pending I/Os */ 47206ace3c90SAlexandre Chartre cv_broadcast(&vdcp->running_cv); 47216ace3c90SAlexandre Chartre vdc_cancel_backup_dring(vdcp); 47226ace3c90SAlexandre Chartre 47236ace3c90SAlexandre Chartre /* wait for new I/O */ 47246ace3c90SAlexandre Chartre while (!vdcp->io_pending) 47256ace3c90SAlexandre Chartre cv_wait(&vdcp->io_pending_cv, &vdcp->lock); 47266ace3c90SAlexandre Chartre 47276ace3c90SAlexandre Chartre /* 47286ace3c90SAlexandre Chartre * There's a new IO pending. Try to re-establish a 47296ace3c90SAlexandre Chartre * connection. Mark all services as offline, so that 47306ace3c90SAlexandre Chartre * we don't stop again before having retried all 47316ace3c90SAlexandre Chartre * servers. 
47326ace3c90SAlexandre Chartre */ 47336ace3c90SAlexandre Chartre for (srvr = vdcp->server_list; srvr != NULL; 47346ace3c90SAlexandre Chartre srvr = srvr->next) { 47356ace3c90SAlexandre Chartre srvr->svc_state = VDC_SERVICE_OFFLINE; 4736*ca6d1280SAlexandre Chartre srvr->hshake_cnt = 0; 4737*ca6d1280SAlexandre Chartre srvr->hattr_cnt = 0; 4738*ca6d1280SAlexandre Chartre srvr->hattr_total = 0; 47396ace3c90SAlexandre Chartre } 47406ace3c90SAlexandre Chartre 47416ace3c90SAlexandre Chartre /* reset variables */ 4742*ca6d1280SAlexandre Chartre hshake_cnt = 0; 4743*ca6d1280SAlexandre Chartre hattr_cnt = 0; 47446ace3c90SAlexandre Chartre vdcp->ctimeout_reached = B_FALSE; 47456ace3c90SAlexandre Chartre 474600e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_RESETTING; 47476ace3c90SAlexandre Chartre vdcp->self_reset = B_TRUE; 47483af08d82Slm66018 break; 47493af08d82Slm66018 47503af08d82Slm66018 /* enter running state */ 47513af08d82Slm66018 case VDC_STATE_RUNNING: 4752*ca6d1280SAlexandre Chartre 4753*ca6d1280SAlexandre Chartre if (vdcp->lifecycle == VDC_LC_DETACHING) { 4754*ca6d1280SAlexandre Chartre vdcp->state = VDC_STATE_DETACH; 4755*ca6d1280SAlexandre Chartre break; 4756*ca6d1280SAlexandre Chartre } 4757*ca6d1280SAlexandre Chartre 4758*ca6d1280SAlexandre Chartre vdcp->lifecycle = VDC_LC_ONLINE; 4759*ca6d1280SAlexandre Chartre 4760*ca6d1280SAlexandre Chartre if (failure_msg) { 4761*ca6d1280SAlexandre Chartre cmn_err(CE_NOTE, "vdisk@%d disk access " 4762*ca6d1280SAlexandre Chartre "recovered", vdcp->instance); 4763*ca6d1280SAlexandre Chartre failure_msg = B_FALSE; 4764*ca6d1280SAlexandre Chartre } 4765*ca6d1280SAlexandre Chartre 47663af08d82Slm66018 /* 47673af08d82Slm66018 * Signal anyone waiting for the connection 47683af08d82Slm66018 * to come on line. 
47693af08d82Slm66018 */ 47703af08d82Slm66018 cv_broadcast(&vdcp->running_cv); 47712f5224aeSachartre 47726ace3c90SAlexandre Chartre /* backend has to be checked after reset */ 47736ace3c90SAlexandre Chartre if (vdcp->failfast_interval != 0 || 47746ace3c90SAlexandre Chartre vdcp->num_servers > 1) 47756ace3c90SAlexandre Chartre cv_signal(&vdcp->eio_cv); 47762f5224aeSachartre 47772f5224aeSachartre /* ownership is lost during reset */ 47782f5224aeSachartre if (vdcp->ownership & VDC_OWNERSHIP_WANTED) 47792f5224aeSachartre vdcp->ownership |= VDC_OWNERSHIP_RESET; 47802f5224aeSachartre cv_signal(&vdcp->ownership_cv); 47812f5224aeSachartre 47826ace3c90SAlexandre Chartre vdcp->curr_server->svc_state = VDC_SERVICE_ONLINE; 47836ace3c90SAlexandre Chartre vdc_print_svc_status(vdcp); 4784d7400d00Sachartre 47853af08d82Slm66018 mutex_exit(&vdcp->lock); 47863af08d82Slm66018 47873af08d82Slm66018 for (;;) { 47883af08d82Slm66018 vio_msg_t msg; 47893af08d82Slm66018 status = vdc_wait_for_response(vdcp, &msg); 47903af08d82Slm66018 if (status) break; 47913af08d82Slm66018 47923af08d82Slm66018 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 47933af08d82Slm66018 vdcp->instance); 47943af08d82Slm66018 status = vdc_process_data_msg(vdcp, &msg); 47951ae08745Sheppo if (status) { 47963af08d82Slm66018 DMSG(vdcp, 1, "[%d] process_data_msg " 47973af08d82Slm66018 "returned err=%d\n", vdcp->instance, 47983af08d82Slm66018 status); 47991ae08745Sheppo break; 48001ae08745Sheppo } 48011ae08745Sheppo 48023af08d82Slm66018 } 4803e1ebb9ecSlm66018 48043af08d82Slm66018 mutex_enter(&vdcp->lock); 48053af08d82Slm66018 48066ace3c90SAlexandre Chartre /* all servers are now offline */ 48076ace3c90SAlexandre Chartre for (srvr = vdcp->server_list; srvr != NULL; 48086ace3c90SAlexandre Chartre srvr = srvr->next) { 48096ace3c90SAlexandre Chartre srvr->svc_state = VDC_SERVICE_OFFLINE; 48106ace3c90SAlexandre Chartre srvr->log_state = VDC_SERVICE_NONE; 4811*ca6d1280SAlexandre Chartre srvr->hshake_cnt = 0; 4812*ca6d1280SAlexandre 
Chartre srvr->hattr_cnt = 0; 4813*ca6d1280SAlexandre Chartre srvr->hattr_total = 0; 48146ace3c90SAlexandre Chartre } 48156ace3c90SAlexandre Chartre 4816*ca6d1280SAlexandre Chartre hshake_cnt = 0; 4817*ca6d1280SAlexandre Chartre hattr_cnt = 0; 4818*ca6d1280SAlexandre Chartre 48196ace3c90SAlexandre Chartre vdc_print_svc_status(vdcp); 4820d7400d00Sachartre 48213af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4822690555a1Sachartre vdcp->self_reset = B_TRUE; 48233af08d82Slm66018 break; 48243af08d82Slm66018 48253af08d82Slm66018 case VDC_STATE_RESETTING: 4826655fd6a9Sachartre /* 4827655fd6a9Sachartre * When we reach this state, we either come from the 4828655fd6a9Sachartre * VDC_STATE_RUNNING state and we can have pending 4829655fd6a9Sachartre * request but no timeout is armed; or we come from 4830655fd6a9Sachartre * the VDC_STATE_INIT_WAITING, VDC_NEGOTIATE or 4831655fd6a9Sachartre * VDC_HANDLE_PENDING state and there is no pending 4832655fd6a9Sachartre * request or pending requests have already been copied 4833655fd6a9Sachartre * into the backup dring. So we can safely keep the 4834655fd6a9Sachartre * connection timeout armed while we are in this state. 4835655fd6a9Sachartre */ 4836655fd6a9Sachartre 48373af08d82Slm66018 DMSG(vdcp, 0, "Initiating channel reset " 48383af08d82Slm66018 "(pending = %d)\n", (int)vdcp->threads_pending); 48393af08d82Slm66018 48403af08d82Slm66018 if (vdcp->self_reset) { 48413af08d82Slm66018 DMSG(vdcp, 0, 48423af08d82Slm66018 "[%d] calling stop_ldc_connection.\n", 48433af08d82Slm66018 vdcp->instance); 48443af08d82Slm66018 status = vdc_stop_ldc_connection(vdcp); 48453af08d82Slm66018 vdcp->self_reset = B_FALSE; 48461ae08745Sheppo } 48471ae08745Sheppo 48481ae08745Sheppo /* 48493af08d82Slm66018 * Wait for all threads currently waiting 48503af08d82Slm66018 * for a free dring entry to use. 
48511ae08745Sheppo */ 48523af08d82Slm66018 while (vdcp->threads_pending) { 48533af08d82Slm66018 cv_broadcast(&vdcp->membind_cv); 48543af08d82Slm66018 cv_broadcast(&vdcp->dring_free_cv); 48553af08d82Slm66018 mutex_exit(&vdcp->lock); 4856205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4857205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 48583af08d82Slm66018 mutex_enter(&vdcp->lock); 48591ae08745Sheppo } 48601ae08745Sheppo 48613af08d82Slm66018 ASSERT(vdcp->threads_pending == 0); 48621ae08745Sheppo 48633af08d82Slm66018 /* Sanity check that no thread is receiving */ 48643af08d82Slm66018 ASSERT(vdcp->read_state != VDC_READ_WAITING); 48650a55fbb7Slm66018 48663af08d82Slm66018 vdcp->read_state = VDC_READ_IDLE; 48676ace3c90SAlexandre Chartre vdcp->io_pending = B_FALSE; 48686ace3c90SAlexandre Chartre 48696ace3c90SAlexandre Chartre /* 48706ace3c90SAlexandre Chartre * Cleanup any pending eio. These I/Os are going to 48716ace3c90SAlexandre Chartre * be resubmitted. 48726ace3c90SAlexandre Chartre */ 48736ace3c90SAlexandre Chartre vdc_eio_unqueue(vdcp, 0, B_FALSE); 48743af08d82Slm66018 48753af08d82Slm66018 vdc_backup_local_dring(vdcp); 48763af08d82Slm66018 48773af08d82Slm66018 /* cleanup the old d-ring */ 48783af08d82Slm66018 vdc_destroy_descriptor_ring(vdcp); 48793af08d82Slm66018 48803af08d82Slm66018 /* go and start again */ 48813af08d82Slm66018 vdcp->state = VDC_STATE_INIT; 48823af08d82Slm66018 48830a55fbb7Slm66018 break; 48840a55fbb7Slm66018 48853af08d82Slm66018 case VDC_STATE_DETACH: 48863af08d82Slm66018 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 48873af08d82Slm66018 vdcp->instance); 48883af08d82Slm66018 4889655fd6a9Sachartre /* cancel any pending timeout */ 4890655fd6a9Sachartre mutex_exit(&vdcp->lock); 4891655fd6a9Sachartre if (tmid != 0) { 4892655fd6a9Sachartre (void) untimeout(tmid); 4893655fd6a9Sachartre tmid = 0; 4894655fd6a9Sachartre } 4895655fd6a9Sachartre mutex_enter(&vdcp->lock); 4896655fd6a9Sachartre 48973c96341aSnarayan /* 48983c96341aSnarayan 
* Signal anyone waiting for connection 48993c96341aSnarayan * to come online 49003c96341aSnarayan */ 49013c96341aSnarayan cv_broadcast(&vdcp->running_cv); 49023c96341aSnarayan 49036ace3c90SAlexandre Chartre while (vdcp->sync_op_cnt > 0) { 49046ace3c90SAlexandre Chartre cv_broadcast(&vdcp->sync_blocked_cv); 49053af08d82Slm66018 mutex_exit(&vdcp->lock); 4906205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4907205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 49083af08d82Slm66018 mutex_enter(&vdcp->lock); 49090a55fbb7Slm66018 } 49101ae08745Sheppo 49113af08d82Slm66018 mutex_exit(&vdcp->lock); 49123af08d82Slm66018 49133af08d82Slm66018 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 49143af08d82Slm66018 vdcp->instance); 49153af08d82Slm66018 thread_exit(); 49163af08d82Slm66018 break; 49173af08d82Slm66018 } 49183af08d82Slm66018 } 49190a55fbb7Slm66018 } 49200a55fbb7Slm66018 49210a55fbb7Slm66018 49220a55fbb7Slm66018 /* 49230a55fbb7Slm66018 * Function: 49240a55fbb7Slm66018 * vdc_process_data_msg() 49250a55fbb7Slm66018 * 49260a55fbb7Slm66018 * Description: 49270a55fbb7Slm66018 * This function is called by the message processing thread each time 49280a55fbb7Slm66018 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 49290a55fbb7Slm66018 * be an ACK or NACK from vds[1] which vdc handles as follows. 49300a55fbb7Slm66018 * ACK - wake up the waiting thread 49310a55fbb7Slm66018 * NACK - resend any messages necessary 49320a55fbb7Slm66018 * 49330a55fbb7Slm66018 * [1] Although the message format allows it, vds should not send a 49340a55fbb7Slm66018 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 49350a55fbb7Slm66018 * some bizarre reason it does, vdc will reset the connection. 49360a55fbb7Slm66018 * 49370a55fbb7Slm66018 * Arguments: 49380a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
49390a55fbb7Slm66018 * msg - the LDC message sent by vds 49400a55fbb7Slm66018 * 49410a55fbb7Slm66018 * Return Code: 49420a55fbb7Slm66018 * 0 - Success. 49430a55fbb7Slm66018 * > 0 - error value returned by LDC 49440a55fbb7Slm66018 */ 49450a55fbb7Slm66018 static int 49463af08d82Slm66018 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 49470a55fbb7Slm66018 { 49480a55fbb7Slm66018 int status = 0; 49493af08d82Slm66018 vio_dring_msg_t *dring_msg; 4950d10e4ef2Snarayan vdc_local_desc_t *ldep = NULL; 49513af08d82Slm66018 int start, end; 49523af08d82Slm66018 int idx; 495390e2f9dcSlm66018 int op; 49540a55fbb7Slm66018 49553af08d82Slm66018 dring_msg = (vio_dring_msg_t *)msg; 49560a55fbb7Slm66018 49573af08d82Slm66018 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 49583af08d82Slm66018 ASSERT(vdcp != NULL); 49593af08d82Slm66018 49603af08d82Slm66018 mutex_enter(&vdcp->lock); 49610a55fbb7Slm66018 49620a55fbb7Slm66018 /* 49630a55fbb7Slm66018 * Check to see if the message has bogus data 49640a55fbb7Slm66018 */ 4965e1ebb9ecSlm66018 idx = start = dring_msg->start_idx; 49660a55fbb7Slm66018 end = dring_msg->end_idx; 49673af08d82Slm66018 if ((start >= vdcp->dring_len) || 49683af08d82Slm66018 (end >= vdcp->dring_len) || (end < -1)) { 496990e2f9dcSlm66018 /* 497090e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 497190e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 497290e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 497390e2f9dcSlm66018 */ 497490e2f9dcSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 49753af08d82Slm66018 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 49763af08d82Slm66018 vdcp->instance, start, end); 49773af08d82Slm66018 mutex_exit(&vdcp->lock); 4978e1ebb9ecSlm66018 return (EINVAL); 49790a55fbb7Slm66018 } 49800a55fbb7Slm66018 49810a55fbb7Slm66018 /* 49820a55fbb7Slm66018 * Verify that the sequence number is what vdc expects. 
49830a55fbb7Slm66018 */ 49843af08d82Slm66018 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 4985e1ebb9ecSlm66018 case VDC_SEQ_NUM_TODO: 4986e1ebb9ecSlm66018 break; /* keep processing this message */ 4987e1ebb9ecSlm66018 case VDC_SEQ_NUM_SKIP: 49883af08d82Slm66018 mutex_exit(&vdcp->lock); 4989e1ebb9ecSlm66018 return (0); 4990e1ebb9ecSlm66018 case VDC_SEQ_NUM_INVALID: 499190e2f9dcSlm66018 /* 499290e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 499390e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 499490e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 499590e2f9dcSlm66018 */ 4996366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 499790e2f9dcSlm66018 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 4998366a92acSlm66018 mutex_exit(&vdcp->lock); 49990a55fbb7Slm66018 return (ENXIO); 50000a55fbb7Slm66018 } 50010a55fbb7Slm66018 50023af08d82Slm66018 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 500390e2f9dcSlm66018 /* 500490e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 5005*ca6d1280SAlexandre Chartre * No need to update the wait/run queues, this will be done by 5006*ca6d1280SAlexandre Chartre * the thread calling this function. 500790e2f9dcSlm66018 */ 5008366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 500990e2f9dcSlm66018 VDC_DUMP_DRING_MSG(dring_msg); 501090e2f9dcSlm66018 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 50113af08d82Slm66018 mutex_exit(&vdcp->lock); 5012e1ebb9ecSlm66018 return (EIO); 50130a55fbb7Slm66018 50143af08d82Slm66018 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 501590e2f9dcSlm66018 /* 501690e2f9dcSlm66018 * Update the I/O statistics to indicate that an error occurred. 501790e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 501890e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 
501990e2f9dcSlm66018 */ 5020366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_protoerrs); 50213af08d82Slm66018 mutex_exit(&vdcp->lock); 5022e1ebb9ecSlm66018 return (EPROTO); 5023e1ebb9ecSlm66018 } 5024e1ebb9ecSlm66018 50253af08d82Slm66018 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 50263af08d82Slm66018 ASSERT(start == end); 50273af08d82Slm66018 50283af08d82Slm66018 ldep = &vdcp->local_dring[idx]; 50293af08d82Slm66018 50306ace3c90SAlexandre Chartre DMSG(vdcp, 1, ": state 0x%x\n", ldep->dep->hdr.dstate); 50313af08d82Slm66018 5032e1ebb9ecSlm66018 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 50333af08d82Slm66018 struct buf *bufp; 5034e1ebb9ecSlm66018 50356ace3c90SAlexandre Chartre status = ldep->dep->payload.status; 5036d10e4ef2Snarayan 50376ace3c90SAlexandre Chartre bufp = ldep->buf; 50383af08d82Slm66018 ASSERT(bufp != NULL); 50396ace3c90SAlexandre Chartre 50406ace3c90SAlexandre Chartre bufp->b_resid = bufp->b_bcount - ldep->dep->payload.nbytes; 5041e8dc8350Sjmcp bioerror(bufp, status); 50426ace3c90SAlexandre Chartre 50436ace3c90SAlexandre Chartre if (status != 0) { 50446ace3c90SAlexandre Chartre DMSG(vdcp, 1, "I/O status=%d\n", status); 5045d10e4ef2Snarayan } 50462f5224aeSachartre 50473c96341aSnarayan DMSG(vdcp, 1, 50486ace3c90SAlexandre Chartre "I/O complete req=%ld bytes resp=%ld bytes\n", 50493c96341aSnarayan bufp->b_bcount, ldep->dep->payload.nbytes); 50502f5224aeSachartre 50512f5224aeSachartre /* 50526ace3c90SAlexandre Chartre * If the request has failed and we have multiple servers or 50536ace3c90SAlexandre Chartre * failfast is enabled then we will have to defer the completion 50546ace3c90SAlexandre Chartre * of the request until we have checked that the vdisk backend 50556ace3c90SAlexandre Chartre * is effectively available (if multiple server) or that there 50566ace3c90SAlexandre Chartre * is no reservation conflict (if failfast). 
50572f5224aeSachartre */ 5058007a3653SAlexandre Chartre if (status != 0 && 5059007a3653SAlexandre Chartre ((vdcp->num_servers > 1 && 50606ace3c90SAlexandre Chartre (ldep->flags & VDC_OP_ERRCHK_BACKEND)) || 50616ace3c90SAlexandre Chartre (vdcp->failfast_interval != 0 && 50626ace3c90SAlexandre Chartre (ldep->flags & VDC_OP_ERRCHK_CONFLICT)))) { 50636ace3c90SAlexandre Chartre /* 50646ace3c90SAlexandre Chartre * The I/O has failed and we need to check the error. 50656ace3c90SAlexandre Chartre */ 50666ace3c90SAlexandre Chartre (void) vdc_eio_queue(vdcp, idx); 50672f5224aeSachartre } else { 50686ace3c90SAlexandre Chartre op = ldep->operation; 50696ace3c90SAlexandre Chartre if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 5070366a92acSlm66018 if (status == 0) { 5071366a92acSlm66018 VD_UPDATE_IO_STATS(vdcp, op, 5072366a92acSlm66018 ldep->dep->payload.nbytes); 50736ace3c90SAlexandre Chartre } else { 50746ace3c90SAlexandre Chartre VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 5075366a92acSlm66018 } 507690e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 5077366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 50780a55fbb7Slm66018 } 50796ace3c90SAlexandre Chartre (void) vdc_depopulate_descriptor(vdcp, idx); 50806ace3c90SAlexandre Chartre biodone(bufp); 5081e8dc8350Sjmcp } 50823af08d82Slm66018 } 50833af08d82Slm66018 50843af08d82Slm66018 /* let the arrival signal propogate */ 50853af08d82Slm66018 mutex_exit(&vdcp->lock); 50860a55fbb7Slm66018 5087e1ebb9ecSlm66018 /* probe gives the count of how many entries were processed */ 5088366a92acSlm66018 DTRACE_PROBE2(processed, int, 1, vdc_t *, vdcp); 50890a55fbb7Slm66018 50903af08d82Slm66018 return (0); 50910a55fbb7Slm66018 } 50920a55fbb7Slm66018 50930a55fbb7Slm66018 50940a55fbb7Slm66018 /* 50950a55fbb7Slm66018 * Function: 50960a55fbb7Slm66018 * vdc_handle_ver_msg() 50970a55fbb7Slm66018 * 50980a55fbb7Slm66018 * Description: 50990a55fbb7Slm66018 * 51000a55fbb7Slm66018 * Arguments: 51010a55fbb7Slm66018 * vdc - soft state pointer for this instance of the 
device driver. 51020a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 51030a55fbb7Slm66018 * 51040a55fbb7Slm66018 * Return Code: 51050a55fbb7Slm66018 * 0 - Success 51060a55fbb7Slm66018 */ 51070a55fbb7Slm66018 static int 51080a55fbb7Slm66018 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 51090a55fbb7Slm66018 { 51100a55fbb7Slm66018 int status = 0; 51110a55fbb7Slm66018 51120a55fbb7Slm66018 ASSERT(vdc != NULL); 51130a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 51140a55fbb7Slm66018 51150a55fbb7Slm66018 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 51160a55fbb7Slm66018 return (EPROTO); 51170a55fbb7Slm66018 } 51180a55fbb7Slm66018 51190a55fbb7Slm66018 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 51200a55fbb7Slm66018 return (EINVAL); 51210a55fbb7Slm66018 } 51220a55fbb7Slm66018 51230a55fbb7Slm66018 switch (ver_msg->tag.vio_subtype) { 51240a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 51250a55fbb7Slm66018 /* 51260a55fbb7Slm66018 * We check to see if the version returned is indeed supported 51270a55fbb7Slm66018 * (The server may have also adjusted the minor number downwards 51280a55fbb7Slm66018 * and if so 'ver_msg' will contain the actual version agreed) 51290a55fbb7Slm66018 */ 51300a55fbb7Slm66018 if (vdc_is_supported_version(ver_msg)) { 51310a55fbb7Slm66018 vdc->ver.major = ver_msg->ver_major; 51320a55fbb7Slm66018 vdc->ver.minor = ver_msg->ver_minor; 51330a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 51340a55fbb7Slm66018 } else { 51350a55fbb7Slm66018 status = EPROTO; 51360a55fbb7Slm66018 } 51370a55fbb7Slm66018 break; 51380a55fbb7Slm66018 51390a55fbb7Slm66018 case VIO_SUBTYPE_NACK: 51400a55fbb7Slm66018 /* 51410a55fbb7Slm66018 * call vdc_is_supported_version() which will return the next 51420a55fbb7Slm66018 * supported version (if any) in 'ver_msg' 51430a55fbb7Slm66018 */ 51440a55fbb7Slm66018 (void) vdc_is_supported_version(ver_msg); 51450a55fbb7Slm66018 if (ver_msg->ver_major > 0) { 51460a55fbb7Slm66018 size_t len = sizeof (*ver_msg); 51470a55fbb7Slm66018 
51480a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 51490a55fbb7Slm66018 51500a55fbb7Slm66018 /* reset the necessary fields and resend */ 51510a55fbb7Slm66018 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 51520a55fbb7Slm66018 ver_msg->dev_class = VDEV_DISK; 51530a55fbb7Slm66018 51540a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 51553af08d82Slm66018 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 51560a55fbb7Slm66018 vdc->instance, status); 51570a55fbb7Slm66018 if (len != sizeof (*ver_msg)) 51580a55fbb7Slm66018 status = EBADMSG; 51590a55fbb7Slm66018 } else { 516087a7269eSachartre DMSG(vdc, 0, "[%d] No common version with vDisk server", 516187a7269eSachartre vdc->instance); 51620a55fbb7Slm66018 status = ENOTSUP; 51630a55fbb7Slm66018 } 51640a55fbb7Slm66018 51650a55fbb7Slm66018 break; 51661ae08745Sheppo case VIO_SUBTYPE_INFO: 51671ae08745Sheppo /* 51681ae08745Sheppo * Handle the case where vds starts handshake 5169eff7243fSlm66018 * (for now only vdc is the instigator) 51701ae08745Sheppo */ 51711ae08745Sheppo status = ENOTSUP; 51721ae08745Sheppo break; 51731ae08745Sheppo 51741ae08745Sheppo default: 51750a55fbb7Slm66018 status = EINVAL; 51761ae08745Sheppo break; 51771ae08745Sheppo } 51781ae08745Sheppo 51790a55fbb7Slm66018 return (status); 51800a55fbb7Slm66018 } 51810a55fbb7Slm66018 51820a55fbb7Slm66018 /* 51830a55fbb7Slm66018 * Function: 51840a55fbb7Slm66018 * vdc_handle_attr_msg() 51850a55fbb7Slm66018 * 51860a55fbb7Slm66018 * Description: 51870a55fbb7Slm66018 * 51880a55fbb7Slm66018 * Arguments: 51890a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
51900a55fbb7Slm66018 * attr_msg - LDC message sent by vDisk server 51910a55fbb7Slm66018 * 51920a55fbb7Slm66018 * Return Code: 51930a55fbb7Slm66018 * 0 - Success 51940a55fbb7Slm66018 */ 51950a55fbb7Slm66018 static int 51960a55fbb7Slm66018 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 51970a55fbb7Slm66018 { 51980a55fbb7Slm66018 int status = 0; 51996ace3c90SAlexandre Chartre vd_disk_type_t old_type; 52000a55fbb7Slm66018 52010a55fbb7Slm66018 ASSERT(vdc != NULL); 52020a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 52030a55fbb7Slm66018 52040a55fbb7Slm66018 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 52050a55fbb7Slm66018 return (EPROTO); 52060a55fbb7Slm66018 } 52070a55fbb7Slm66018 52080a55fbb7Slm66018 switch (attr_msg->tag.vio_subtype) { 52091ae08745Sheppo case VIO_SUBTYPE_ACK: 52101ae08745Sheppo /* 52111ae08745Sheppo * We now verify the attributes sent by vds. 52121ae08745Sheppo */ 521378fcd0a1Sachartre if (attr_msg->vdisk_size == 0) { 521478fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid disk size from vds", 521578fcd0a1Sachartre vdc->instance); 521678fcd0a1Sachartre status = EINVAL; 521778fcd0a1Sachartre break; 521878fcd0a1Sachartre } 521978fcd0a1Sachartre 522078fcd0a1Sachartre if (attr_msg->max_xfer_sz == 0) { 522178fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid transfer size from vds", 522278fcd0a1Sachartre vdc->instance); 522378fcd0a1Sachartre status = EINVAL; 522478fcd0a1Sachartre break; 522578fcd0a1Sachartre } 522678fcd0a1Sachartre 52272f5224aeSachartre if (attr_msg->vdisk_size == VD_SIZE_UNKNOWN) { 52282f5224aeSachartre DMSG(vdc, 0, "[%d] Unknown disk size from vds", 52292f5224aeSachartre vdc->instance); 52302f5224aeSachartre attr_msg->vdisk_size = 0; 52312f5224aeSachartre } 523265908c77Syu, larry liu - Sun Microsystems - Beijing China 523365908c77Syu, larry liu - Sun Microsystems - Beijing China /* update the VIO block size */ 523465908c77Syu, larry liu - Sun Microsystems - Beijing China if (attr_msg->vdisk_block_size > 0 && 523565908c77Syu, larry 
liu - Sun Microsystems - Beijing China vdc_update_vio_bsize(vdc, 523665908c77Syu, larry liu - Sun Microsystems - Beijing China attr_msg->vdisk_block_size) != 0) { 523765908c77Syu, larry liu - Sun Microsystems - Beijing China DMSG(vdc, 0, "[%d] Invalid block size (%u) from vds", 523865908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->instance, attr_msg->vdisk_block_size); 523965908c77Syu, larry liu - Sun Microsystems - Beijing China status = EINVAL; 524065908c77Syu, larry liu - Sun Microsystems - Beijing China break; 524165908c77Syu, larry liu - Sun Microsystems - Beijing China } 524265908c77Syu, larry liu - Sun Microsystems - Beijing China 5243de3a5331SRamesh Chitrothu /* update disk, block and transfer sizes */ 52446ace3c90SAlexandre Chartre old_type = vdc->vdisk_type; 5245de3a5331SRamesh Chitrothu vdc_update_size(vdc, attr_msg->vdisk_size, 5246de3a5331SRamesh Chitrothu attr_msg->vdisk_block_size, attr_msg->max_xfer_sz); 52471ae08745Sheppo vdc->vdisk_type = attr_msg->vdisk_type; 524817cadca8Slm66018 vdc->operations = attr_msg->operations; 524917cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) 525017cadca8Slm66018 vdc->vdisk_media = attr_msg->vdisk_media; 525117cadca8Slm66018 else 525217cadca8Slm66018 vdc->vdisk_media = 0; 52531ae08745Sheppo 52543af08d82Slm66018 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 5255e1ebb9ecSlm66018 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 52563af08d82Slm66018 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 525765908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->instance, vdc->vdisk_bsize, 5258e1ebb9ecSlm66018 attr_msg->vdisk_block_size); 5259e1ebb9ecSlm66018 5260f0ca1d9aSsb155480 if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) || 52611ae08745Sheppo (attr_msg->vdisk_size > INT64_MAX) || 526217cadca8Slm66018 (attr_msg->operations == 0) || 52631ae08745Sheppo (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 52643af08d82Slm66018 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 
5265e1ebb9ecSlm66018 vdc->instance); 52661ae08745Sheppo status = EINVAL; 52671ae08745Sheppo break; 52681ae08745Sheppo } 52691ae08745Sheppo 527078fcd0a1Sachartre /* 527178fcd0a1Sachartre * Now that we have received all attributes we can create a 527278fcd0a1Sachartre * fake geometry for the disk. 527378fcd0a1Sachartre */ 527478fcd0a1Sachartre vdc_create_fake_geometry(vdc); 52756ace3c90SAlexandre Chartre 52766ace3c90SAlexandre Chartre /* 52776ace3c90SAlexandre Chartre * If the disk type was previously unknown and device nodes 52786ace3c90SAlexandre Chartre * were created then the driver would have created 8 device 52796ace3c90SAlexandre Chartre * nodes. If we now find out that this is a single-slice disk 52806ace3c90SAlexandre Chartre * then we need to re-create the appropriate device nodes. 52816ace3c90SAlexandre Chartre */ 52826ace3c90SAlexandre Chartre if (old_type == VD_DISK_TYPE_UNK && 52836ace3c90SAlexandre Chartre (vdc->initialized & VDC_MINOR) && 52846ace3c90SAlexandre Chartre vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 52856ace3c90SAlexandre Chartre ddi_remove_minor_node(vdc->dip, NULL); 52866ace3c90SAlexandre Chartre (void) devfs_clean(ddi_get_parent(vdc->dip), 52876ace3c90SAlexandre Chartre NULL, DV_CLEAN_FORCE); 52886ace3c90SAlexandre Chartre if (vdc_create_device_nodes(vdc) != 0) { 52896ace3c90SAlexandre Chartre DMSG(vdc, 0, "![%d] Failed to update " 52906ace3c90SAlexandre Chartre "device nodes", vdc->instance); 52916ace3c90SAlexandre Chartre } 52926ace3c90SAlexandre Chartre } 52936ace3c90SAlexandre Chartre 52941ae08745Sheppo break; 52951ae08745Sheppo 52961ae08745Sheppo case VIO_SUBTYPE_NACK: 52971ae08745Sheppo /* 52981ae08745Sheppo * vds could not handle the attributes we sent so we 52991ae08745Sheppo * stop negotiating. 
53001ae08745Sheppo */ 53011ae08745Sheppo status = EPROTO; 53021ae08745Sheppo break; 53031ae08745Sheppo 53041ae08745Sheppo case VIO_SUBTYPE_INFO: 53051ae08745Sheppo /* 53061ae08745Sheppo * Handle the case where vds starts the handshake 53071ae08745Sheppo * (for now; vdc is the only supported instigatior) 53081ae08745Sheppo */ 53091ae08745Sheppo status = ENOTSUP; 53101ae08745Sheppo break; 53111ae08745Sheppo 53121ae08745Sheppo default: 53131ae08745Sheppo status = ENOTSUP; 53141ae08745Sheppo break; 53151ae08745Sheppo } 53161ae08745Sheppo 53170a55fbb7Slm66018 return (status); 53181ae08745Sheppo } 53191ae08745Sheppo 53200a55fbb7Slm66018 /* 53210a55fbb7Slm66018 * Function: 53220a55fbb7Slm66018 * vdc_handle_dring_reg_msg() 53230a55fbb7Slm66018 * 53240a55fbb7Slm66018 * Description: 53250a55fbb7Slm66018 * 53260a55fbb7Slm66018 * Arguments: 53270a55fbb7Slm66018 * vdc - soft state pointer for this instance of the driver. 53280a55fbb7Slm66018 * dring_msg - LDC message sent by vDisk server 53290a55fbb7Slm66018 * 53300a55fbb7Slm66018 * Return Code: 53310a55fbb7Slm66018 * 0 - Success 53320a55fbb7Slm66018 */ 53330a55fbb7Slm66018 static int 53340a55fbb7Slm66018 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 53350a55fbb7Slm66018 { 53360a55fbb7Slm66018 int status = 0; 53371ae08745Sheppo 53380a55fbb7Slm66018 ASSERT(vdc != NULL); 53390a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 53400a55fbb7Slm66018 53410a55fbb7Slm66018 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 53420a55fbb7Slm66018 return (EPROTO); 53430a55fbb7Slm66018 } 53440a55fbb7Slm66018 53450a55fbb7Slm66018 switch (dring_msg->tag.vio_subtype) { 53460a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 53471ae08745Sheppo /* save the received dring_ident */ 53481ae08745Sheppo vdc->dring_ident = dring_msg->dring_ident; 53493af08d82Slm66018 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 5350e1ebb9ecSlm66018 vdc->instance, vdc->dring_ident); 53511ae08745Sheppo break; 53521ae08745Sheppo 53531ae08745Sheppo case 
VIO_SUBTYPE_NACK: 53541ae08745Sheppo /* 53551ae08745Sheppo * vds could not handle the DRing info we sent so we 53561ae08745Sheppo * stop negotiating. 53571ae08745Sheppo */ 53583af08d82Slm66018 DMSG(vdc, 0, "[%d] server could not register DRing\n", 53593af08d82Slm66018 vdc->instance); 53601ae08745Sheppo status = EPROTO; 53611ae08745Sheppo break; 53621ae08745Sheppo 53631ae08745Sheppo case VIO_SUBTYPE_INFO: 53641ae08745Sheppo /* 53651ae08745Sheppo * Handle the case where vds starts handshake 53661ae08745Sheppo * (for now only vdc is the instigatior) 53671ae08745Sheppo */ 53681ae08745Sheppo status = ENOTSUP; 53691ae08745Sheppo break; 53701ae08745Sheppo default: 53711ae08745Sheppo status = ENOTSUP; 53721ae08745Sheppo } 53731ae08745Sheppo 53741ae08745Sheppo return (status); 53751ae08745Sheppo } 53761ae08745Sheppo 53771ae08745Sheppo /* 53781ae08745Sheppo * Function: 53791ae08745Sheppo * vdc_verify_seq_num() 53801ae08745Sheppo * 53811ae08745Sheppo * Description: 5382e1ebb9ecSlm66018 * This functions verifies that the sequence number sent back by the vDisk 5383e1ebb9ecSlm66018 * server with the latest message is what is expected (i.e. it is greater 5384e1ebb9ecSlm66018 * than the last seq num sent by the vDisk server and less than or equal 5385e1ebb9ecSlm66018 * to the last seq num generated by vdc). 5386e1ebb9ecSlm66018 * 5387e1ebb9ecSlm66018 * It then checks the request ID to see if any requests need processing 5388e1ebb9ecSlm66018 * in the DRing. 53891ae08745Sheppo * 53901ae08745Sheppo * Arguments: 53911ae08745Sheppo * vdc - soft state pointer for this instance of the driver. 
53921ae08745Sheppo * dring_msg - pointer to the LDC message sent by vds 53931ae08745Sheppo * 53941ae08745Sheppo * Return Code: 5395e1ebb9ecSlm66018 * VDC_SEQ_NUM_TODO - Message needs to be processed 5396e1ebb9ecSlm66018 * VDC_SEQ_NUM_SKIP - Message has already been processed 5397e1ebb9ecSlm66018 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 5398e1ebb9ecSlm66018 * vdc cannot deal with them 53991ae08745Sheppo */ 5400e1ebb9ecSlm66018 static int 5401e1ebb9ecSlm66018 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 54021ae08745Sheppo { 54031ae08745Sheppo ASSERT(vdc != NULL); 54041ae08745Sheppo ASSERT(dring_msg != NULL); 5405d10e4ef2Snarayan ASSERT(mutex_owned(&vdc->lock)); 54061ae08745Sheppo 54071ae08745Sheppo /* 54081ae08745Sheppo * Check to see if the messages were responded to in the correct 5409e1ebb9ecSlm66018 * order by vds. 54101ae08745Sheppo */ 5411e1ebb9ecSlm66018 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 5412e1ebb9ecSlm66018 (dring_msg->seq_num > vdc->seq_num)) { 54133af08d82Slm66018 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 5414e1ebb9ecSlm66018 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 5415e1ebb9ecSlm66018 vdc->instance, dring_msg->seq_num, 5416e1ebb9ecSlm66018 vdc->seq_num_reply, vdc->seq_num, 5417e1ebb9ecSlm66018 vdc->req_id_proc, vdc->req_id); 5418e1ebb9ecSlm66018 return (VDC_SEQ_NUM_INVALID); 54191ae08745Sheppo } 5420e1ebb9ecSlm66018 vdc->seq_num_reply = dring_msg->seq_num; 54211ae08745Sheppo 5422e1ebb9ecSlm66018 if (vdc->req_id_proc < vdc->req_id) 5423e1ebb9ecSlm66018 return (VDC_SEQ_NUM_TODO); 5424e1ebb9ecSlm66018 else 5425e1ebb9ecSlm66018 return (VDC_SEQ_NUM_SKIP); 54261ae08745Sheppo } 54271ae08745Sheppo 54280a55fbb7Slm66018 54290a55fbb7Slm66018 /* 54300a55fbb7Slm66018 * Function: 54310a55fbb7Slm66018 * vdc_is_supported_version() 54320a55fbb7Slm66018 * 54330a55fbb7Slm66018 * Description: 54340a55fbb7Slm66018 * This routine checks if the major/minor version numbers specified in 54350a55fbb7Slm66018 * 
'ver_msg' are supported. If not it finds the next version that is 54360a55fbb7Slm66018 * in the supported version list 'vdc_version[]' and sets the fields in 54370a55fbb7Slm66018 * 'ver_msg' to those values 54380a55fbb7Slm66018 * 54390a55fbb7Slm66018 * Arguments: 54400a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 54410a55fbb7Slm66018 * 54420a55fbb7Slm66018 * Return Code: 54430a55fbb7Slm66018 * B_TRUE - Success 54440a55fbb7Slm66018 * B_FALSE - Version not supported 54450a55fbb7Slm66018 */ 54460a55fbb7Slm66018 static boolean_t 54470a55fbb7Slm66018 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 54480a55fbb7Slm66018 { 54490a55fbb7Slm66018 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 54500a55fbb7Slm66018 54510a55fbb7Slm66018 for (int i = 0; i < vdc_num_versions; i++) { 54520a55fbb7Slm66018 ASSERT(vdc_version[i].major > 0); 54530a55fbb7Slm66018 ASSERT((i == 0) || 54540a55fbb7Slm66018 (vdc_version[i].major < vdc_version[i-1].major)); 54550a55fbb7Slm66018 54560a55fbb7Slm66018 /* 54570a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 54580a55fbb7Slm66018 * necessary, down to the highest value supported by this 54590a55fbb7Slm66018 * client. 
The server should support all minor versions lower 54600a55fbb7Slm66018 * than the value it sent 54610a55fbb7Slm66018 */ 54620a55fbb7Slm66018 if (ver_msg->ver_major == vdc_version[i].major) { 54630a55fbb7Slm66018 if (ver_msg->ver_minor > vdc_version[i].minor) { 54643af08d82Slm66018 DMSGX(0, 54653af08d82Slm66018 "Adjusting minor version from %u to %u", 54660a55fbb7Slm66018 ver_msg->ver_minor, vdc_version[i].minor); 54670a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 54680a55fbb7Slm66018 } 54690a55fbb7Slm66018 return (B_TRUE); 54700a55fbb7Slm66018 } 54710a55fbb7Slm66018 54720a55fbb7Slm66018 /* 54730a55fbb7Slm66018 * If the message contains a higher major version number, set 54740a55fbb7Slm66018 * the message's major/minor versions to the current values 54750a55fbb7Slm66018 * and return false, so this message will get resent with 54760a55fbb7Slm66018 * these values, and the server will potentially try again 54770a55fbb7Slm66018 * with the same or a lower version 54780a55fbb7Slm66018 */ 54790a55fbb7Slm66018 if (ver_msg->ver_major > vdc_version[i].major) { 54800a55fbb7Slm66018 ver_msg->ver_major = vdc_version[i].major; 54810a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 54823af08d82Slm66018 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 54830a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 54840a55fbb7Slm66018 54850a55fbb7Slm66018 return (B_FALSE); 54860a55fbb7Slm66018 } 54870a55fbb7Slm66018 54880a55fbb7Slm66018 /* 54890a55fbb7Slm66018 * Otherwise, the message's major version is less than the 54900a55fbb7Slm66018 * current major version, so continue the loop to the next 54910a55fbb7Slm66018 * (lower) supported version 54920a55fbb7Slm66018 */ 54930a55fbb7Slm66018 } 54940a55fbb7Slm66018 54950a55fbb7Slm66018 /* 54960a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 54970a55fbb7Slm66018 * message to terminate negotiation 54980a55fbb7Slm66018 */ 54990a55fbb7Slm66018 ver_msg->ver_major = 0; 55000a55fbb7Slm66018 
ver_msg->ver_minor = 0; 55010a55fbb7Slm66018 55020a55fbb7Slm66018 return (B_FALSE); 55030a55fbb7Slm66018 } 55041ae08745Sheppo /* -------------------------------------------------------------------------- */ 55051ae08745Sheppo 55061ae08745Sheppo /* 55071ae08745Sheppo * DKIO(7) support 55081ae08745Sheppo */ 55091ae08745Sheppo 55101ae08745Sheppo typedef struct vdc_dk_arg { 55111ae08745Sheppo struct dk_callback dkc; 55121ae08745Sheppo int mode; 55131ae08745Sheppo dev_t dev; 55141ae08745Sheppo vdc_t *vdc; 55151ae08745Sheppo } vdc_dk_arg_t; 55161ae08745Sheppo 55171ae08745Sheppo /* 55181ae08745Sheppo * Function: 55191ae08745Sheppo * vdc_dkio_flush_cb() 55201ae08745Sheppo * 55211ae08745Sheppo * Description: 55221ae08745Sheppo * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 55231ae08745Sheppo * by kernel code. 55241ae08745Sheppo * 55251ae08745Sheppo * Arguments: 55261ae08745Sheppo * arg - a pointer to a vdc_dk_arg_t structure. 55271ae08745Sheppo */ 55281ae08745Sheppo void 55291ae08745Sheppo vdc_dkio_flush_cb(void *arg) 55301ae08745Sheppo { 55311ae08745Sheppo struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 55321ae08745Sheppo struct dk_callback *dkc = NULL; 55331ae08745Sheppo vdc_t *vdc = NULL; 55341ae08745Sheppo int rv; 55351ae08745Sheppo 55361ae08745Sheppo if (dk_arg == NULL) { 55373af08d82Slm66018 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 55381ae08745Sheppo return; 55391ae08745Sheppo } 55401ae08745Sheppo dkc = &dk_arg->dkc; 55411ae08745Sheppo vdc = dk_arg->vdc; 55421ae08745Sheppo ASSERT(vdc != NULL); 55431ae08745Sheppo 55443af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 55456ace3c90SAlexandre Chartre VDCPART(dk_arg->dev), 0, VIO_both_dir, B_TRUE); 55461ae08745Sheppo if (rv != 0) { 55473af08d82Slm66018 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 5548e1ebb9ecSlm66018 vdc->instance, rv, 55491ae08745Sheppo ddi_model_convert_from(dk_arg->mode & FMODELS)); 55501ae08745Sheppo } 
55511ae08745Sheppo 55521ae08745Sheppo /* 55531ae08745Sheppo * Trigger the call back to notify the caller the the ioctl call has 55541ae08745Sheppo * been completed. 55551ae08745Sheppo */ 55561ae08745Sheppo if ((dk_arg->mode & FKIOCTL) && 55571ae08745Sheppo (dkc != NULL) && 55581ae08745Sheppo (dkc->dkc_callback != NULL)) { 55591ae08745Sheppo ASSERT(dkc->dkc_cookie != NULL); 55608e6a2a04Slm66018 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 55611ae08745Sheppo } 55621ae08745Sheppo 55631ae08745Sheppo /* Indicate that one less DKIO write flush is outstanding */ 55641ae08745Sheppo mutex_enter(&vdc->lock); 55651ae08745Sheppo vdc->dkio_flush_pending--; 55661ae08745Sheppo ASSERT(vdc->dkio_flush_pending >= 0); 55671ae08745Sheppo mutex_exit(&vdc->lock); 55688e6a2a04Slm66018 55698e6a2a04Slm66018 /* free the mem that was allocated when the callback was dispatched */ 55708e6a2a04Slm66018 kmem_free(arg, sizeof (vdc_dk_arg_t)); 55711ae08745Sheppo } 55721ae08745Sheppo 55731ae08745Sheppo /* 557487a7269eSachartre * Function: 55759642afceSachartre * vdc_dkio_gapart() 557687a7269eSachartre * 557787a7269eSachartre * Description: 557887a7269eSachartre * This function implements the DKIOCGAPART ioctl. 
557987a7269eSachartre * 558087a7269eSachartre * Arguments: 558178fcd0a1Sachartre * vdc - soft state pointer 558287a7269eSachartre * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 558387a7269eSachartre * flag - ioctl flags 558487a7269eSachartre */ 558587a7269eSachartre static int 55869642afceSachartre vdc_dkio_gapart(vdc_t *vdc, caddr_t arg, int flag) 558787a7269eSachartre { 558878fcd0a1Sachartre struct dk_geom *geom; 5589342440ecSPrasad Singamsetty struct extvtoc *vtoc; 559087a7269eSachartre union { 559187a7269eSachartre struct dk_map map[NDKMAP]; 559287a7269eSachartre struct dk_map32 map32[NDKMAP]; 559387a7269eSachartre } data; 559487a7269eSachartre int i, rv, size; 559587a7269eSachartre 559678fcd0a1Sachartre mutex_enter(&vdc->lock); 559787a7269eSachartre 559878fcd0a1Sachartre if ((rv = vdc_validate_geometry(vdc)) != 0) { 559978fcd0a1Sachartre mutex_exit(&vdc->lock); 560087a7269eSachartre return (rv); 560178fcd0a1Sachartre } 560287a7269eSachartre 5603342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) { 5604342440ecSPrasad Singamsetty mutex_exit(&vdc->lock); 5605342440ecSPrasad Singamsetty return (EOVERFLOW); 5606342440ecSPrasad Singamsetty } 5607342440ecSPrasad Singamsetty 560878fcd0a1Sachartre vtoc = vdc->vtoc; 560978fcd0a1Sachartre geom = vdc->geom; 561087a7269eSachartre 561187a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 561287a7269eSachartre 561378fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 561478fcd0a1Sachartre data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 561578fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 561678fcd0a1Sachartre data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 561787a7269eSachartre } 561887a7269eSachartre size = NDKMAP * sizeof (struct dk_map32); 561987a7269eSachartre 562087a7269eSachartre } else { 562187a7269eSachartre 562278fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 562378fcd0a1Sachartre data.map[i].dkl_cylno = vtoc->v_part[i].p_start 
/ 562478fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 562578fcd0a1Sachartre data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 562687a7269eSachartre } 562787a7269eSachartre size = NDKMAP * sizeof (struct dk_map); 562887a7269eSachartre 562987a7269eSachartre } 563087a7269eSachartre 563178fcd0a1Sachartre mutex_exit(&vdc->lock); 563278fcd0a1Sachartre 563387a7269eSachartre if (ddi_copyout(&data, arg, size, flag) != 0) 563487a7269eSachartre return (EFAULT); 563587a7269eSachartre 563687a7269eSachartre return (0); 563787a7269eSachartre } 563887a7269eSachartre 563987a7269eSachartre /* 564087a7269eSachartre * Function: 56419642afceSachartre * vdc_dkio_partition() 56429642afceSachartre * 56439642afceSachartre * Description: 56449642afceSachartre * This function implements the DKIOCPARTITION ioctl. 56459642afceSachartre * 56469642afceSachartre * Arguments: 56479642afceSachartre * vdc - soft state pointer 56489642afceSachartre * arg - a pointer to a struct partition64 structure 56499642afceSachartre * flag - ioctl flags 56509642afceSachartre */ 56519642afceSachartre static int 56529642afceSachartre vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag) 56539642afceSachartre { 56549642afceSachartre struct partition64 p64; 56559642afceSachartre efi_gpt_t *gpt; 56569642afceSachartre efi_gpe_t *gpe; 56579642afceSachartre vd_efi_dev_t edev; 56589642afceSachartre uint_t partno; 56599642afceSachartre int rv; 56609642afceSachartre 56619642afceSachartre if (ddi_copyin(arg, &p64, sizeof (struct partition64), flag)) { 56629642afceSachartre return (EFAULT); 56639642afceSachartre } 56649642afceSachartre 566565908c77Syu, larry liu - Sun Microsystems - Beijing China VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 56669642afceSachartre 56679642afceSachartre if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) { 56689642afceSachartre return (rv); 56699642afceSachartre } 56709642afceSachartre 56719642afceSachartre partno = p64.p_partno; 56729642afceSachartre 56739642afceSachartre if 
(partno >= gpt->efi_gpt_NumberOfPartitionEntries) { 56749642afceSachartre vd_efi_free(&edev, gpt, gpe); 56759642afceSachartre return (ESRCH); 56769642afceSachartre } 56779642afceSachartre 56789642afceSachartre bcopy(&gpe[partno].efi_gpe_PartitionTypeGUID, &p64.p_type, 56799642afceSachartre sizeof (struct uuid)); 56809642afceSachartre p64.p_start = gpe[partno].efi_gpe_StartingLBA; 56819642afceSachartre p64.p_size = gpe[partno].efi_gpe_EndingLBA - p64.p_start + 1; 56829642afceSachartre 56839642afceSachartre if (ddi_copyout(&p64, arg, sizeof (struct partition64), flag)) { 56849642afceSachartre vd_efi_free(&edev, gpt, gpe); 56859642afceSachartre return (EFAULT); 56869642afceSachartre } 56879642afceSachartre 56889642afceSachartre vd_efi_free(&edev, gpt, gpe); 56899642afceSachartre return (0); 56909642afceSachartre } 56919642afceSachartre 56929642afceSachartre /* 56939642afceSachartre * Function: 569487a7269eSachartre * vdc_dioctl_rwcmd() 569587a7269eSachartre * 569687a7269eSachartre * Description: 569787a7269eSachartre * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 569887a7269eSachartre * for DKC_DIRECT disks to read or write at an absolute disk offset. 
569987a7269eSachartre * 570087a7269eSachartre * Arguments: 570187a7269eSachartre * dev - device 570287a7269eSachartre * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 570387a7269eSachartre * flag - ioctl flags 570487a7269eSachartre */ 570587a7269eSachartre static int 570665908c77Syu, larry liu - Sun Microsystems - Beijing China vdc_dioctl_rwcmd(vdc_t *vdc, caddr_t arg, int flag) 570787a7269eSachartre { 570887a7269eSachartre struct dadkio_rwcmd32 rwcmd32; 570987a7269eSachartre struct dadkio_rwcmd rwcmd; 571087a7269eSachartre struct iovec aiov; 571187a7269eSachartre struct uio auio; 571287a7269eSachartre int rw, status; 571387a7269eSachartre struct buf *buf; 571487a7269eSachartre 571587a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 571687a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 571787a7269eSachartre sizeof (struct dadkio_rwcmd32), flag)) { 571887a7269eSachartre return (EFAULT); 571987a7269eSachartre } 572087a7269eSachartre rwcmd.cmd = rwcmd32.cmd; 572187a7269eSachartre rwcmd.flags = rwcmd32.flags; 572287a7269eSachartre rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 572387a7269eSachartre rwcmd.buflen = rwcmd32.buflen; 572487a7269eSachartre rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 572587a7269eSachartre } else { 572687a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 572787a7269eSachartre sizeof (struct dadkio_rwcmd), flag)) { 572887a7269eSachartre return (EFAULT); 572987a7269eSachartre } 573087a7269eSachartre } 573187a7269eSachartre 573287a7269eSachartre switch (rwcmd.cmd) { 573387a7269eSachartre case DADKIO_RWCMD_READ: 573487a7269eSachartre rw = B_READ; 573587a7269eSachartre break; 573687a7269eSachartre case DADKIO_RWCMD_WRITE: 573787a7269eSachartre rw = B_WRITE; 573887a7269eSachartre break; 573987a7269eSachartre default: 574087a7269eSachartre return (EINVAL); 574187a7269eSachartre } 574287a7269eSachartre 574387a7269eSachartre bzero((caddr_t)&aiov, sizeof (struct iovec)); 
574487a7269eSachartre aiov.iov_base = rwcmd.bufaddr; 574587a7269eSachartre aiov.iov_len = rwcmd.buflen; 574687a7269eSachartre 574787a7269eSachartre bzero((caddr_t)&auio, sizeof (struct uio)); 574887a7269eSachartre auio.uio_iov = &aiov; 574987a7269eSachartre auio.uio_iovcnt = 1; 575065908c77Syu, larry liu - Sun Microsystems - Beijing China auio.uio_loffset = rwcmd.blkaddr * vdc->vdisk_bsize; 575187a7269eSachartre auio.uio_resid = rwcmd.buflen; 575287a7269eSachartre auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 575387a7269eSachartre 575487a7269eSachartre buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 575587a7269eSachartre bioinit(buf); 575687a7269eSachartre /* 575787a7269eSachartre * We use the private field of buf to specify that this is an 575887a7269eSachartre * I/O using an absolute offset. 575987a7269eSachartre */ 576087a7269eSachartre buf->b_private = (void *)VD_SLICE_NONE; 576187a7269eSachartre 576265908c77Syu, larry liu - Sun Microsystems - Beijing China status = physio(vdc_strategy, buf, VD_MAKE_DEV(vdc->instance, 0), 576365908c77Syu, larry liu - Sun Microsystems - Beijing China rw, vdc_min, &auio); 576487a7269eSachartre 576587a7269eSachartre biofini(buf); 576687a7269eSachartre kmem_free(buf, sizeof (buf_t)); 576787a7269eSachartre 576887a7269eSachartre return (status); 576987a7269eSachartre } 577087a7269eSachartre 577187a7269eSachartre /* 57722f5224aeSachartre * Allocate a buffer for a VD_OP_SCSICMD operation. The size of the allocated 57732f5224aeSachartre * buffer is returned in alloc_len. 
57742f5224aeSachartre */ 57752f5224aeSachartre static vd_scsi_t * 57762f5224aeSachartre vdc_scsi_alloc(int cdb_len, int sense_len, int datain_len, int dataout_len, 57772f5224aeSachartre int *alloc_len) 57782f5224aeSachartre { 57792f5224aeSachartre vd_scsi_t *vd_scsi; 57802f5224aeSachartre int vd_scsi_len = VD_SCSI_SIZE; 57812f5224aeSachartre 57822f5224aeSachartre vd_scsi_len += P2ROUNDUP(cdb_len, sizeof (uint64_t)); 57832f5224aeSachartre vd_scsi_len += P2ROUNDUP(sense_len, sizeof (uint64_t)); 57842f5224aeSachartre vd_scsi_len += P2ROUNDUP(datain_len, sizeof (uint64_t)); 57852f5224aeSachartre vd_scsi_len += P2ROUNDUP(dataout_len, sizeof (uint64_t)); 57862f5224aeSachartre 57872f5224aeSachartre ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); 57882f5224aeSachartre 57892f5224aeSachartre vd_scsi = kmem_zalloc(vd_scsi_len, KM_SLEEP); 57902f5224aeSachartre 57912f5224aeSachartre vd_scsi->cdb_len = cdb_len; 57922f5224aeSachartre vd_scsi->sense_len = sense_len; 57932f5224aeSachartre vd_scsi->datain_len = datain_len; 57942f5224aeSachartre vd_scsi->dataout_len = dataout_len; 57952f5224aeSachartre 57962f5224aeSachartre *alloc_len = vd_scsi_len; 57972f5224aeSachartre 57982f5224aeSachartre return (vd_scsi); 57992f5224aeSachartre } 58002f5224aeSachartre 58012f5224aeSachartre /* 58022f5224aeSachartre * Convert the status of a SCSI command to a Solaris return code. 58032f5224aeSachartre * 58042f5224aeSachartre * Arguments: 58052f5224aeSachartre * vd_scsi - The SCSI operation buffer. 58062f5224aeSachartre * log_error - indicate if an error message should be logged. 58072f5224aeSachartre * 58082f5224aeSachartre * Note that our SCSI error messages are rather primitive for the moment 58092f5224aeSachartre * and could be improved by decoding some data like the SCSI command and 58102f5224aeSachartre * the sense key. 58112f5224aeSachartre * 58122f5224aeSachartre * Return value: 58132f5224aeSachartre * 0 - Status is good. 58142f5224aeSachartre * EACCES - Status reports a reservation conflict. 
58152f5224aeSachartre * ENOTSUP - Status reports a check condition and sense key 58162f5224aeSachartre * reports an illegal request. 58172f5224aeSachartre * EIO - Any other status. 58182f5224aeSachartre */ 58192f5224aeSachartre static int 58202f5224aeSachartre vdc_scsi_status(vdc_t *vdc, vd_scsi_t *vd_scsi, boolean_t log_error) 58212f5224aeSachartre { 58222f5224aeSachartre int rv; 58232f5224aeSachartre char path_str[MAXPATHLEN]; 58242f5224aeSachartre char panic_str[VDC_RESV_CONFLICT_FMT_LEN + MAXPATHLEN]; 58252f5224aeSachartre union scsi_cdb *cdb; 58262f5224aeSachartre struct scsi_extended_sense *sense; 58272f5224aeSachartre 58282f5224aeSachartre if (vd_scsi->cmd_status == STATUS_GOOD) 58292f5224aeSachartre /* no error */ 58302f5224aeSachartre return (0); 58312f5224aeSachartre 58322f5224aeSachartre /* when the tunable vdc_scsi_log_error is true we log all errors */ 58332f5224aeSachartre if (vdc_scsi_log_error) 58342f5224aeSachartre log_error = B_TRUE; 58352f5224aeSachartre 58362f5224aeSachartre if (log_error) { 58372f5224aeSachartre cmn_err(CE_WARN, "%s (vdc%d):\tError for Command: 0x%x)\n", 58382f5224aeSachartre ddi_pathname(vdc->dip, path_str), vdc->instance, 58392f5224aeSachartre GETCMD(VD_SCSI_DATA_CDB(vd_scsi))); 58402f5224aeSachartre } 58412f5224aeSachartre 58422f5224aeSachartre /* default returned value */ 58432f5224aeSachartre rv = EIO; 58442f5224aeSachartre 58452f5224aeSachartre switch (vd_scsi->cmd_status) { 58462f5224aeSachartre 58472f5224aeSachartre case STATUS_CHECK: 58482f5224aeSachartre case STATUS_TERMINATED: 58492f5224aeSachartre if (log_error) 58502f5224aeSachartre cmn_err(CE_CONT, "\tCheck Condition Error\n"); 58512f5224aeSachartre 58522f5224aeSachartre /* check sense buffer */ 58532f5224aeSachartre if (vd_scsi->sense_len == 0 || 58542f5224aeSachartre vd_scsi->sense_status != STATUS_GOOD) { 58552f5224aeSachartre if (log_error) 58562f5224aeSachartre cmn_err(CE_CONT, "\tNo Sense Data Available\n"); 58572f5224aeSachartre break; 58582f5224aeSachartre 
} 58592f5224aeSachartre 58602f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 58612f5224aeSachartre 58622f5224aeSachartre if (log_error) { 58632f5224aeSachartre cmn_err(CE_CONT, "\tSense Key: 0x%x\n" 58642f5224aeSachartre "\tASC: 0x%x, ASCQ: 0x%x\n", 58652f5224aeSachartre scsi_sense_key((uint8_t *)sense), 58662f5224aeSachartre scsi_sense_asc((uint8_t *)sense), 58672f5224aeSachartre scsi_sense_ascq((uint8_t *)sense)); 58682f5224aeSachartre } 58692f5224aeSachartre 58702f5224aeSachartre if (scsi_sense_key((uint8_t *)sense) == KEY_ILLEGAL_REQUEST) 58712f5224aeSachartre rv = ENOTSUP; 58722f5224aeSachartre break; 58732f5224aeSachartre 58742f5224aeSachartre case STATUS_BUSY: 58752f5224aeSachartre if (log_error) 58762f5224aeSachartre cmn_err(CE_NOTE, "\tDevice Busy\n"); 58772f5224aeSachartre break; 58782f5224aeSachartre 58792f5224aeSachartre case STATUS_RESERVATION_CONFLICT: 58802f5224aeSachartre /* 58812f5224aeSachartre * If the command was PERSISTENT_RESERVATION_[IN|OUT] then 58822f5224aeSachartre * reservation conflict could be due to various reasons like 58832f5224aeSachartre * incorrect keys, not registered or not reserved etc. So, 58842f5224aeSachartre * we should not panic in that case. 
58852f5224aeSachartre */ 58862f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 58872f5224aeSachartre if (vdc->failfast_interval != 0 && 58882f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_IN && 58892f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_OUT) { 58902f5224aeSachartre /* failfast is enabled so we have to panic */ 58912f5224aeSachartre (void) snprintf(panic_str, sizeof (panic_str), 58922f5224aeSachartre VDC_RESV_CONFLICT_FMT_STR "%s", 58932f5224aeSachartre ddi_pathname(vdc->dip, path_str)); 58942f5224aeSachartre panic(panic_str); 58952f5224aeSachartre } 58962f5224aeSachartre if (log_error) 58972f5224aeSachartre cmn_err(CE_NOTE, "\tReservation Conflict\n"); 58982f5224aeSachartre rv = EACCES; 58992f5224aeSachartre break; 59002f5224aeSachartre 59012f5224aeSachartre case STATUS_QFULL: 59022f5224aeSachartre if (log_error) 59032f5224aeSachartre cmn_err(CE_NOTE, "\tQueue Full\n"); 59042f5224aeSachartre break; 59052f5224aeSachartre 59062f5224aeSachartre case STATUS_MET: 59072f5224aeSachartre case STATUS_INTERMEDIATE: 59082f5224aeSachartre case STATUS_SCSI2: 59092f5224aeSachartre case STATUS_INTERMEDIATE_MET: 59102f5224aeSachartre case STATUS_ACA_ACTIVE: 59112f5224aeSachartre if (log_error) 59122f5224aeSachartre cmn_err(CE_CONT, 59132f5224aeSachartre "\tUnexpected SCSI status received: 0x%x\n", 59142f5224aeSachartre vd_scsi->cmd_status); 59152f5224aeSachartre break; 59162f5224aeSachartre 59172f5224aeSachartre default: 59182f5224aeSachartre if (log_error) 59192f5224aeSachartre cmn_err(CE_CONT, 59202f5224aeSachartre "\tInvalid SCSI status received: 0x%x\n", 59212f5224aeSachartre vd_scsi->cmd_status); 59222f5224aeSachartre break; 59232f5224aeSachartre } 59242f5224aeSachartre 59252f5224aeSachartre return (rv); 59262f5224aeSachartre } 59272f5224aeSachartre 59282f5224aeSachartre /* 59292f5224aeSachartre * Implemented the USCSICMD uscsi(7I) ioctl. 
This ioctl is converted to 59302f5224aeSachartre * a VD_OP_SCSICMD operation which is sent to the vdisk server. If a SCSI 59312f5224aeSachartre * reset is requested (i.e. a flag USCSI_RESET* is set) then the ioctl is 59322f5224aeSachartre * converted to a VD_OP_RESET operation. 59332f5224aeSachartre */ 59342f5224aeSachartre static int 59352f5224aeSachartre vdc_uscsi_cmd(vdc_t *vdc, caddr_t arg, int mode) 59362f5224aeSachartre { 59372f5224aeSachartre struct uscsi_cmd uscsi; 59382f5224aeSachartre struct uscsi_cmd32 uscsi32; 59392f5224aeSachartre vd_scsi_t *vd_scsi; 59402f5224aeSachartre int vd_scsi_len; 59412f5224aeSachartre union scsi_cdb *cdb; 59422f5224aeSachartre struct scsi_extended_sense *sense; 59432f5224aeSachartre char *datain, *dataout; 59442f5224aeSachartre size_t cdb_len, datain_len, dataout_len, sense_len; 59452f5224aeSachartre int rv; 59462f5224aeSachartre 59472f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 59482f5224aeSachartre if (ddi_copyin(arg, &uscsi32, sizeof (struct uscsi_cmd32), 59492f5224aeSachartre mode) != 0) 59502f5224aeSachartre return (EFAULT); 59512f5224aeSachartre uscsi_cmd32touscsi_cmd((&uscsi32), (&uscsi)); 59522f5224aeSachartre } else { 59532f5224aeSachartre if (ddi_copyin(arg, &uscsi, sizeof (struct uscsi_cmd), 59542f5224aeSachartre mode) != 0) 59552f5224aeSachartre return (EFAULT); 59562f5224aeSachartre } 59572f5224aeSachartre 59582f5224aeSachartre /* a uscsi reset is converted to a VD_OP_RESET operation */ 59592f5224aeSachartre if (uscsi.uscsi_flags & (USCSI_RESET | USCSI_RESET_LUN | 59602f5224aeSachartre USCSI_RESET_ALL)) { 59616ace3c90SAlexandre Chartre rv = vdc_do_sync_op(vdc, VD_OP_RESET, NULL, 0, 0, 0, 59626ace3c90SAlexandre Chartre VIO_both_dir, B_TRUE); 59632f5224aeSachartre return (rv); 59642f5224aeSachartre } 59652f5224aeSachartre 59662f5224aeSachartre /* cdb buffer length */ 59672f5224aeSachartre cdb_len = uscsi.uscsi_cdblen; 59682f5224aeSachartre 59692f5224aeSachartre /* data in and out 
buffers length */ 59702f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 59712f5224aeSachartre datain_len = uscsi.uscsi_buflen; 59722f5224aeSachartre dataout_len = 0; 59732f5224aeSachartre } else { 59742f5224aeSachartre datain_len = 0; 59752f5224aeSachartre dataout_len = uscsi.uscsi_buflen; 59762f5224aeSachartre } 59772f5224aeSachartre 59782f5224aeSachartre /* sense buffer length */ 59792f5224aeSachartre if (uscsi.uscsi_flags & USCSI_RQENABLE) 59802f5224aeSachartre sense_len = uscsi.uscsi_rqlen; 59812f5224aeSachartre else 59822f5224aeSachartre sense_len = 0; 59832f5224aeSachartre 59842f5224aeSachartre /* allocate buffer for the VD_SCSICMD_OP operation */ 59852f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 59862f5224aeSachartre &vd_scsi_len); 59872f5224aeSachartre 59882f5224aeSachartre /* 59892f5224aeSachartre * The documentation of USCSI_ISOLATE and USCSI_DIAGNOSE is very vague, 59902f5224aeSachartre * but basically they prevent a SCSI command from being retried in case 59912f5224aeSachartre * of an error. 
59922f5224aeSachartre */ 59932f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_ISOLATE) || 59942f5224aeSachartre (uscsi.uscsi_flags & USCSI_DIAGNOSE)) 59952f5224aeSachartre vd_scsi->options |= VD_SCSI_OPT_NORETRY; 59962f5224aeSachartre 59972f5224aeSachartre /* set task attribute */ 59982f5224aeSachartre if (uscsi.uscsi_flags & USCSI_NOTAG) { 59992f5224aeSachartre vd_scsi->task_attribute = 0; 60002f5224aeSachartre } else { 60012f5224aeSachartre if (uscsi.uscsi_flags & USCSI_HEAD) 60022f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 60032f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_HTAG) 60042f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_HQUEUE; 60052f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_OTAG) 60062f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ORDERED; 60072f5224aeSachartre else 60082f5224aeSachartre vd_scsi->task_attribute = 0; 60092f5224aeSachartre } 60102f5224aeSachartre 60112f5224aeSachartre /* set timeout */ 60122f5224aeSachartre vd_scsi->timeout = uscsi.uscsi_timeout; 60132f5224aeSachartre 60142f5224aeSachartre /* copy-in cdb data */ 60152f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 60162f5224aeSachartre if (ddi_copyin(uscsi.uscsi_cdb, cdb, cdb_len, mode) != 0) { 60172f5224aeSachartre rv = EFAULT; 60182f5224aeSachartre goto done; 60192f5224aeSachartre } 60202f5224aeSachartre 60212f5224aeSachartre /* keep a pointer to the sense buffer */ 60222f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 60232f5224aeSachartre 60242f5224aeSachartre /* keep a pointer to the data-in buffer */ 60252f5224aeSachartre datain = (char *)VD_SCSI_DATA_IN(vd_scsi); 60262f5224aeSachartre 60272f5224aeSachartre /* copy-in request data to the data-out buffer */ 60282f5224aeSachartre dataout = (char *)VD_SCSI_DATA_OUT(vd_scsi); 60292f5224aeSachartre if (!(uscsi.uscsi_flags & USCSI_READ)) { 60302f5224aeSachartre if (ddi_copyin(uscsi.uscsi_bufaddr, dataout, dataout_len, 60312f5224aeSachartre mode)) { 60322f5224aeSachartre rv = 
EFAULT; 60332f5224aeSachartre goto done; 60342f5224aeSachartre } 60352f5224aeSachartre } 60362f5224aeSachartre 60372f5224aeSachartre /* submit the request */ 60382f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 60396ace3c90SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 60402f5224aeSachartre 60412f5224aeSachartre if (rv != 0) 60422f5224aeSachartre goto done; 60432f5224aeSachartre 60442f5224aeSachartre /* update scsi status */ 60452f5224aeSachartre uscsi.uscsi_status = vd_scsi->cmd_status; 60462f5224aeSachartre 60472f5224aeSachartre /* update sense data */ 60482f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_RQENABLE) && 60492f5224aeSachartre (uscsi.uscsi_status == STATUS_CHECK || 60502f5224aeSachartre uscsi.uscsi_status == STATUS_TERMINATED)) { 60512f5224aeSachartre 60522f5224aeSachartre uscsi.uscsi_rqstatus = vd_scsi->sense_status; 60532f5224aeSachartre 60542f5224aeSachartre if (uscsi.uscsi_rqstatus == STATUS_GOOD) { 60552f5224aeSachartre uscsi.uscsi_rqresid = uscsi.uscsi_rqlen - 60562f5224aeSachartre vd_scsi->sense_len; 60572f5224aeSachartre if (ddi_copyout(sense, uscsi.uscsi_rqbuf, 60582f5224aeSachartre vd_scsi->sense_len, mode) != 0) { 60592f5224aeSachartre rv = EFAULT; 60602f5224aeSachartre goto done; 60612f5224aeSachartre } 60622f5224aeSachartre } 60632f5224aeSachartre } 60642f5224aeSachartre 60652f5224aeSachartre /* update request data */ 60662f5224aeSachartre if (uscsi.uscsi_status == STATUS_GOOD) { 60672f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 60682f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 60692f5224aeSachartre vd_scsi->datain_len; 60702f5224aeSachartre if (ddi_copyout(datain, uscsi.uscsi_bufaddr, 60712f5224aeSachartre vd_scsi->datain_len, mode) != 0) { 60722f5224aeSachartre rv = EFAULT; 60732f5224aeSachartre goto done; 60742f5224aeSachartre } 60752f5224aeSachartre } else { 60762f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 60772f5224aeSachartre vd_scsi->dataout_len; 
60782f5224aeSachartre } 60792f5224aeSachartre } 60802f5224aeSachartre 60812f5224aeSachartre /* copy-out result */ 60822f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 60832f5224aeSachartre uscsi_cmdtouscsi_cmd32((&uscsi), (&uscsi32)); 60842f5224aeSachartre if (ddi_copyout(&uscsi32, arg, sizeof (struct uscsi_cmd32), 60852f5224aeSachartre mode) != 0) { 60862f5224aeSachartre rv = EFAULT; 60872f5224aeSachartre goto done; 60882f5224aeSachartre } 60892f5224aeSachartre } else { 60902f5224aeSachartre if (ddi_copyout(&uscsi, arg, sizeof (struct uscsi_cmd), 60912f5224aeSachartre mode) != 0) { 60922f5224aeSachartre rv = EFAULT; 60932f5224aeSachartre goto done; 60942f5224aeSachartre } 60952f5224aeSachartre } 60962f5224aeSachartre 60972f5224aeSachartre /* get the return code from the SCSI command status */ 60982f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, 60992f5224aeSachartre !(uscsi.uscsi_flags & USCSI_SILENT)); 61002f5224aeSachartre 61012f5224aeSachartre done: 61022f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 61032f5224aeSachartre return (rv); 61042f5224aeSachartre } 61052f5224aeSachartre 61062f5224aeSachartre /* 61072f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT IN command. 61082f5224aeSachartre * 61092f5224aeSachartre * Arguments: 61102f5224aeSachartre * cmd - SCSI PERSISTENT IN command 61112f5224aeSachartre * len - length of the SCSI input buffer 61122f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 61132f5224aeSachartre * 61142f5224aeSachartre * Returned Value: 61152f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 
61162f5224aeSachartre */ 61172f5224aeSachartre static vd_scsi_t * 61182f5224aeSachartre vdc_scsi_alloc_persistent_in(uchar_t cmd, int len, int *vd_scsi_len) 61192f5224aeSachartre { 61202f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 61212f5224aeSachartre vd_scsi_t *vd_scsi; 61222f5224aeSachartre union scsi_cdb *cdb; 61232f5224aeSachartre 61242f5224aeSachartre cdb_len = CDB_GROUP1; 61252f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 61262f5224aeSachartre datain_len = len; 61272f5224aeSachartre dataout_len = 0; 61282f5224aeSachartre 61292f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 61302f5224aeSachartre vd_scsi_len); 61312f5224aeSachartre 61322f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 61332f5224aeSachartre 61342f5224aeSachartre /* set cdb */ 61352f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_IN; 61362f5224aeSachartre cdb->cdb_opaque[1] = cmd; 61372f5224aeSachartre FORMG1COUNT(cdb, datain_len); 61382f5224aeSachartre 61392f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 61402f5224aeSachartre 61412f5224aeSachartre return (vd_scsi); 61422f5224aeSachartre } 61432f5224aeSachartre 61442f5224aeSachartre /* 61452f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT OUT command. 61462f5224aeSachartre * 61472f5224aeSachartre * Arguments: 61482f5224aeSachartre * cmd - SCSI PERSISTENT OUT command 61492f5224aeSachartre * len - length of the SCSI output buffer 61502f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 61512f5224aeSachartre * 61522f5224aeSachartre * Returned Code: 61532f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 
61542f5224aeSachartre */ 61552f5224aeSachartre static vd_scsi_t * 61562f5224aeSachartre vdc_scsi_alloc_persistent_out(uchar_t cmd, int len, int *vd_scsi_len) 61572f5224aeSachartre { 61582f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 61592f5224aeSachartre vd_scsi_t *vd_scsi; 61602f5224aeSachartre union scsi_cdb *cdb; 61612f5224aeSachartre 61622f5224aeSachartre cdb_len = CDB_GROUP1; 61632f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 61642f5224aeSachartre datain_len = 0; 61652f5224aeSachartre dataout_len = len; 61662f5224aeSachartre 61672f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 61682f5224aeSachartre vd_scsi_len); 61692f5224aeSachartre 61702f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 61712f5224aeSachartre 61722f5224aeSachartre /* set cdb */ 61732f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_OUT; 61742f5224aeSachartre cdb->cdb_opaque[1] = cmd; 61752f5224aeSachartre FORMG1COUNT(cdb, dataout_len); 61762f5224aeSachartre 61772f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 61782f5224aeSachartre 61792f5224aeSachartre return (vd_scsi); 61802f5224aeSachartre } 61812f5224aeSachartre 61822f5224aeSachartre /* 61832f5224aeSachartre * Implement the MHIOCGRP_INKEYS mhd(7i) ioctl. The ioctl is converted 61842f5224aeSachartre * to a SCSI PERSISTENT IN READ KEYS command which is sent to the vdisk 61852f5224aeSachartre * server with a VD_OP_SCSICMD operation. 
61862f5224aeSachartre */ 61872f5224aeSachartre static int 61882f5224aeSachartre vdc_mhd_inkeys(vdc_t *vdc, caddr_t arg, int mode) 61892f5224aeSachartre { 61902f5224aeSachartre vd_scsi_t *vd_scsi; 61912f5224aeSachartre mhioc_inkeys_t inkeys; 61922f5224aeSachartre mhioc_key_list_t klist; 61932f5224aeSachartre struct mhioc_inkeys32 inkeys32; 61942f5224aeSachartre struct mhioc_key_list32 klist32; 61952f5224aeSachartre sd_prin_readkeys_t *scsi_keys; 61962f5224aeSachartre void *user_keys; 61972f5224aeSachartre int vd_scsi_len; 61982f5224aeSachartre int listsize, listlen, rv; 61992f5224aeSachartre 62002f5224aeSachartre /* copyin arguments */ 62012f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 62022f5224aeSachartre rv = ddi_copyin(arg, &inkeys32, sizeof (inkeys32), mode); 62032f5224aeSachartre if (rv != 0) 62042f5224aeSachartre return (EFAULT); 62052f5224aeSachartre 62062f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inkeys32.li, &klist32, 62072f5224aeSachartre sizeof (klist32), mode); 62082f5224aeSachartre if (rv != 0) 62092f5224aeSachartre return (EFAULT); 62102f5224aeSachartre 62112f5224aeSachartre listsize = klist32.listsize; 62122f5224aeSachartre } else { 62132f5224aeSachartre rv = ddi_copyin(arg, &inkeys, sizeof (inkeys), mode); 62142f5224aeSachartre if (rv != 0) 62152f5224aeSachartre return (EFAULT); 62162f5224aeSachartre 62172f5224aeSachartre rv = ddi_copyin(inkeys.li, &klist, sizeof (klist), mode); 62182f5224aeSachartre if (rv != 0) 62192f5224aeSachartre return (EFAULT); 62202f5224aeSachartre 62212f5224aeSachartre listsize = klist.listsize; 62222f5224aeSachartre } 62232f5224aeSachartre 62242f5224aeSachartre /* build SCSI VD_OP request */ 62252f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_KEYS, 62262f5224aeSachartre sizeof (sd_prin_readkeys_t) - sizeof (caddr_t) + 62272f5224aeSachartre (sizeof (mhioc_resv_key_t) * listsize), &vd_scsi_len); 62282f5224aeSachartre 62292f5224aeSachartre scsi_keys = 
(sd_prin_readkeys_t *)VD_SCSI_DATA_IN(vd_scsi); 62302f5224aeSachartre 62312f5224aeSachartre /* submit the request */ 62322f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 62336ace3c90SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 62342f5224aeSachartre 62352f5224aeSachartre if (rv != 0) 62362f5224aeSachartre goto done; 62372f5224aeSachartre 62382f5224aeSachartre listlen = scsi_keys->len / MHIOC_RESV_KEY_SIZE; 62392f5224aeSachartre 62402f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 62412f5224aeSachartre inkeys32.generation = scsi_keys->generation; 62422f5224aeSachartre rv = ddi_copyout(&inkeys32, arg, sizeof (inkeys32), mode); 62432f5224aeSachartre if (rv != 0) { 62442f5224aeSachartre rv = EFAULT; 62452f5224aeSachartre goto done; 62462f5224aeSachartre } 62472f5224aeSachartre 62482f5224aeSachartre klist32.listlen = listlen; 62492f5224aeSachartre rv = ddi_copyout(&klist32, (caddr_t)(uintptr_t)inkeys32.li, 62502f5224aeSachartre sizeof (klist32), mode); 62512f5224aeSachartre if (rv != 0) { 62522f5224aeSachartre rv = EFAULT; 62532f5224aeSachartre goto done; 62542f5224aeSachartre } 62552f5224aeSachartre 62562f5224aeSachartre user_keys = (caddr_t)(uintptr_t)klist32.list; 62572f5224aeSachartre } else { 62582f5224aeSachartre inkeys.generation = scsi_keys->generation; 62592f5224aeSachartre rv = ddi_copyout(&inkeys, arg, sizeof (inkeys), mode); 62602f5224aeSachartre if (rv != 0) { 62612f5224aeSachartre rv = EFAULT; 62622f5224aeSachartre goto done; 62632f5224aeSachartre } 62642f5224aeSachartre 62652f5224aeSachartre klist.listlen = listlen; 62662f5224aeSachartre rv = ddi_copyout(&klist, inkeys.li, sizeof (klist), mode); 62672f5224aeSachartre if (rv != 0) { 62682f5224aeSachartre rv = EFAULT; 62692f5224aeSachartre goto done; 62702f5224aeSachartre } 62712f5224aeSachartre 62722f5224aeSachartre user_keys = klist.list; 62732f5224aeSachartre } 62742f5224aeSachartre 62752f5224aeSachartre /* copy out keys */ 
62762f5224aeSachartre if (listlen > 0 && listsize > 0) { 62772f5224aeSachartre if (listsize < listlen) 62782f5224aeSachartre listlen = listsize; 62792f5224aeSachartre rv = ddi_copyout(&scsi_keys->keylist, user_keys, 62802f5224aeSachartre listlen * MHIOC_RESV_KEY_SIZE, mode); 62812f5224aeSachartre if (rv != 0) 62822f5224aeSachartre rv = EFAULT; 62832f5224aeSachartre } 62842f5224aeSachartre 62852f5224aeSachartre if (rv == 0) 62862f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 62872f5224aeSachartre 62882f5224aeSachartre done: 62892f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 62902f5224aeSachartre 62912f5224aeSachartre return (rv); 62922f5224aeSachartre } 62932f5224aeSachartre 62942f5224aeSachartre /* 62952f5224aeSachartre * Implement the MHIOCGRP_INRESV mhd(7i) ioctl. The ioctl is converted 62962f5224aeSachartre * to a SCSI PERSISTENT IN READ RESERVATION command which is sent to 62972f5224aeSachartre * the vdisk server with a VD_OP_SCSICMD operation. 62982f5224aeSachartre */ 62992f5224aeSachartre static int 63002f5224aeSachartre vdc_mhd_inresv(vdc_t *vdc, caddr_t arg, int mode) 63012f5224aeSachartre { 63022f5224aeSachartre vd_scsi_t *vd_scsi; 63032f5224aeSachartre mhioc_inresvs_t inresv; 63042f5224aeSachartre mhioc_resv_desc_list_t rlist; 63052f5224aeSachartre struct mhioc_inresvs32 inresv32; 63062f5224aeSachartre struct mhioc_resv_desc_list32 rlist32; 63072f5224aeSachartre mhioc_resv_desc_t mhd_resv; 63082f5224aeSachartre sd_prin_readresv_t *scsi_resv; 63092f5224aeSachartre sd_readresv_desc_t *resv; 63102f5224aeSachartre mhioc_resv_desc_t *user_resv; 63112f5224aeSachartre int vd_scsi_len; 63122f5224aeSachartre int listsize, listlen, i, rv; 63132f5224aeSachartre 63142f5224aeSachartre /* copyin arguments */ 63152f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 63162f5224aeSachartre rv = ddi_copyin(arg, &inresv32, sizeof (inresv32), mode); 63172f5224aeSachartre if (rv != 0) 63182f5224aeSachartre return (EFAULT); 
63192f5224aeSachartre 63202f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inresv32.li, &rlist32, 63212f5224aeSachartre sizeof (rlist32), mode); 63222f5224aeSachartre if (rv != 0) 63232f5224aeSachartre return (EFAULT); 63242f5224aeSachartre 63252f5224aeSachartre listsize = rlist32.listsize; 63262f5224aeSachartre } else { 63272f5224aeSachartre rv = ddi_copyin(arg, &inresv, sizeof (inresv), mode); 63282f5224aeSachartre if (rv != 0) 63292f5224aeSachartre return (EFAULT); 63302f5224aeSachartre 63312f5224aeSachartre rv = ddi_copyin(inresv.li, &rlist, sizeof (rlist), mode); 63322f5224aeSachartre if (rv != 0) 63332f5224aeSachartre return (EFAULT); 63342f5224aeSachartre 63352f5224aeSachartre listsize = rlist.listsize; 63362f5224aeSachartre } 63372f5224aeSachartre 63382f5224aeSachartre /* build SCSI VD_OP request */ 63392f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_RESV, 63402f5224aeSachartre sizeof (sd_prin_readresv_t) - sizeof (caddr_t) + 63412f5224aeSachartre (SCSI3_RESV_DESC_LEN * listsize), &vd_scsi_len); 63422f5224aeSachartre 63432f5224aeSachartre scsi_resv = (sd_prin_readresv_t *)VD_SCSI_DATA_IN(vd_scsi); 63442f5224aeSachartre 63452f5224aeSachartre /* submit the request */ 63462f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 63476ace3c90SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 63482f5224aeSachartre 63492f5224aeSachartre if (rv != 0) 63502f5224aeSachartre goto done; 63512f5224aeSachartre 63522f5224aeSachartre listlen = scsi_resv->len / SCSI3_RESV_DESC_LEN; 63532f5224aeSachartre 63542f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 63552f5224aeSachartre inresv32.generation = scsi_resv->generation; 63562f5224aeSachartre rv = ddi_copyout(&inresv32, arg, sizeof (inresv32), mode); 63572f5224aeSachartre if (rv != 0) { 63582f5224aeSachartre rv = EFAULT; 63592f5224aeSachartre goto done; 63602f5224aeSachartre } 63612f5224aeSachartre 63622f5224aeSachartre rlist32.listlen = 
listlen; 63632f5224aeSachartre rv = ddi_copyout(&rlist32, (caddr_t)(uintptr_t)inresv32.li, 63642f5224aeSachartre sizeof (rlist32), mode); 63652f5224aeSachartre if (rv != 0) { 63662f5224aeSachartre rv = EFAULT; 63672f5224aeSachartre goto done; 63682f5224aeSachartre } 63692f5224aeSachartre 63702f5224aeSachartre user_resv = (mhioc_resv_desc_t *)(uintptr_t)rlist32.list; 63712f5224aeSachartre } else { 63722f5224aeSachartre inresv.generation = scsi_resv->generation; 63732f5224aeSachartre rv = ddi_copyout(&inresv, arg, sizeof (inresv), mode); 63742f5224aeSachartre if (rv != 0) { 63752f5224aeSachartre rv = EFAULT; 63762f5224aeSachartre goto done; 63772f5224aeSachartre } 63782f5224aeSachartre 63792f5224aeSachartre rlist.listlen = listlen; 63802f5224aeSachartre rv = ddi_copyout(&rlist, inresv.li, sizeof (rlist), mode); 63812f5224aeSachartre if (rv != 0) { 63822f5224aeSachartre rv = EFAULT; 63832f5224aeSachartre goto done; 63842f5224aeSachartre } 63852f5224aeSachartre 63862f5224aeSachartre user_resv = rlist.list; 63872f5224aeSachartre } 63882f5224aeSachartre 63892f5224aeSachartre /* copy out reservations */ 63902f5224aeSachartre if (listsize > 0 && listlen > 0) { 63912f5224aeSachartre if (listsize < listlen) 63922f5224aeSachartre listlen = listsize; 63932f5224aeSachartre resv = (sd_readresv_desc_t *)&scsi_resv->readresv_desc; 63942f5224aeSachartre 63952f5224aeSachartre for (i = 0; i < listlen; i++) { 63962f5224aeSachartre mhd_resv.type = resv->type; 63972f5224aeSachartre mhd_resv.scope = resv->scope; 63982f5224aeSachartre mhd_resv.scope_specific_addr = 63992f5224aeSachartre BE_32(resv->scope_specific_addr); 64002f5224aeSachartre bcopy(&resv->resvkey, &mhd_resv.key, 64012f5224aeSachartre MHIOC_RESV_KEY_SIZE); 64022f5224aeSachartre 64032f5224aeSachartre rv = ddi_copyout(&mhd_resv, user_resv, 64042f5224aeSachartre sizeof (mhd_resv), mode); 64052f5224aeSachartre if (rv != 0) { 64062f5224aeSachartre rv = EFAULT; 64072f5224aeSachartre goto done; 64082f5224aeSachartre } 
64092f5224aeSachartre resv++; 64102f5224aeSachartre user_resv++; 64112f5224aeSachartre } 64122f5224aeSachartre } 64132f5224aeSachartre 64142f5224aeSachartre if (rv == 0) 64152f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 64162f5224aeSachartre 64172f5224aeSachartre done: 64182f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 64192f5224aeSachartre return (rv); 64202f5224aeSachartre } 64212f5224aeSachartre 64222f5224aeSachartre /* 64232f5224aeSachartre * Implement the MHIOCGRP_REGISTER mhd(7i) ioctl. The ioctl is converted 64242f5224aeSachartre * to a SCSI PERSISTENT OUT REGISTER command which is sent to the vdisk 64252f5224aeSachartre * server with a VD_OP_SCSICMD operation. 64262f5224aeSachartre */ 64272f5224aeSachartre static int 64282f5224aeSachartre vdc_mhd_register(vdc_t *vdc, caddr_t arg, int mode) 64292f5224aeSachartre { 64302f5224aeSachartre vd_scsi_t *vd_scsi; 64312f5224aeSachartre sd_prout_t *scsi_prout; 64322f5224aeSachartre mhioc_register_t mhd_reg; 64332f5224aeSachartre int vd_scsi_len, rv; 64342f5224aeSachartre 64352f5224aeSachartre /* copyin arguments */ 64362f5224aeSachartre rv = ddi_copyin(arg, &mhd_reg, sizeof (mhd_reg), mode); 64372f5224aeSachartre if (rv != 0) 64382f5224aeSachartre return (EFAULT); 64392f5224aeSachartre 64402f5224aeSachartre /* build SCSI VD_OP request */ 64412f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTER, 64422f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 64432f5224aeSachartre 64442f5224aeSachartre /* set parameters */ 64452f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 64462f5224aeSachartre bcopy(mhd_reg.oldkey.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 64472f5224aeSachartre bcopy(mhd_reg.newkey.key, scsi_prout->service_key, MHIOC_RESV_KEY_SIZE); 64482f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_reg.aptpl; 64492f5224aeSachartre 64502f5224aeSachartre /* submit the request */ 64512f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, 
(caddr_t)vd_scsi, vd_scsi_len, 64526ace3c90SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 64532f5224aeSachartre 64542f5224aeSachartre if (rv == 0) 64552f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 64562f5224aeSachartre 64572f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 64582f5224aeSachartre return (rv); 64592f5224aeSachartre } 64602f5224aeSachartre 64612f5224aeSachartre /* 64622f5224aeSachartre * Implement the MHIOCGRP_RESERVE mhd(7i) ioctl. The ioctl is converted 64632f5224aeSachartre * to a SCSI PERSISTENT OUT RESERVE command which is sent to the vdisk 64642f5224aeSachartre * server with a VD_OP_SCSICMD operation. 64652f5224aeSachartre */ 64662f5224aeSachartre static int 64672f5224aeSachartre vdc_mhd_reserve(vdc_t *vdc, caddr_t arg, int mode) 64682f5224aeSachartre { 64692f5224aeSachartre union scsi_cdb *cdb; 64702f5224aeSachartre vd_scsi_t *vd_scsi; 64712f5224aeSachartre sd_prout_t *scsi_prout; 64722f5224aeSachartre mhioc_resv_desc_t mhd_resv; 64732f5224aeSachartre int vd_scsi_len, rv; 64742f5224aeSachartre 64752f5224aeSachartre /* copyin arguments */ 64762f5224aeSachartre rv = ddi_copyin(arg, &mhd_resv, sizeof (mhd_resv), mode); 64772f5224aeSachartre if (rv != 0) 64782f5224aeSachartre return (EFAULT); 64792f5224aeSachartre 64802f5224aeSachartre /* build SCSI VD_OP request */ 64812f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_RESERVE, 64822f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 64832f5224aeSachartre 64842f5224aeSachartre /* set parameters */ 64852f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 64862f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 64872f5224aeSachartre bcopy(mhd_resv.key.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 64882f5224aeSachartre scsi_prout->scope_address = mhd_resv.scope_specific_addr; 64892f5224aeSachartre cdb->cdb_opaque[2] = mhd_resv.type; 64902f5224aeSachartre 64912f5224aeSachartre /* submit the request */ 64922f5224aeSachartre rv = vdc_do_sync_op(vdc, 
VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 64936ace3c90SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 64942f5224aeSachartre 64952f5224aeSachartre if (rv == 0) 64962f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 64972f5224aeSachartre 64982f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 64992f5224aeSachartre return (rv); 65002f5224aeSachartre } 65012f5224aeSachartre 65022f5224aeSachartre /* 65032f5224aeSachartre * Implement the MHIOCGRP_PREEMPTANDABORT mhd(7i) ioctl. The ioctl is 65042f5224aeSachartre * converted to a SCSI PERSISTENT OUT PREEMPT AND ABORT command which 65052f5224aeSachartre * is sent to the vdisk server with a VD_OP_SCSICMD operation. 65062f5224aeSachartre */ 65072f5224aeSachartre static int 65082f5224aeSachartre vdc_mhd_preemptabort(vdc_t *vdc, caddr_t arg, int mode) 65092f5224aeSachartre { 65102f5224aeSachartre union scsi_cdb *cdb; 65112f5224aeSachartre vd_scsi_t *vd_scsi; 65122f5224aeSachartre sd_prout_t *scsi_prout; 65132f5224aeSachartre mhioc_preemptandabort_t mhd_preempt; 65142f5224aeSachartre int vd_scsi_len, rv; 65152f5224aeSachartre 65162f5224aeSachartre /* copyin arguments */ 65172f5224aeSachartre rv = ddi_copyin(arg, &mhd_preempt, sizeof (mhd_preempt), mode); 65182f5224aeSachartre if (rv != 0) 65192f5224aeSachartre return (EFAULT); 65202f5224aeSachartre 65212f5224aeSachartre /* build SCSI VD_OP request */ 65222f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_PREEMPTANDABORT, 65232f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 65242f5224aeSachartre 65252f5224aeSachartre /* set parameters */ 65262f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 65272f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 65282f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 65292f5224aeSachartre bcopy(mhd_preempt.resvdesc.key.key, scsi_prout->res_key, 65302f5224aeSachartre MHIOC_RESV_KEY_SIZE); 65312f5224aeSachartre bcopy(mhd_preempt.victim_key.key, scsi_prout->service_key, 
65322f5224aeSachartre MHIOC_RESV_KEY_SIZE); 65332f5224aeSachartre scsi_prout->scope_address = mhd_preempt.resvdesc.scope_specific_addr; 65342f5224aeSachartre cdb->cdb_opaque[2] = mhd_preempt.resvdesc.type; 65352f5224aeSachartre 65362f5224aeSachartre /* submit the request */ 65372f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 65386ace3c90SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 65392f5224aeSachartre 65402f5224aeSachartre if (rv == 0) 65412f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 65422f5224aeSachartre 65432f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 65442f5224aeSachartre return (rv); 65452f5224aeSachartre } 65462f5224aeSachartre 65472f5224aeSachartre /* 65482f5224aeSachartre * Implement the MHIOCGRP_REGISTERANDIGNOREKEY mhd(7i) ioctl. The ioctl 65492f5224aeSachartre * is converted to a SCSI PERSISTENT OUT REGISTER AND IGNORE EXISTING KEY 65502f5224aeSachartre * command which is sent to the vdisk server with a VD_OP_SCSICMD operation. 
65512f5224aeSachartre */ 65522f5224aeSachartre static int 65532f5224aeSachartre vdc_mhd_registerignore(vdc_t *vdc, caddr_t arg, int mode) 65542f5224aeSachartre { 65552f5224aeSachartre vd_scsi_t *vd_scsi; 65562f5224aeSachartre sd_prout_t *scsi_prout; 65572f5224aeSachartre mhioc_registerandignorekey_t mhd_regi; 65582f5224aeSachartre int vd_scsi_len, rv; 65592f5224aeSachartre 65602f5224aeSachartre /* copyin arguments */ 65612f5224aeSachartre rv = ddi_copyin(arg, &mhd_regi, sizeof (mhd_regi), mode); 65622f5224aeSachartre if (rv != 0) 65632f5224aeSachartre return (EFAULT); 65642f5224aeSachartre 65652f5224aeSachartre /* build SCSI VD_OP request */ 65662f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTERANDIGNOREKEY, 65672f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 65682f5224aeSachartre 65692f5224aeSachartre /* set parameters */ 65702f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 65712f5224aeSachartre bcopy(mhd_regi.newkey.key, scsi_prout->service_key, 65722f5224aeSachartre MHIOC_RESV_KEY_SIZE); 65732f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_regi.aptpl; 65742f5224aeSachartre 65752f5224aeSachartre /* submit the request */ 65762f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 65776ace3c90SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 65782f5224aeSachartre 65792f5224aeSachartre if (rv == 0) 65802f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 65812f5224aeSachartre 65822f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 65832f5224aeSachartre return (rv); 65842f5224aeSachartre } 65852f5224aeSachartre 65862f5224aeSachartre /* 65876ace3c90SAlexandre Chartre * This function is used to send a (simple) SCSI command and check errors. 
65882f5224aeSachartre */ 65892f5224aeSachartre static int 65906ace3c90SAlexandre Chartre vdc_eio_scsi_cmd(vdc_t *vdc, uchar_t scmd, int flags) 65912f5224aeSachartre { 65922f5224aeSachartre int cdb_len, sense_len, vd_scsi_len; 65932f5224aeSachartre vd_scsi_t *vd_scsi; 65942f5224aeSachartre union scsi_cdb *cdb; 65952f5224aeSachartre int rv; 65962f5224aeSachartre 65972f5224aeSachartre ASSERT(scmd == SCMD_TEST_UNIT_READY || scmd == SCMD_WRITE_G1); 65982f5224aeSachartre 65992f5224aeSachartre if (scmd == SCMD_WRITE_G1) 66002f5224aeSachartre cdb_len = CDB_GROUP1; 66012f5224aeSachartre else 66022f5224aeSachartre cdb_len = CDB_GROUP0; 66032f5224aeSachartre 66042f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 66052f5224aeSachartre 66062f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, 0, 0, &vd_scsi_len); 66072f5224aeSachartre 66082f5224aeSachartre /* set cdb */ 66092f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 66102f5224aeSachartre cdb->scc_cmd = scmd; 66112f5224aeSachartre 66122f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 66132f5224aeSachartre 66142f5224aeSachartre /* 66156ace3c90SAlexandre Chartre * Submit the request. Note the operation should not request that any 66166ace3c90SAlexandre Chartre * error is checked because this function is precisely called when 66176ace3c90SAlexandre Chartre * checking errors. 
66182f5224aeSachartre */ 66196ace3c90SAlexandre Chartre ASSERT((flags & VDC_OP_ERRCHK) == 0); 66206ace3c90SAlexandre Chartre 66216ace3c90SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 66226ace3c90SAlexandre Chartre 0, 0, NULL, VIO_both_dir, flags); 66232f5224aeSachartre 66242f5224aeSachartre if (rv == 0) 66256ace3c90SAlexandre Chartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 66262f5224aeSachartre 66272f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 66282f5224aeSachartre return (rv); 66292f5224aeSachartre } 66302f5224aeSachartre 66312f5224aeSachartre /* 66326ace3c90SAlexandre Chartre * This function is used to check if a SCSI backend is accessible. It will 66336ace3c90SAlexandre Chartre * also detect reservation conflict if failfast is enabled, and panic the 66346ace3c90SAlexandre Chartre * system in that case. 66352f5224aeSachartre * 66362f5224aeSachartre * Returned Code: 66376ace3c90SAlexandre Chartre * 0 - disk is accessible 66386ace3c90SAlexandre Chartre * != 0 - disk is inaccessible or unable to check if disk is accessible 66392f5224aeSachartre */ 66406ace3c90SAlexandre Chartre static int 66416ace3c90SAlexandre Chartre vdc_eio_scsi_check(vdc_t *vdc, int flags) 66422f5224aeSachartre { 66432f5224aeSachartre int failure = 0; 66446ace3c90SAlexandre Chartre int rv; 66452f5224aeSachartre 66462f5224aeSachartre /* 66472f5224aeSachartre * Send a TEST UNIT READY command. The command will panic 66486ace3c90SAlexandre Chartre * the system if it fails with a reservation conflict and 66496ace3c90SAlexandre Chartre * failfast is enabled. If there is a reservation conflict 66506ace3c90SAlexandre Chartre * and failfast is not enabled then the function will return 66516ace3c90SAlexandre Chartre * EACCES. In that case, there's no problem with accessing 66526ace3c90SAlexandre Chartre * the backend, it is just reserved. 
66532f5224aeSachartre */ 66546ace3c90SAlexandre Chartre rv = vdc_eio_scsi_cmd(vdc, SCMD_TEST_UNIT_READY, flags); 66556ace3c90SAlexandre Chartre if (rv != 0 && rv != EACCES) 66562f5224aeSachartre failure++; 66572f5224aeSachartre 66586ace3c90SAlexandre Chartre /* we don't need to do more checking if failfast is not enabled */ 66596ace3c90SAlexandre Chartre if (vdc->failfast_interval == 0) 66606ace3c90SAlexandre Chartre return (failure); 66616ace3c90SAlexandre Chartre 66622f5224aeSachartre /* 66632f5224aeSachartre * With SPC-3 compliant devices TEST UNIT READY will succeed on 66642f5224aeSachartre * a reserved device, so we also do a WRITE(10) of zero byte in 66652f5224aeSachartre * order to provoke a Reservation Conflict status on those newer 66662f5224aeSachartre * devices. 66672f5224aeSachartre */ 66686ace3c90SAlexandre Chartre if (vdc_eio_scsi_cmd(vdc, SCMD_WRITE_G1, flags) != 0) 66692f5224aeSachartre failure++; 66702f5224aeSachartre 66712f5224aeSachartre return (failure); 66722f5224aeSachartre } 66732f5224aeSachartre 66742f5224aeSachartre /* 66756ace3c90SAlexandre Chartre * This function is used to check if a backend is effectively accessible. 
66762f5224aeSachartre * 66776ace3c90SAlexandre Chartre * Returned Code: 66786ace3c90SAlexandre Chartre * 0 - disk is accessible 66796ace3c90SAlexandre Chartre * != 0 - disk is inaccessible or unable to check if disk is accessible 66806ace3c90SAlexandre Chartre */ 66816ace3c90SAlexandre Chartre static int 66826ace3c90SAlexandre Chartre vdc_eio_check(vdc_t *vdc, int flags) 66836ace3c90SAlexandre Chartre { 66846ace3c90SAlexandre Chartre char *buffer; 66856ace3c90SAlexandre Chartre diskaddr_t blkno; 66866ace3c90SAlexandre Chartre int rv; 66876ace3c90SAlexandre Chartre 66886ace3c90SAlexandre Chartre ASSERT((flags & VDC_OP_ERRCHK) == 0); 66896ace3c90SAlexandre Chartre 6690007a3653SAlexandre Chartre flags |= VDC_OP_DRING_RESERVED; 6691007a3653SAlexandre Chartre 66926ace3c90SAlexandre Chartre if (VD_OP_SUPPORTED(vdc->operations, VD_OP_SCSICMD)) 66936ace3c90SAlexandre Chartre return (vdc_eio_scsi_check(vdc, flags)); 66946ace3c90SAlexandre Chartre 66956ace3c90SAlexandre Chartre ASSERT(vdc->failfast_interval == 0); 66966ace3c90SAlexandre Chartre 66976ace3c90SAlexandre Chartre /* 66986ace3c90SAlexandre Chartre * If the backend does not support SCSI operations then we simply 66996ace3c90SAlexandre Chartre * check if the backend is accessible by reading some data blocks. 67006ace3c90SAlexandre Chartre * We first try to read a random block, to try to avoid getting 67016ace3c90SAlexandre Chartre * a block that might have been cached on the service domain. Then 67026ace3c90SAlexandre Chartre * we try the last block, and finally the first block. 67036ace3c90SAlexandre Chartre * 67046ace3c90SAlexandre Chartre * We return success as soon as we are able to read any block. 
67056ace3c90SAlexandre Chartre */ 67066ace3c90SAlexandre Chartre buffer = kmem_alloc(vdc->vdisk_bsize, KM_SLEEP); 67076ace3c90SAlexandre Chartre 67086ace3c90SAlexandre Chartre if (vdc->vdisk_size > 0) { 67096ace3c90SAlexandre Chartre 67106ace3c90SAlexandre Chartre /* try a random block */ 67116ace3c90SAlexandre Chartre (void) random_get_pseudo_bytes((uint8_t *)&blkno, 67126ace3c90SAlexandre Chartre sizeof (diskaddr_t)); 67136ace3c90SAlexandre Chartre blkno = blkno % vdc->vdisk_size; 67146ace3c90SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BREAD, (caddr_t)buffer, 67156ace3c90SAlexandre Chartre vdc->vdisk_bsize, VD_SLICE_NONE, blkno, NULL, 67166ace3c90SAlexandre Chartre VIO_read_dir, flags); 67176ace3c90SAlexandre Chartre 67186ace3c90SAlexandre Chartre if (rv == 0) 67196ace3c90SAlexandre Chartre goto done; 67206ace3c90SAlexandre Chartre 67216ace3c90SAlexandre Chartre /* try the last block */ 67226ace3c90SAlexandre Chartre blkno = vdc->vdisk_size - 1; 67236ace3c90SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BREAD, (caddr_t)buffer, 67246ace3c90SAlexandre Chartre vdc->vdisk_bsize, VD_SLICE_NONE, blkno, NULL, 67256ace3c90SAlexandre Chartre VIO_read_dir, flags); 67266ace3c90SAlexandre Chartre 67276ace3c90SAlexandre Chartre if (rv == 0) 67286ace3c90SAlexandre Chartre goto done; 67296ace3c90SAlexandre Chartre } 67306ace3c90SAlexandre Chartre 67316ace3c90SAlexandre Chartre /* try block 0 */ 67326ace3c90SAlexandre Chartre blkno = 0; 67336ace3c90SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BREAD, (caddr_t)buffer, vdc->vdisk_bsize, 67346ace3c90SAlexandre Chartre VD_SLICE_NONE, blkno, NULL, VIO_read_dir, flags); 67356ace3c90SAlexandre Chartre 67366ace3c90SAlexandre Chartre done: 67376ace3c90SAlexandre Chartre kmem_free(buffer, vdc->vdisk_bsize); 67386ace3c90SAlexandre Chartre return (rv); 67396ace3c90SAlexandre Chartre } 67406ace3c90SAlexandre Chartre 67416ace3c90SAlexandre Chartre /* 67426ace3c90SAlexandre Chartre * Add a pending I/O to the eio queue. 
An I/O is added to this queue 67436ace3c90SAlexandre Chartre * when it has failed and failfast is enabled or the vdisk has multiple 67446ace3c90SAlexandre Chartre * servers. It will then be handled by the eio thread (vdc_eio_thread). 67456ace3c90SAlexandre Chartre * The eio queue is ordered starting with the most recent I/O added. 67462f5224aeSachartre */ 67472f5224aeSachartre static vdc_io_t * 67486ace3c90SAlexandre Chartre vdc_eio_queue(vdc_t *vdc, int index) 67492f5224aeSachartre { 67502f5224aeSachartre vdc_io_t *vio; 67512f5224aeSachartre 67522f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 67532f5224aeSachartre 67542f5224aeSachartre vio = kmem_alloc(sizeof (vdc_io_t), KM_SLEEP); 67556ace3c90SAlexandre Chartre vio->vio_next = vdc->eio_queue; 67566ace3c90SAlexandre Chartre vio->vio_index = index; 67572f5224aeSachartre vio->vio_qtime = ddi_get_lbolt(); 67582f5224aeSachartre 67596ace3c90SAlexandre Chartre vdc->eio_queue = vio; 67602f5224aeSachartre 67616ace3c90SAlexandre Chartre /* notify the eio thread that a new I/O is queued */ 67626ace3c90SAlexandre Chartre cv_signal(&vdc->eio_cv); 67632f5224aeSachartre 67642f5224aeSachartre return (vio); 67652f5224aeSachartre } 67662f5224aeSachartre 67672f5224aeSachartre /* 67686ace3c90SAlexandre Chartre * Remove I/Os added before the indicated deadline from the eio queue. A 67696ace3c90SAlexandre Chartre * deadline of 0 means that all I/Os have to be unqueued. The complete_io 67706ace3c90SAlexandre Chartre * boolean specifies if unqueued I/Os should be marked as completed or not. 
67712f5224aeSachartre */ 67722f5224aeSachartre static void 67736ace3c90SAlexandre Chartre vdc_eio_unqueue(vdc_t *vdc, clock_t deadline, boolean_t complete_io) 67742f5224aeSachartre { 67756ace3c90SAlexandre Chartre struct buf *buf; 67762f5224aeSachartre vdc_io_t *vio, *vio_tmp; 67776ace3c90SAlexandre Chartre int index, op; 67782f5224aeSachartre 67792f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 67802f5224aeSachartre 67812f5224aeSachartre vio_tmp = NULL; 67826ace3c90SAlexandre Chartre vio = vdc->eio_queue; 67832f5224aeSachartre 67842f5224aeSachartre if (deadline != 0) { 67852f5224aeSachartre /* 67866ace3c90SAlexandre Chartre * Skip any io queued after the deadline. The eio queue is 67876ace3c90SAlexandre Chartre * ordered starting with the last I/O added to the queue. 67882f5224aeSachartre */ 67892f5224aeSachartre while (vio != NULL && vio->vio_qtime > deadline) { 67902f5224aeSachartre vio_tmp = vio; 67912f5224aeSachartre vio = vio->vio_next; 67922f5224aeSachartre } 67932f5224aeSachartre } 67942f5224aeSachartre 67952f5224aeSachartre if (vio == NULL) 67962f5224aeSachartre /* nothing to unqueue */ 67972f5224aeSachartre return; 67982f5224aeSachartre 67992f5224aeSachartre /* update the queue */ 68002f5224aeSachartre if (vio_tmp == NULL) 68016ace3c90SAlexandre Chartre vdc->eio_queue = NULL; 68022f5224aeSachartre else 68032f5224aeSachartre vio_tmp->vio_next = NULL; 68042f5224aeSachartre 68052f5224aeSachartre /* 68066ace3c90SAlexandre Chartre * Free and complete unqueued I/Os if this was requested. All I/Os 68076ace3c90SAlexandre Chartre * have a block I/O data transfer structure (buf) and they are 68086ace3c90SAlexandre Chartre * completed by calling biodone(). 
68092f5224aeSachartre */ 68102f5224aeSachartre while (vio != NULL) { 68112f5224aeSachartre vio_tmp = vio->vio_next; 68126ace3c90SAlexandre Chartre 68136ace3c90SAlexandre Chartre if (complete_io) { 68146ace3c90SAlexandre Chartre index = vio->vio_index; 68156ace3c90SAlexandre Chartre op = vdc->local_dring[index].operation; 68166ace3c90SAlexandre Chartre buf = vdc->local_dring[index].buf; 68176ace3c90SAlexandre Chartre (void) vdc_depopulate_descriptor(vdc, index); 68186ace3c90SAlexandre Chartre ASSERT(buf->b_flags & B_ERROR); 68196ace3c90SAlexandre Chartre if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 68206ace3c90SAlexandre Chartre VD_UPDATE_ERR_STATS(vdc, vd_softerrs); 682190e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdc); 68226ace3c90SAlexandre Chartre DTRACE_IO1(done, buf_t *, buf); 6823e8dc8350Sjmcp } 68246ace3c90SAlexandre Chartre biodone(buf); 682500e3a3e9SAlexandre Chartre } 6826e8dc8350Sjmcp 68276ace3c90SAlexandre Chartre kmem_free(vio, sizeof (vdc_io_t)); 68286ace3c90SAlexandre Chartre vio = vio_tmp; 68296ace3c90SAlexandre Chartre } 68302f5224aeSachartre } 68312f5224aeSachartre 68322f5224aeSachartre /* 68336ace3c90SAlexandre Chartre * Error I/O Thread. There is one eio thread for each virtual disk that 68346ace3c90SAlexandre Chartre * has multiple servers or for which failfast is enabled. Failfast can only 68356ace3c90SAlexandre Chartre * be enabled for vdisk supporting SCSI commands. 68362f5224aeSachartre * 68376ace3c90SAlexandre Chartre * While failfast is enabled, the eio thread sends a TEST UNIT READY 68382f5224aeSachartre * and a zero size WRITE(10) SCSI commands on a regular basis to check that 68392f5224aeSachartre * we still have access to the disk. If a command fails with a RESERVATION 68402f5224aeSachartre * CONFLICT error then the system will immediatly panic. 68412f5224aeSachartre * 68426ace3c90SAlexandre Chartre * The eio thread is also woken up when an I/O has failed. 
It then checks 68432f5224aeSachartre * the access to the disk to ensure that the I/O failure was not due to a 68446ace3c90SAlexandre Chartre * reservation conflict or to the backend been inaccessible. 68452f5224aeSachartre * 68462f5224aeSachartre */ 68472f5224aeSachartre static void 68486ace3c90SAlexandre Chartre vdc_eio_thread(void *arg) 68492f5224aeSachartre { 68502f5224aeSachartre int status; 68512f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 6852d3d50737SRafael Vanoni clock_t starttime, timeout = drv_usectohz(vdc->failfast_interval); 68532f5224aeSachartre 68542f5224aeSachartre mutex_enter(&vdc->lock); 68552f5224aeSachartre 68566ace3c90SAlexandre Chartre while (vdc->failfast_interval != 0 || vdc->num_servers > 1) { 68576ace3c90SAlexandre Chartre /* 68586ace3c90SAlexandre Chartre * Wait if there is nothing in the eio queue or if the state 68596ace3c90SAlexandre Chartre * is not VDC_STATE_RUNNING. 68606ace3c90SAlexandre Chartre */ 68616ace3c90SAlexandre Chartre if (vdc->eio_queue == NULL || vdc->state != VDC_STATE_RUNNING) { 68626ace3c90SAlexandre Chartre if (vdc->failfast_interval != 0) { 68636ace3c90SAlexandre Chartre timeout = ddi_get_lbolt() + 68646ace3c90SAlexandre Chartre drv_usectohz(vdc->failfast_interval); 68656ace3c90SAlexandre Chartre (void) cv_timedwait(&vdc->eio_cv, &vdc->lock, 68666ace3c90SAlexandre Chartre timeout); 68676ace3c90SAlexandre Chartre } else { 68686ace3c90SAlexandre Chartre ASSERT(vdc->num_servers > 1); 68696ace3c90SAlexandre Chartre (void) cv_wait(&vdc->eio_cv, &vdc->lock); 68706ace3c90SAlexandre Chartre } 68712f5224aeSachartre 68726ace3c90SAlexandre Chartre if (vdc->state != VDC_STATE_RUNNING) 68736ace3c90SAlexandre Chartre continue; 68746ace3c90SAlexandre Chartre } 687500e3a3e9SAlexandre Chartre 6876e8dc8350Sjmcp mutex_exit(&vdc->lock); 6877e8dc8350Sjmcp 68786ace3c90SAlexandre Chartre starttime = ddi_get_lbolt(); 68796ace3c90SAlexandre Chartre 68806ace3c90SAlexandre Chartre /* check error */ 68816ace3c90SAlexandre Chartre status = 
vdc_eio_check(vdc, VDC_OP_STATE_RUNNING); 68822f5224aeSachartre 68832f5224aeSachartre mutex_enter(&vdc->lock); 68842f5224aeSachartre /* 68856ace3c90SAlexandre Chartre * We have dropped the lock to check the backend so we have 68866ace3c90SAlexandre Chartre * to check that the eio thread is still enabled. 68872f5224aeSachartre */ 68886ace3c90SAlexandre Chartre if (vdc->failfast_interval == 0 && vdc->num_servers <= 1) 68892f5224aeSachartre break; 68902f5224aeSachartre 68912f5224aeSachartre /* 68926ace3c90SAlexandre Chartre * If the eio queue is empty or we are not in running state 68936ace3c90SAlexandre Chartre * anymore then there is nothing to do. 68942f5224aeSachartre */ 68956ace3c90SAlexandre Chartre if (vdc->state != VDC_STATE_RUNNING || vdc->eio_queue == NULL) 68962f5224aeSachartre continue; 68972f5224aeSachartre 68986ace3c90SAlexandre Chartre if (status == 0) { 68996ace3c90SAlexandre Chartre /* 69006ace3c90SAlexandre Chartre * The backend access has been successfully checked, 69016ace3c90SAlexandre Chartre * we can complete any I/O queued before the last check. 69026ace3c90SAlexandre Chartre */ 69036ace3c90SAlexandre Chartre vdc_eio_unqueue(vdc, starttime, B_TRUE); 69046ace3c90SAlexandre Chartre 69056ace3c90SAlexandre Chartre } else if (vdc->num_servers > 1) { 69066ace3c90SAlexandre Chartre /* 69076ace3c90SAlexandre Chartre * The backend is inaccessible for a disk with multiple 69086ace3c90SAlexandre Chartre * servers. So we force a reset to switch to another 69096ace3c90SAlexandre Chartre * server. The reset will also clear the eio queue and 69106ace3c90SAlexandre Chartre * resubmit all pending I/Os. 
69116ace3c90SAlexandre Chartre */ 69126ace3c90SAlexandre Chartre mutex_enter(&vdc->read_lock); 69136ace3c90SAlexandre Chartre vdc->read_state = VDC_READ_RESET; 69146ace3c90SAlexandre Chartre cv_signal(&vdc->read_cv); 69156ace3c90SAlexandre Chartre mutex_exit(&vdc->read_lock); 6916007a3653SAlexandre Chartre } else { 6917007a3653SAlexandre Chartre /* 6918007a3653SAlexandre Chartre * There is only one path and the backend is not 6919007a3653SAlexandre Chartre * accessible, so I/Os are actually failing because 6920007a3653SAlexandre Chartre * of that. So we can complete I/O queued before the 6921007a3653SAlexandre Chartre * last check. 6922007a3653SAlexandre Chartre */ 6923007a3653SAlexandre Chartre vdc_eio_unqueue(vdc, starttime, B_TRUE); 69246ace3c90SAlexandre Chartre } 69252f5224aeSachartre } 69262f5224aeSachartre 69272f5224aeSachartre /* 69286ace3c90SAlexandre Chartre * The thread is being stopped so we can complete any queued I/O. 69292f5224aeSachartre */ 69306ace3c90SAlexandre Chartre vdc_eio_unqueue(vdc, 0, B_TRUE); 69316ace3c90SAlexandre Chartre vdc->eio_thread = NULL; 69322f5224aeSachartre mutex_exit(&vdc->lock); 69332f5224aeSachartre thread_exit(); 69342f5224aeSachartre } 69352f5224aeSachartre 69362f5224aeSachartre /* 69372f5224aeSachartre * Implement the MHIOCENFAILFAST mhd(7i) ioctl. 
69382f5224aeSachartre */ 69392f5224aeSachartre static int 69402f5224aeSachartre vdc_failfast(vdc_t *vdc, caddr_t arg, int mode) 69412f5224aeSachartre { 69422f5224aeSachartre unsigned int mh_time; 69432f5224aeSachartre 69442f5224aeSachartre if (ddi_copyin((void *)arg, &mh_time, sizeof (int), mode)) 69452f5224aeSachartre return (EFAULT); 69462f5224aeSachartre 69472f5224aeSachartre mutex_enter(&vdc->lock); 69486ace3c90SAlexandre Chartre if (mh_time != 0 && vdc->eio_thread == NULL) { 69496ace3c90SAlexandre Chartre vdc->eio_thread = thread_create(NULL, 0, 69506ace3c90SAlexandre Chartre vdc_eio_thread, vdc, 0, &p0, TS_RUN, 69512f5224aeSachartre v.v_maxsyspri - 2); 69522f5224aeSachartre } 69532f5224aeSachartre 69546ace3c90SAlexandre Chartre vdc->failfast_interval = ((long)mh_time) * MILLISEC; 69556ace3c90SAlexandre Chartre cv_signal(&vdc->eio_cv); 69562f5224aeSachartre mutex_exit(&vdc->lock); 69572f5224aeSachartre 69582f5224aeSachartre return (0); 69592f5224aeSachartre } 69602f5224aeSachartre 69612f5224aeSachartre /* 69622f5224aeSachartre * Implement the MHIOCTKOWN and MHIOCRELEASE mhd(7i) ioctls. These ioctls are 69632f5224aeSachartre * converted to VD_OP_SET_ACCESS operations. 69642f5224aeSachartre */ 69652f5224aeSachartre static int 69666ace3c90SAlexandre Chartre vdc_access_set(vdc_t *vdc, uint64_t flags) 69672f5224aeSachartre { 69682f5224aeSachartre int rv; 69692f5224aeSachartre 69702f5224aeSachartre /* submit owership command request */ 69712f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SET_ACCESS, (caddr_t)&flags, 69726ace3c90SAlexandre Chartre sizeof (uint64_t), 0, 0, VIO_both_dir, B_TRUE); 69732f5224aeSachartre 69742f5224aeSachartre return (rv); 69752f5224aeSachartre } 69762f5224aeSachartre 69772f5224aeSachartre /* 69782f5224aeSachartre * Implement the MHIOCSTATUS mhd(7i) ioctl. This ioctl is converted to a 69792f5224aeSachartre * VD_OP_GET_ACCESS operation. 
69802f5224aeSachartre */ 69812f5224aeSachartre static int 69826ace3c90SAlexandre Chartre vdc_access_get(vdc_t *vdc, uint64_t *status) 69832f5224aeSachartre { 69842f5224aeSachartre int rv; 69852f5224aeSachartre 69862f5224aeSachartre /* submit owership command request */ 69872f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_ACCESS, (caddr_t)status, 69886ace3c90SAlexandre Chartre sizeof (uint64_t), 0, 0, VIO_both_dir, B_TRUE); 69892f5224aeSachartre 69902f5224aeSachartre return (rv); 69912f5224aeSachartre } 69922f5224aeSachartre 69932f5224aeSachartre /* 69942f5224aeSachartre * Disk Ownership Thread. 69952f5224aeSachartre * 69962f5224aeSachartre * When we have taken the ownership of a disk, this thread waits to be 69972f5224aeSachartre * notified when the LDC channel is reset so that it can recover the 69982f5224aeSachartre * ownership. 69992f5224aeSachartre * 70002f5224aeSachartre * Note that the thread handling the LDC reset (vdc_process_msg_thread()) 70012f5224aeSachartre * can not be used to do the ownership recovery because it has to be 70022f5224aeSachartre * running to handle the reply message to the ownership operation. 70032f5224aeSachartre */ 70042f5224aeSachartre static void 70052f5224aeSachartre vdc_ownership_thread(void *arg) 70062f5224aeSachartre { 70072f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 70082f5224aeSachartre clock_t timeout; 70092f5224aeSachartre uint64_t status; 70102f5224aeSachartre 70112f5224aeSachartre mutex_enter(&vdc->ownership_lock); 70122f5224aeSachartre mutex_enter(&vdc->lock); 70132f5224aeSachartre 70142f5224aeSachartre while (vdc->ownership & VDC_OWNERSHIP_WANTED) { 70152f5224aeSachartre 70162f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_RESET) || 70172f5224aeSachartre !(vdc->ownership & VDC_OWNERSHIP_GRANTED)) { 70182f5224aeSachartre /* 70192f5224aeSachartre * There was a reset so the ownership has been lost, 70202f5224aeSachartre * try to recover. 
We do this without using the preempt 70212f5224aeSachartre * option so that we don't steal the ownership from 70222f5224aeSachartre * someone who has preempted us. 70232f5224aeSachartre */ 70242f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership lost, recovering", 70252f5224aeSachartre vdc->instance); 70262f5224aeSachartre 70272f5224aeSachartre vdc->ownership &= ~(VDC_OWNERSHIP_RESET | 70282f5224aeSachartre VDC_OWNERSHIP_GRANTED); 70292f5224aeSachartre 70302f5224aeSachartre mutex_exit(&vdc->lock); 70312f5224aeSachartre 70322f5224aeSachartre status = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 70336ace3c90SAlexandre Chartre VD_ACCESS_SET_PRESERVE); 70342f5224aeSachartre 70352f5224aeSachartre mutex_enter(&vdc->lock); 70362f5224aeSachartre 70372f5224aeSachartre if (status == 0) { 70382f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership recovered", 70392f5224aeSachartre vdc->instance); 70402f5224aeSachartre vdc->ownership |= VDC_OWNERSHIP_GRANTED; 70412f5224aeSachartre } else { 70422f5224aeSachartre DMSG(vdc, 0, "[%d] Fail to recover ownership", 70432f5224aeSachartre vdc->instance); 70442f5224aeSachartre } 70452f5224aeSachartre 70462f5224aeSachartre } 70472f5224aeSachartre 70482f5224aeSachartre /* 70492f5224aeSachartre * If we have the ownership then we just wait for an event 70502f5224aeSachartre * to happen (LDC reset), otherwise we will retry to recover 70512f5224aeSachartre * after a delay. 
70522f5224aeSachartre */ 70532f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) 70542f5224aeSachartre timeout = 0; 70552f5224aeSachartre else 7056d3d50737SRafael Vanoni timeout = drv_usectohz(vdc_ownership_delay); 70572f5224aeSachartre 70582f5224aeSachartre /* Release the ownership_lock and wait on the vdc lock */ 70592f5224aeSachartre mutex_exit(&vdc->ownership_lock); 70602f5224aeSachartre 70612f5224aeSachartre if (timeout == 0) 70622f5224aeSachartre (void) cv_wait(&vdc->ownership_cv, &vdc->lock); 70632f5224aeSachartre else 7064d3d50737SRafael Vanoni (void) cv_reltimedwait(&vdc->ownership_cv, &vdc->lock, 7065d3d50737SRafael Vanoni timeout, TR_CLOCK_TICK); 70662f5224aeSachartre 70672f5224aeSachartre mutex_exit(&vdc->lock); 70682f5224aeSachartre 70692f5224aeSachartre mutex_enter(&vdc->ownership_lock); 70702f5224aeSachartre mutex_enter(&vdc->lock); 70712f5224aeSachartre } 70722f5224aeSachartre 70732f5224aeSachartre vdc->ownership_thread = NULL; 70742f5224aeSachartre mutex_exit(&vdc->lock); 70752f5224aeSachartre mutex_exit(&vdc->ownership_lock); 70762f5224aeSachartre 70772f5224aeSachartre thread_exit(); 70782f5224aeSachartre } 70792f5224aeSachartre 70802f5224aeSachartre static void 70812f5224aeSachartre vdc_ownership_update(vdc_t *vdc, int ownership_flags) 70822f5224aeSachartre { 70832f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->ownership_lock)); 70842f5224aeSachartre 70852f5224aeSachartre mutex_enter(&vdc->lock); 70862f5224aeSachartre vdc->ownership = ownership_flags; 70872f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_WANTED) && 70882f5224aeSachartre vdc->ownership_thread == NULL) { 70892f5224aeSachartre /* start ownership thread */ 70902f5224aeSachartre vdc->ownership_thread = thread_create(NULL, 0, 70912f5224aeSachartre vdc_ownership_thread, vdc, 0, &p0, TS_RUN, 70922f5224aeSachartre v.v_maxsyspri - 2); 70932f5224aeSachartre } else { 70942f5224aeSachartre /* notify the ownership thread */ 70952f5224aeSachartre cv_signal(&vdc->ownership_cv); 
70962f5224aeSachartre } 70972f5224aeSachartre mutex_exit(&vdc->lock); 70982f5224aeSachartre } 70992f5224aeSachartre 71002f5224aeSachartre /* 71012f5224aeSachartre * Get the size and the block size of a virtual disk from the vdisk server. 71022f5224aeSachartre */ 71032f5224aeSachartre static int 7104de3a5331SRamesh Chitrothu vdc_get_capacity(vdc_t *vdc, size_t *dsk_size, size_t *blk_size) 71052f5224aeSachartre { 71062f5224aeSachartre int rv = 0; 71072f5224aeSachartre size_t alloc_len; 71082f5224aeSachartre vd_capacity_t *vd_cap; 71092f5224aeSachartre 7110de3a5331SRamesh Chitrothu ASSERT(MUTEX_NOT_HELD(&vdc->lock)); 71112f5224aeSachartre 71122f5224aeSachartre alloc_len = P2ROUNDUP(sizeof (vd_capacity_t), sizeof (uint64_t)); 71132f5224aeSachartre 71142f5224aeSachartre vd_cap = kmem_zalloc(alloc_len, KM_SLEEP); 71152f5224aeSachartre 71162f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_CAPACITY, (caddr_t)vd_cap, alloc_len, 71176ace3c90SAlexandre Chartre 0, 0, VIO_both_dir, B_TRUE); 71182f5224aeSachartre 7119de3a5331SRamesh Chitrothu *dsk_size = vd_cap->vdisk_size; 7120de3a5331SRamesh Chitrothu *blk_size = vd_cap->vdisk_block_size; 71212f5224aeSachartre 71222f5224aeSachartre kmem_free(vd_cap, alloc_len); 71232f5224aeSachartre return (rv); 71242f5224aeSachartre } 71252f5224aeSachartre 71262f5224aeSachartre /* 7127de3a5331SRamesh Chitrothu * Check the disk capacity. Disk size information is updated if size has 7128de3a5331SRamesh Chitrothu * changed. 7129de3a5331SRamesh Chitrothu * 7130de3a5331SRamesh Chitrothu * Return 0 if the disk capacity is available, or non-zero if it is not. 
7131de3a5331SRamesh Chitrothu */ 7132de3a5331SRamesh Chitrothu static int 7133de3a5331SRamesh Chitrothu vdc_check_capacity(vdc_t *vdc) 7134de3a5331SRamesh Chitrothu { 7135de3a5331SRamesh Chitrothu size_t dsk_size, blk_size; 7136de3a5331SRamesh Chitrothu int rv; 7137de3a5331SRamesh Chitrothu 71383f4df6d3SAlexandre Chartre /* 71393f4df6d3SAlexandre Chartre * If the vdisk does not support the VD_OP_GET_CAPACITY operation 71403f4df6d3SAlexandre Chartre * then the disk capacity has been retrieved during the handshake 71413f4df6d3SAlexandre Chartre * and there's nothing more to do here. 71423f4df6d3SAlexandre Chartre */ 71433f4df6d3SAlexandre Chartre if (!VD_OP_SUPPORTED(vdc->operations, VD_OP_GET_CAPACITY)) 71443f4df6d3SAlexandre Chartre return (0); 71453f4df6d3SAlexandre Chartre 7146de3a5331SRamesh Chitrothu if ((rv = vdc_get_capacity(vdc, &dsk_size, &blk_size)) != 0) 7147de3a5331SRamesh Chitrothu return (rv); 7148de3a5331SRamesh Chitrothu 714965908c77Syu, larry liu - Sun Microsystems - Beijing China if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || blk_size == 0) 7150de3a5331SRamesh Chitrothu return (EINVAL); 7151de3a5331SRamesh Chitrothu 7152de3a5331SRamesh Chitrothu mutex_enter(&vdc->lock); 715365908c77Syu, larry liu - Sun Microsystems - Beijing China /* 715465908c77Syu, larry liu - Sun Microsystems - Beijing China * First try to update the VIO block size (which is the same as the 715565908c77Syu, larry liu - Sun Microsystems - Beijing China * vdisk block size). If this returns an error then that means that 715665908c77Syu, larry liu - Sun Microsystems - Beijing China * we can not use that block size so basically the vdisk is unusable 715765908c77Syu, larry liu - Sun Microsystems - Beijing China * and we return an error. 
715865908c77Syu, larry liu - Sun Microsystems - Beijing China */ 715965908c77Syu, larry liu - Sun Microsystems - Beijing China rv = vdc_update_vio_bsize(vdc, blk_size); 716065908c77Syu, larry liu - Sun Microsystems - Beijing China if (rv == 0) 7161de3a5331SRamesh Chitrothu vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz); 716265908c77Syu, larry liu - Sun Microsystems - Beijing China 7163de3a5331SRamesh Chitrothu mutex_exit(&vdc->lock); 7164de3a5331SRamesh Chitrothu 716565908c77Syu, larry liu - Sun Microsystems - Beijing China return (rv); 7166de3a5331SRamesh Chitrothu } 7167de3a5331SRamesh Chitrothu 7168de3a5331SRamesh Chitrothu /* 71691ae08745Sheppo * This structure is used in the DKIO(7I) array below. 71701ae08745Sheppo */ 71711ae08745Sheppo typedef struct vdc_dk_ioctl { 71721ae08745Sheppo uint8_t op; /* VD_OP_XXX value */ 71731ae08745Sheppo int cmd; /* Solaris ioctl operation number */ 71741ae08745Sheppo size_t nbytes; /* size of structure to be copied */ 71750a55fbb7Slm66018 71760a55fbb7Slm66018 /* function to convert between vDisk and Solaris structure formats */ 7177d10e4ef2Snarayan int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 7178d10e4ef2Snarayan int mode, int dir); 71791ae08745Sheppo } vdc_dk_ioctl_t; 71801ae08745Sheppo 71811ae08745Sheppo /* 71821ae08745Sheppo * Subset of DKIO(7I) operations currently supported 71831ae08745Sheppo */ 71841ae08745Sheppo static vdc_dk_ioctl_t dk_ioctl[] = { 7185eff7243fSlm66018 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 71860a55fbb7Slm66018 vdc_null_copy_func}, 71870a55fbb7Slm66018 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 71884bac2208Snarayan vdc_get_wce_convert}, 71890a55fbb7Slm66018 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 71904bac2208Snarayan vdc_set_wce_convert}, 71910a55fbb7Slm66018 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 71920a55fbb7Slm66018 vdc_get_vtoc_convert}, 71930a55fbb7Slm66018 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 71940a55fbb7Slm66018 vdc_set_vtoc_convert}, 
7195342440ecSPrasad Singamsetty {VD_OP_GET_VTOC, DKIOCGEXTVTOC, sizeof (vd_vtoc_t), 7196342440ecSPrasad Singamsetty vdc_get_extvtoc_convert}, 7197342440ecSPrasad Singamsetty {VD_OP_SET_VTOC, DKIOCSEXTVTOC, sizeof (vd_vtoc_t), 7198342440ecSPrasad Singamsetty vdc_set_extvtoc_convert}, 71990a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 72000a55fbb7Slm66018 vdc_get_geom_convert}, 72010a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 72020a55fbb7Slm66018 vdc_get_geom_convert}, 72030a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 72040a55fbb7Slm66018 vdc_get_geom_convert}, 72050a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 72060a55fbb7Slm66018 vdc_set_geom_convert}, 72074bac2208Snarayan {VD_OP_GET_EFI, DKIOCGETEFI, 0, 72084bac2208Snarayan vdc_get_efi_convert}, 72094bac2208Snarayan {VD_OP_SET_EFI, DKIOCSETEFI, 0, 72104bac2208Snarayan vdc_set_efi_convert}, 72110a55fbb7Slm66018 721287a7269eSachartre /* DIOCTL_RWCMD is converted to a read or a write */ 721387a7269eSachartre {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 721487a7269eSachartre 72152f5224aeSachartre /* mhd(7I) non-shared multihost disks ioctls */ 72162f5224aeSachartre {0, MHIOCTKOWN, 0, vdc_null_copy_func}, 72172f5224aeSachartre {0, MHIOCRELEASE, 0, vdc_null_copy_func}, 72182f5224aeSachartre {0, MHIOCSTATUS, 0, vdc_null_copy_func}, 72192f5224aeSachartre {0, MHIOCQRESERVE, 0, vdc_null_copy_func}, 72202f5224aeSachartre 72212f5224aeSachartre /* mhd(7I) shared multihost disks ioctls */ 72222f5224aeSachartre {0, MHIOCGRP_INKEYS, 0, vdc_null_copy_func}, 72232f5224aeSachartre {0, MHIOCGRP_INRESV, 0, vdc_null_copy_func}, 72242f5224aeSachartre {0, MHIOCGRP_REGISTER, 0, vdc_null_copy_func}, 72252f5224aeSachartre {0, MHIOCGRP_RESERVE, 0, vdc_null_copy_func}, 72262f5224aeSachartre {0, MHIOCGRP_PREEMPTANDABORT, 0, vdc_null_copy_func}, 72272f5224aeSachartre {0, MHIOCGRP_REGISTERANDIGNOREKEY, 0, vdc_null_copy_func}, 
72282f5224aeSachartre 72292f5224aeSachartre /* mhd(7I) failfast ioctl */ 72302f5224aeSachartre {0, MHIOCENFAILFAST, 0, vdc_null_copy_func}, 72312f5224aeSachartre 72320a55fbb7Slm66018 /* 72330a55fbb7Slm66018 * These particular ioctls are not sent to the server - vdc fakes up 72340a55fbb7Slm66018 * the necessary info. 72350a55fbb7Slm66018 */ 72360a55fbb7Slm66018 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 72370a55fbb7Slm66018 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 72380a55fbb7Slm66018 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 72399642afceSachartre {0, DKIOCPARTITION, 0, vdc_null_copy_func }, 724087a7269eSachartre {0, DKIOCGAPART, 0, vdc_null_copy_func }, 72410a55fbb7Slm66018 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 72420a55fbb7Slm66018 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 72431ae08745Sheppo }; 72441ae08745Sheppo 72451ae08745Sheppo /* 7246edcc0754Sachartre * This function handles ioctl requests from the vd_efi_alloc_and_read() 7247edcc0754Sachartre * function and forward them to the vdisk. 
72482f5224aeSachartre */ 72492f5224aeSachartre static int 7250edcc0754Sachartre vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg) 72512f5224aeSachartre { 7252edcc0754Sachartre vdc_t *vdc = (vdc_t *)vdisk; 7253edcc0754Sachartre dev_t dev; 72542f5224aeSachartre int rval; 7255edcc0754Sachartre 7256edcc0754Sachartre dev = makedevice(ddi_driver_major(vdc->dip), 7257edcc0754Sachartre VD_MAKE_DEV(vdc->instance, 0)); 7258edcc0754Sachartre 7259edcc0754Sachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, FKIOCTL, &rval)); 72602f5224aeSachartre } 72612f5224aeSachartre 72622f5224aeSachartre /* 72631ae08745Sheppo * Function: 72641ae08745Sheppo * vd_process_ioctl() 72651ae08745Sheppo * 72661ae08745Sheppo * Description: 72670a55fbb7Slm66018 * This routine processes disk specific ioctl calls 72681ae08745Sheppo * 72691ae08745Sheppo * Arguments: 72701ae08745Sheppo * dev - the device number 72711ae08745Sheppo * cmd - the operation [dkio(7I)] to be processed 72721ae08745Sheppo * arg - pointer to user provided structure 72731ae08745Sheppo * (contains data to be set or reference parameter for get) 72741ae08745Sheppo * mode - bit flag, indicating open settings, 32/64 bit type, etc 72752f5224aeSachartre * rvalp - pointer to return value for calling process. 
72761ae08745Sheppo * 72771ae08745Sheppo * Return Code: 72781ae08745Sheppo * 0 72791ae08745Sheppo * EFAULT 72801ae08745Sheppo * ENXIO 72811ae08745Sheppo * EIO 72821ae08745Sheppo * ENOTSUP 72831ae08745Sheppo */ 72841ae08745Sheppo static int 72852f5224aeSachartre vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp) 72861ae08745Sheppo { 72870d0c8d4bSnarayan int instance = VDCUNIT(dev); 72881ae08745Sheppo vdc_t *vdc = NULL; 72891ae08745Sheppo int rv = -1; 72901ae08745Sheppo int idx = 0; /* index into dk_ioctl[] */ 72911ae08745Sheppo size_t len = 0; /* #bytes to send to vds */ 72921ae08745Sheppo size_t alloc_len = 0; /* #bytes to allocate mem for */ 72931ae08745Sheppo caddr_t mem_p = NULL; 72941ae08745Sheppo size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 72953af08d82Slm66018 vdc_dk_ioctl_t *iop; 72961ae08745Sheppo 72971ae08745Sheppo vdc = ddi_get_soft_state(vdc_state, instance); 72981ae08745Sheppo if (vdc == NULL) { 72991ae08745Sheppo cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 73001ae08745Sheppo instance); 73011ae08745Sheppo return (ENXIO); 73021ae08745Sheppo } 73031ae08745Sheppo 73043af08d82Slm66018 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 73053af08d82Slm66018 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 73061ae08745Sheppo 73072f5224aeSachartre if (rvalp != NULL) { 73082f5224aeSachartre /* the return value of the ioctl is 0 by default */ 73092f5224aeSachartre *rvalp = 0; 73102f5224aeSachartre } 73112f5224aeSachartre 73121ae08745Sheppo /* 73131ae08745Sheppo * Validate the ioctl operation to be performed. 73141ae08745Sheppo * 73151ae08745Sheppo * If we have looped through the array without finding a match then we 73161ae08745Sheppo * don't support this ioctl. 
73171ae08745Sheppo */ 73181ae08745Sheppo for (idx = 0; idx < nioctls; idx++) { 73191ae08745Sheppo if (cmd == dk_ioctl[idx].cmd) 73201ae08745Sheppo break; 73211ae08745Sheppo } 73221ae08745Sheppo 73231ae08745Sheppo if (idx >= nioctls) { 73243af08d82Slm66018 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 7325e1ebb9ecSlm66018 vdc->instance, cmd); 73261ae08745Sheppo return (ENOTSUP); 73271ae08745Sheppo } 73281ae08745Sheppo 73293af08d82Slm66018 iop = &(dk_ioctl[idx]); 73303af08d82Slm66018 73314bac2208Snarayan if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 73324bac2208Snarayan /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 73334bac2208Snarayan dk_efi_t dk_efi; 73344bac2208Snarayan 73354bac2208Snarayan rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 73364bac2208Snarayan if (rv != 0) 73374bac2208Snarayan return (EFAULT); 73384bac2208Snarayan 73394bac2208Snarayan len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 73404bac2208Snarayan } else { 73413af08d82Slm66018 len = iop->nbytes; 73424bac2208Snarayan } 73431ae08745Sheppo 73442f5224aeSachartre /* check if the ioctl is applicable */ 73451ae08745Sheppo switch (cmd) { 73461ae08745Sheppo case CDROMREADOFFSET: 73471ae08745Sheppo case DKIOCREMOVABLE: 73481ae08745Sheppo return (ENOTTY); 73491ae08745Sheppo 73502f5224aeSachartre case USCSICMD: 73512f5224aeSachartre case MHIOCTKOWN: 73522f5224aeSachartre case MHIOCSTATUS: 73532f5224aeSachartre case MHIOCQRESERVE: 73542f5224aeSachartre case MHIOCRELEASE: 73552f5224aeSachartre case MHIOCGRP_INKEYS: 73562f5224aeSachartre case MHIOCGRP_INRESV: 73572f5224aeSachartre case MHIOCGRP_REGISTER: 73582f5224aeSachartre case MHIOCGRP_RESERVE: 73592f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 73602f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 73612f5224aeSachartre case MHIOCENFAILFAST: 73622f5224aeSachartre if (vdc->cinfo == NULL) 73632f5224aeSachartre return (ENXIO); 73642f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_SCSI_CCS) 73652f5224aeSachartre return 
(ENOTTY); 73662f5224aeSachartre break; 73672f5224aeSachartre 73682f5224aeSachartre case DIOCTL_RWCMD: 73692f5224aeSachartre if (vdc->cinfo == NULL) 73702f5224aeSachartre return (ENXIO); 73712f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_DIRECT) 73722f5224aeSachartre return (ENOTTY); 73732f5224aeSachartre break; 73742f5224aeSachartre 73752f5224aeSachartre case DKIOCINFO: 73762f5224aeSachartre if (vdc->cinfo == NULL) 73772f5224aeSachartre return (ENXIO); 73782f5224aeSachartre break; 73792f5224aeSachartre 73802f5224aeSachartre case DKIOCGMEDIAINFO: 73812f5224aeSachartre if (vdc->minfo == NULL) 73822f5224aeSachartre return (ENXIO); 73832f5224aeSachartre if (vdc_check_capacity(vdc) != 0) 73842f5224aeSachartre /* disk capacity is not available */ 73852f5224aeSachartre return (EIO); 73862f5224aeSachartre break; 73872f5224aeSachartre } 73882f5224aeSachartre 73892f5224aeSachartre /* 73902f5224aeSachartre * Deal with ioctls which require a processing different than 73912f5224aeSachartre * converting ioctl arguments and sending a corresponding 73922f5224aeSachartre * VD operation. 73932f5224aeSachartre */ 73942f5224aeSachartre switch (cmd) { 73952f5224aeSachartre 73962f5224aeSachartre case USCSICMD: 73972f5224aeSachartre { 73982f5224aeSachartre return (vdc_uscsi_cmd(vdc, arg, mode)); 73992f5224aeSachartre } 74002f5224aeSachartre 74012f5224aeSachartre case MHIOCTKOWN: 74022f5224aeSachartre { 74032f5224aeSachartre mutex_enter(&vdc->ownership_lock); 74042f5224aeSachartre /* 74052f5224aeSachartre * We have to set VDC_OWNERSHIP_WANTED now so that the ownership 74062f5224aeSachartre * can be flagged with VDC_OWNERSHIP_RESET if the LDC is reset 74072f5224aeSachartre * while we are processing the ioctl. 
74082f5224aeSachartre */ 74092f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED); 74102f5224aeSachartre 74112f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 74126ace3c90SAlexandre Chartre VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE); 74132f5224aeSachartre if (rv == 0) { 74142f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED | 74152f5224aeSachartre VDC_OWNERSHIP_GRANTED); 74162f5224aeSachartre } else { 74172f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 74182f5224aeSachartre } 74192f5224aeSachartre mutex_exit(&vdc->ownership_lock); 74202f5224aeSachartre return (rv); 74212f5224aeSachartre } 74222f5224aeSachartre 74232f5224aeSachartre case MHIOCRELEASE: 74242f5224aeSachartre { 74252f5224aeSachartre mutex_enter(&vdc->ownership_lock); 74266ace3c90SAlexandre Chartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR); 74272f5224aeSachartre if (rv == 0) { 74282f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 74292f5224aeSachartre } 74302f5224aeSachartre mutex_exit(&vdc->ownership_lock); 74312f5224aeSachartre return (rv); 74322f5224aeSachartre } 74332f5224aeSachartre 74342f5224aeSachartre case MHIOCSTATUS: 74352f5224aeSachartre { 74362f5224aeSachartre uint64_t status; 74372f5224aeSachartre 74386ace3c90SAlexandre Chartre rv = vdc_access_get(vdc, &status); 74392f5224aeSachartre if (rv == 0 && rvalp != NULL) 74402f5224aeSachartre *rvalp = (status & VD_ACCESS_ALLOWED)? 
0 : 1; 74412f5224aeSachartre return (rv); 74422f5224aeSachartre } 74432f5224aeSachartre 74442f5224aeSachartre case MHIOCQRESERVE: 74452f5224aeSachartre { 74466ace3c90SAlexandre Chartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE); 74472f5224aeSachartre return (rv); 74482f5224aeSachartre } 74492f5224aeSachartre 74502f5224aeSachartre case MHIOCGRP_INKEYS: 74512f5224aeSachartre { 74522f5224aeSachartre return (vdc_mhd_inkeys(vdc, arg, mode)); 74532f5224aeSachartre } 74542f5224aeSachartre 74552f5224aeSachartre case MHIOCGRP_INRESV: 74562f5224aeSachartre { 74572f5224aeSachartre return (vdc_mhd_inresv(vdc, arg, mode)); 74582f5224aeSachartre } 74592f5224aeSachartre 74602f5224aeSachartre case MHIOCGRP_REGISTER: 74612f5224aeSachartre { 74622f5224aeSachartre return (vdc_mhd_register(vdc, arg, mode)); 74632f5224aeSachartre } 74642f5224aeSachartre 74652f5224aeSachartre case MHIOCGRP_RESERVE: 74662f5224aeSachartre { 74672f5224aeSachartre return (vdc_mhd_reserve(vdc, arg, mode)); 74682f5224aeSachartre } 74692f5224aeSachartre 74702f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 74712f5224aeSachartre { 74722f5224aeSachartre return (vdc_mhd_preemptabort(vdc, arg, mode)); 74732f5224aeSachartre } 74742f5224aeSachartre 74752f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 74762f5224aeSachartre { 74772f5224aeSachartre return (vdc_mhd_registerignore(vdc, arg, mode)); 74782f5224aeSachartre } 74792f5224aeSachartre 74802f5224aeSachartre case MHIOCENFAILFAST: 74812f5224aeSachartre { 74822f5224aeSachartre rv = vdc_failfast(vdc, arg, mode); 74832f5224aeSachartre return (rv); 74842f5224aeSachartre } 74852f5224aeSachartre 748687a7269eSachartre case DIOCTL_RWCMD: 748787a7269eSachartre { 748865908c77Syu, larry liu - Sun Microsystems - Beijing China return (vdc_dioctl_rwcmd(vdc, arg, mode)); 748987a7269eSachartre } 749087a7269eSachartre 749187a7269eSachartre case DKIOCGAPART: 749287a7269eSachartre { 74939642afceSachartre return (vdc_dkio_gapart(vdc, arg, mode)); 74949642afceSachartre } 
74959642afceSachartre 74969642afceSachartre case DKIOCPARTITION: 74979642afceSachartre { 74989642afceSachartre return (vdc_dkio_partition(vdc, arg, mode)); 749987a7269eSachartre } 750087a7269eSachartre 75011ae08745Sheppo case DKIOCINFO: 75021ae08745Sheppo { 75031ae08745Sheppo struct dk_cinfo cinfo; 75041ae08745Sheppo 75051ae08745Sheppo bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 75060d0c8d4bSnarayan cinfo.dki_partition = VDCPART(dev); 75071ae08745Sheppo 75081ae08745Sheppo rv = ddi_copyout(&cinfo, (void *)arg, 75091ae08745Sheppo sizeof (struct dk_cinfo), mode); 75101ae08745Sheppo if (rv != 0) 75111ae08745Sheppo return (EFAULT); 75121ae08745Sheppo 75131ae08745Sheppo return (0); 75141ae08745Sheppo } 75151ae08745Sheppo 75161ae08745Sheppo case DKIOCGMEDIAINFO: 75178e6a2a04Slm66018 { 75182f5224aeSachartre ASSERT(vdc->vdisk_size != 0); 7519de3a5331SRamesh Chitrothu ASSERT(vdc->minfo->dki_capacity != 0); 75201ae08745Sheppo rv = ddi_copyout(vdc->minfo, (void *)arg, 75211ae08745Sheppo sizeof (struct dk_minfo), mode); 75221ae08745Sheppo if (rv != 0) 75231ae08745Sheppo return (EFAULT); 75241ae08745Sheppo 75251ae08745Sheppo return (0); 75261ae08745Sheppo } 75271ae08745Sheppo 75288e6a2a04Slm66018 case DKIOCFLUSHWRITECACHE: 75298e6a2a04Slm66018 { 753017cadca8Slm66018 struct dk_callback *dkc = 753117cadca8Slm66018 (struct dk_callback *)(uintptr_t)arg; 75328e6a2a04Slm66018 vdc_dk_arg_t *dkarg = NULL; 75338e6a2a04Slm66018 75343af08d82Slm66018 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 75353af08d82Slm66018 instance, mode); 75368e6a2a04Slm66018 75378e6a2a04Slm66018 /* 75388e6a2a04Slm66018 * If arg is NULL, then there is no callback function 75398e6a2a04Slm66018 * registered and the call operates synchronously; we 75408e6a2a04Slm66018 * break and continue with the rest of the function and 75418e6a2a04Slm66018 * wait for vds to return (i.e. 
after the request to 75428e6a2a04Slm66018 * vds returns successfully, all writes completed prior 75438e6a2a04Slm66018 * to the ioctl will have been flushed from the disk 75448e6a2a04Slm66018 * write cache to persistent media. 75458e6a2a04Slm66018 * 75468e6a2a04Slm66018 * If a callback function is registered, we dispatch 75478e6a2a04Slm66018 * the request on a task queue and return immediately. 75488e6a2a04Slm66018 * The callback will deal with informing the calling 75498e6a2a04Slm66018 * thread that the flush request is completed. 75508e6a2a04Slm66018 */ 75518e6a2a04Slm66018 if (dkc == NULL) 75528e6a2a04Slm66018 break; 75538e6a2a04Slm66018 7554eff7243fSlm66018 /* 7555eff7243fSlm66018 * the asynchronous callback is only supported if 7556eff7243fSlm66018 * invoked from within the kernel 7557eff7243fSlm66018 */ 7558eff7243fSlm66018 if ((mode & FKIOCTL) == 0) 7559eff7243fSlm66018 return (ENOTSUP); 7560eff7243fSlm66018 75618e6a2a04Slm66018 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 75628e6a2a04Slm66018 75638e6a2a04Slm66018 dkarg->mode = mode; 75648e6a2a04Slm66018 dkarg->dev = dev; 75658e6a2a04Slm66018 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 75668e6a2a04Slm66018 75678e6a2a04Slm66018 mutex_enter(&vdc->lock); 75688e6a2a04Slm66018 vdc->dkio_flush_pending++; 75698e6a2a04Slm66018 dkarg->vdc = vdc; 75708e6a2a04Slm66018 mutex_exit(&vdc->lock); 75718e6a2a04Slm66018 75728e6a2a04Slm66018 /* put the request on a task queue */ 75738e6a2a04Slm66018 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 75748e6a2a04Slm66018 (void *)dkarg, DDI_SLEEP); 75753af08d82Slm66018 if (rv == NULL) { 75763af08d82Slm66018 /* clean up if dispatch fails */ 75773af08d82Slm66018 mutex_enter(&vdc->lock); 75783af08d82Slm66018 vdc->dkio_flush_pending--; 757978fcd0a1Sachartre mutex_exit(&vdc->lock); 75803af08d82Slm66018 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 75813af08d82Slm66018 } 75828e6a2a04Slm66018 75838e6a2a04Slm66018 return (rv == NULL ? 
ENOMEM : 0); 75848e6a2a04Slm66018 } 75858e6a2a04Slm66018 } 75868e6a2a04Slm66018 75871ae08745Sheppo /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 75883af08d82Slm66018 ASSERT(iop->op != 0); 75891ae08745Sheppo 759017cadca8Slm66018 /* check if the vDisk server handles the operation for this vDisk */ 759117cadca8Slm66018 if (VD_OP_SUPPORTED(vdc->operations, iop->op) == B_FALSE) { 759217cadca8Slm66018 DMSG(vdc, 0, "[%d] Unsupported VD_OP operation (0x%x)\n", 759317cadca8Slm66018 vdc->instance, iop->op); 759417cadca8Slm66018 return (ENOTSUP); 759517cadca8Slm66018 } 759617cadca8Slm66018 75971ae08745Sheppo /* LDC requires that the memory being mapped is 8-byte aligned */ 75981ae08745Sheppo alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 75993af08d82Slm66018 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 76003af08d82Slm66018 instance, len, alloc_len); 76011ae08745Sheppo 7602eff7243fSlm66018 if (alloc_len > 0) 76031ae08745Sheppo mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 76041ae08745Sheppo 76050a55fbb7Slm66018 /* 7606eff7243fSlm66018 * Call the conversion function for this ioctl which, if necessary, 76070a55fbb7Slm66018 * converts from the Solaris format to the format ARC'ed 76080a55fbb7Slm66018 * as part of the vDisk protocol (FWARC 2006/195) 76090a55fbb7Slm66018 */ 76103af08d82Slm66018 ASSERT(iop->convert != NULL); 76113af08d82Slm66018 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 76121ae08745Sheppo if (rv != 0) { 76133af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 7614e1ebb9ecSlm66018 instance, rv, cmd); 76151ae08745Sheppo if (mem_p != NULL) 76161ae08745Sheppo kmem_free(mem_p, alloc_len); 76170a55fbb7Slm66018 return (rv); 76181ae08745Sheppo } 76191ae08745Sheppo 76201ae08745Sheppo /* 76211ae08745Sheppo * send request to vds to service the ioctl. 
76221ae08745Sheppo */ 76233af08d82Slm66018 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 76246ace3c90SAlexandre Chartre VDCPART(dev), 0, VIO_both_dir, B_TRUE); 762578fcd0a1Sachartre 76261ae08745Sheppo if (rv != 0) { 76271ae08745Sheppo /* 76281ae08745Sheppo * This is not necessarily an error. The ioctl could 76291ae08745Sheppo * be returning a value such as ENOTTY to indicate 76301ae08745Sheppo * that the ioctl is not applicable. 76311ae08745Sheppo */ 76323af08d82Slm66018 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 7633e1ebb9ecSlm66018 instance, rv, cmd); 76341ae08745Sheppo if (mem_p != NULL) 76351ae08745Sheppo kmem_free(mem_p, alloc_len); 7636d10e4ef2Snarayan 76371ae08745Sheppo return (rv); 76381ae08745Sheppo } 76391ae08745Sheppo 76401ae08745Sheppo /* 76410a55fbb7Slm66018 * Call the conversion function (if it exists) for this ioctl 76420a55fbb7Slm66018 * which converts from the format ARC'ed as part of the vDisk 76430a55fbb7Slm66018 * protocol (FWARC 2006/195) back to a format understood by 76440a55fbb7Slm66018 * the rest of Solaris. 
76451ae08745Sheppo */ 76463af08d82Slm66018 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 76470a55fbb7Slm66018 if (rv != 0) { 76483af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 7649e1ebb9ecSlm66018 instance, rv, cmd); 76501ae08745Sheppo if (mem_p != NULL) 76511ae08745Sheppo kmem_free(mem_p, alloc_len); 76520a55fbb7Slm66018 return (rv); 76531ae08745Sheppo } 76541ae08745Sheppo 76551ae08745Sheppo if (mem_p != NULL) 76561ae08745Sheppo kmem_free(mem_p, alloc_len); 76571ae08745Sheppo 76581ae08745Sheppo return (rv); 76591ae08745Sheppo } 76601ae08745Sheppo 76611ae08745Sheppo /* 76621ae08745Sheppo * Function: 76630a55fbb7Slm66018 * 76640a55fbb7Slm66018 * Description: 76650a55fbb7Slm66018 * This is an empty conversion function used by ioctl calls which 76660a55fbb7Slm66018 * do not need to convert the data being passed in/out to userland 76670a55fbb7Slm66018 */ 76680a55fbb7Slm66018 static int 7669d10e4ef2Snarayan vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 76700a55fbb7Slm66018 { 7671d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 76720a55fbb7Slm66018 _NOTE(ARGUNUSED(from)) 76730a55fbb7Slm66018 _NOTE(ARGUNUSED(to)) 76740a55fbb7Slm66018 _NOTE(ARGUNUSED(mode)) 76750a55fbb7Slm66018 _NOTE(ARGUNUSED(dir)) 76760a55fbb7Slm66018 76770a55fbb7Slm66018 return (0); 76780a55fbb7Slm66018 } 76790a55fbb7Slm66018 76804bac2208Snarayan static int 76814bac2208Snarayan vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 76824bac2208Snarayan int mode, int dir) 76834bac2208Snarayan { 76844bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 76854bac2208Snarayan 76864bac2208Snarayan if (dir == VD_COPYIN) 76874bac2208Snarayan return (0); /* nothing to do */ 76884bac2208Snarayan 76894bac2208Snarayan if (ddi_copyout(from, to, sizeof (int), mode) != 0) 76904bac2208Snarayan return (EFAULT); 76914bac2208Snarayan 76924bac2208Snarayan return (0); 76934bac2208Snarayan } 76944bac2208Snarayan 76954bac2208Snarayan static int 76964bac2208Snarayan 
vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 76974bac2208Snarayan int mode, int dir) 76984bac2208Snarayan { 76994bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 77004bac2208Snarayan 77014bac2208Snarayan if (dir == VD_COPYOUT) 77024bac2208Snarayan return (0); /* nothing to do */ 77034bac2208Snarayan 77044bac2208Snarayan if (ddi_copyin(from, to, sizeof (int), mode) != 0) 77054bac2208Snarayan return (EFAULT); 77064bac2208Snarayan 77074bac2208Snarayan return (0); 77084bac2208Snarayan } 77094bac2208Snarayan 77100a55fbb7Slm66018 /* 77110a55fbb7Slm66018 * Function: 77120a55fbb7Slm66018 * vdc_get_vtoc_convert() 77130a55fbb7Slm66018 * 77140a55fbb7Slm66018 * Description: 7715d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGVTOC 7716d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 7717d10e4ef2Snarayan * 7718d10e4ef2Snarayan * In the struct vtoc definition, the timestamp field is marked as not 7719d10e4ef2Snarayan * supported so it is not part of vDisk protocol (FWARC 2006/195). 7720d10e4ef2Snarayan * However SVM uses that field to check it can write into the VTOC, 7721d10e4ef2Snarayan * so we fake up the info of that field. 77220a55fbb7Slm66018 * 77230a55fbb7Slm66018 * Arguments: 7724d10e4ef2Snarayan * vdc - the vDisk client 77250a55fbb7Slm66018 * from - the buffer containing the data to be copied from 77260a55fbb7Slm66018 * to - the buffer to be copied to 77270a55fbb7Slm66018 * mode - flags passed to ioctl() call 77280a55fbb7Slm66018 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 77290a55fbb7Slm66018 * 77300a55fbb7Slm66018 * Return Code: 77310a55fbb7Slm66018 * 0 - Success 77320a55fbb7Slm66018 * ENXIO - incorrect buffer passed in. 7733d10e4ef2Snarayan * EFAULT - ddi_copyout routine encountered an error. 
77340a55fbb7Slm66018 */ 77350a55fbb7Slm66018 static int 7736d10e4ef2Snarayan vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 77370a55fbb7Slm66018 { 7738d10e4ef2Snarayan int i; 7739342440ecSPrasad Singamsetty struct vtoc vtoc; 7740342440ecSPrasad Singamsetty struct vtoc32 vtoc32; 7741342440ecSPrasad Singamsetty struct extvtoc evtoc; 7742342440ecSPrasad Singamsetty int rv; 77430a55fbb7Slm66018 77440a55fbb7Slm66018 if (dir != VD_COPYOUT) 77450a55fbb7Slm66018 return (0); /* nothing to do */ 77460a55fbb7Slm66018 77470a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 77480a55fbb7Slm66018 return (ENXIO); 77490a55fbb7Slm66018 7750342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) 7751342440ecSPrasad Singamsetty return (EOVERFLOW); 77520a55fbb7Slm66018 7753342440ecSPrasad Singamsetty VD_VTOC2VTOC((vd_vtoc_t *)from, &evtoc); 7754d10e4ef2Snarayan 7755d10e4ef2Snarayan /* fake the VTOC timestamp field */ 7756d10e4ef2Snarayan for (i = 0; i < V_NUMPAR; i++) { 7757342440ecSPrasad Singamsetty evtoc.timestamp[i] = vdc->vtoc->timestamp[i]; 7758d10e4ef2Snarayan } 7759d10e4ef2Snarayan 77600a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 776117cadca8Slm66018 /* LINTED E_ASSIGN_NARROW_CONV */ 7762342440ecSPrasad Singamsetty extvtoctovtoc32(evtoc, vtoc32); 7763342440ecSPrasad Singamsetty rv = ddi_copyout(&vtoc32, to, sizeof (vtoc32), mode); 77640a55fbb7Slm66018 if (rv != 0) 77650a55fbb7Slm66018 rv = EFAULT; 7766342440ecSPrasad Singamsetty } else { 7767342440ecSPrasad Singamsetty extvtoctovtoc(evtoc, vtoc); 7768342440ecSPrasad Singamsetty rv = ddi_copyout(&vtoc, to, sizeof (vtoc), mode); 7769342440ecSPrasad Singamsetty if (rv != 0) 7770342440ecSPrasad Singamsetty rv = EFAULT; 7771342440ecSPrasad Singamsetty } 77720a55fbb7Slm66018 77730a55fbb7Slm66018 return (rv); 77740a55fbb7Slm66018 } 77750a55fbb7Slm66018 77760a55fbb7Slm66018 /* 77770a55fbb7Slm66018 * Function: 77780a55fbb7Slm66018 * vdc_set_vtoc_convert() 
77790a55fbb7Slm66018 * 77800a55fbb7Slm66018 * Description: 7781d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSVTOC 7782d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 77830a55fbb7Slm66018 * 77840a55fbb7Slm66018 * Arguments: 7785d10e4ef2Snarayan * vdc - the vDisk client 77860a55fbb7Slm66018 * from - Buffer with data 77870a55fbb7Slm66018 * to - Buffer where data is to be copied to 77880a55fbb7Slm66018 * mode - flags passed to ioctl 77890a55fbb7Slm66018 * dir - direction of copy (in or out) 77900a55fbb7Slm66018 * 77910a55fbb7Slm66018 * Return Code: 77920a55fbb7Slm66018 * 0 - Success 77930a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 77940a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 77950a55fbb7Slm66018 */ 77960a55fbb7Slm66018 static int 7797d10e4ef2Snarayan vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 77980a55fbb7Slm66018 { 7799342440ecSPrasad Singamsetty void *uvtoc; 7800342440ecSPrasad Singamsetty struct vtoc vtoc; 7801342440ecSPrasad Singamsetty struct vtoc32 vtoc32; 7802342440ecSPrasad Singamsetty struct extvtoc evtoc; 7803342440ecSPrasad Singamsetty int i, rv; 78040a55fbb7Slm66018 78050a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 78060a55fbb7Slm66018 return (ENXIO); 78070a55fbb7Slm66018 7808342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) 7809342440ecSPrasad Singamsetty return (EOVERFLOW); 78102f5224aeSachartre 7811342440ecSPrasad Singamsetty uvtoc = (dir == VD_COPYIN)? 
from : to; 78120a55fbb7Slm66018 78130a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 7814342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &vtoc32, sizeof (vtoc32), mode); 7815342440ecSPrasad Singamsetty if (rv != 0) 7816342440ecSPrasad Singamsetty return (EFAULT); 7817342440ecSPrasad Singamsetty vtoc32toextvtoc(vtoc32, evtoc); 78180a55fbb7Slm66018 } else { 7819342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &vtoc, sizeof (vtoc), mode); 7820342440ecSPrasad Singamsetty if (rv != 0) 7821342440ecSPrasad Singamsetty return (EFAULT); 7822342440ecSPrasad Singamsetty vtoctoextvtoc(vtoc, evtoc); 78230a55fbb7Slm66018 } 78240a55fbb7Slm66018 78252f5224aeSachartre if (dir == VD_COPYOUT) { 78262f5224aeSachartre /* 78272f5224aeSachartre * The disk label may have changed. Revalidate the disk 78285b98b509Sachartre * geometry. This will also update the device nodes. 78292f5224aeSachartre */ 78302f5224aeSachartre vdc_validate(vdc); 78312f5224aeSachartre 78322f5224aeSachartre /* 78332f5224aeSachartre * We also need to keep track of the timestamp fields. 
78342f5224aeSachartre */ 78352f5224aeSachartre for (i = 0; i < V_NUMPAR; i++) { 7836342440ecSPrasad Singamsetty vdc->vtoc->timestamp[i] = evtoc.timestamp[i]; 7837342440ecSPrasad Singamsetty } 7838342440ecSPrasad Singamsetty 7839342440ecSPrasad Singamsetty } else { 7840342440ecSPrasad Singamsetty VTOC2VD_VTOC(&evtoc, (vd_vtoc_t *)to); 78412f5224aeSachartre } 78422f5224aeSachartre 78432f5224aeSachartre return (0); 78442f5224aeSachartre } 78452f5224aeSachartre 7846342440ecSPrasad Singamsetty static int 7847342440ecSPrasad Singamsetty vdc_get_extvtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7848342440ecSPrasad Singamsetty { 7849342440ecSPrasad Singamsetty int i, rv; 7850342440ecSPrasad Singamsetty struct extvtoc evtoc; 7851342440ecSPrasad Singamsetty 7852342440ecSPrasad Singamsetty if (dir != VD_COPYOUT) 7853342440ecSPrasad Singamsetty return (0); /* nothing to do */ 7854342440ecSPrasad Singamsetty 7855342440ecSPrasad Singamsetty if ((from == NULL) || (to == NULL)) 7856342440ecSPrasad Singamsetty return (ENXIO); 7857342440ecSPrasad Singamsetty 7858342440ecSPrasad Singamsetty VD_VTOC2VTOC((vd_vtoc_t *)from, &evtoc); 7859342440ecSPrasad Singamsetty 7860342440ecSPrasad Singamsetty /* fake the VTOC timestamp field */ 7861342440ecSPrasad Singamsetty for (i = 0; i < V_NUMPAR; i++) { 7862342440ecSPrasad Singamsetty evtoc.timestamp[i] = vdc->vtoc->timestamp[i]; 7863342440ecSPrasad Singamsetty } 7864342440ecSPrasad Singamsetty 7865342440ecSPrasad Singamsetty rv = ddi_copyout(&evtoc, to, sizeof (struct extvtoc), mode); 7866342440ecSPrasad Singamsetty if (rv != 0) 7867342440ecSPrasad Singamsetty rv = EFAULT; 7868342440ecSPrasad Singamsetty 7869342440ecSPrasad Singamsetty return (rv); 7870342440ecSPrasad Singamsetty } 7871342440ecSPrasad Singamsetty 7872342440ecSPrasad Singamsetty static int 7873342440ecSPrasad Singamsetty vdc_set_extvtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7874342440ecSPrasad Singamsetty { 7875342440ecSPrasad 
Singamsetty void *uvtoc; 7876342440ecSPrasad Singamsetty struct extvtoc evtoc; 7877342440ecSPrasad Singamsetty int i, rv; 7878342440ecSPrasad Singamsetty 7879342440ecSPrasad Singamsetty if ((from == NULL) || (to == NULL)) 7880342440ecSPrasad Singamsetty return (ENXIO); 7881342440ecSPrasad Singamsetty 7882342440ecSPrasad Singamsetty uvtoc = (dir == VD_COPYIN)? from : to; 7883342440ecSPrasad Singamsetty 7884342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &evtoc, sizeof (struct extvtoc), mode); 7885342440ecSPrasad Singamsetty if (rv != 0) 7886342440ecSPrasad Singamsetty return (EFAULT); 7887342440ecSPrasad Singamsetty 7888342440ecSPrasad Singamsetty if (dir == VD_COPYOUT) { 7889342440ecSPrasad Singamsetty /* 7890342440ecSPrasad Singamsetty * The disk label may have changed. Revalidate the disk 7891342440ecSPrasad Singamsetty * geometry. This will also update the device nodes. 7892342440ecSPrasad Singamsetty */ 7893342440ecSPrasad Singamsetty vdc_validate(vdc); 7894342440ecSPrasad Singamsetty 7895342440ecSPrasad Singamsetty /* 7896342440ecSPrasad Singamsetty * We also need to keep track of the timestamp fields. 
7897342440ecSPrasad Singamsetty */ 7898342440ecSPrasad Singamsetty for (i = 0; i < V_NUMPAR; i++) { 7899342440ecSPrasad Singamsetty vdc->vtoc->timestamp[i] = evtoc.timestamp[i]; 7900342440ecSPrasad Singamsetty } 7901342440ecSPrasad Singamsetty 7902342440ecSPrasad Singamsetty } else { 7903342440ecSPrasad Singamsetty VTOC2VD_VTOC(&evtoc, (vd_vtoc_t *)to); 7904342440ecSPrasad Singamsetty } 79050a55fbb7Slm66018 79060a55fbb7Slm66018 return (0); 79070a55fbb7Slm66018 } 79080a55fbb7Slm66018 79090a55fbb7Slm66018 /* 79100a55fbb7Slm66018 * Function: 79110a55fbb7Slm66018 * vdc_get_geom_convert() 79120a55fbb7Slm66018 * 79130a55fbb7Slm66018 * Description: 7914d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGGEOM, 7915d10e4ef2Snarayan * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 7916d10e4ef2Snarayan * defined in FWARC 2006/195 79170a55fbb7Slm66018 * 79180a55fbb7Slm66018 * Arguments: 7919d10e4ef2Snarayan * vdc - the vDisk client 79200a55fbb7Slm66018 * from - Buffer with data 79210a55fbb7Slm66018 * to - Buffer where data is to be copied to 79220a55fbb7Slm66018 * mode - flags passed to ioctl 79230a55fbb7Slm66018 * dir - direction of copy (in or out) 79240a55fbb7Slm66018 * 79250a55fbb7Slm66018 * Return Code: 79260a55fbb7Slm66018 * 0 - Success 79270a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 7928d10e4ef2Snarayan * EFAULT - ddi_copyout of data failed 79290a55fbb7Slm66018 */ 79300a55fbb7Slm66018 static int 7931d10e4ef2Snarayan vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 79320a55fbb7Slm66018 { 7933d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7934d10e4ef2Snarayan 79350a55fbb7Slm66018 struct dk_geom geom; 79360a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 79370a55fbb7Slm66018 int rv = 0; 79380a55fbb7Slm66018 79390a55fbb7Slm66018 if (dir != VD_COPYOUT) 79400a55fbb7Slm66018 return (0); /* nothing to do */ 79410a55fbb7Slm66018 79420a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 
79430a55fbb7Slm66018 return (ENXIO); 79440a55fbb7Slm66018 79450a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 79460a55fbb7Slm66018 rv = ddi_copyout(&geom, to, copy_len, mode); 79470a55fbb7Slm66018 if (rv != 0) 79480a55fbb7Slm66018 rv = EFAULT; 79490a55fbb7Slm66018 79500a55fbb7Slm66018 return (rv); 79510a55fbb7Slm66018 } 79520a55fbb7Slm66018 79530a55fbb7Slm66018 /* 79540a55fbb7Slm66018 * Function: 79550a55fbb7Slm66018 * vdc_set_geom_convert() 79560a55fbb7Slm66018 * 79570a55fbb7Slm66018 * Description: 7958d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSGEOM 7959d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 79600a55fbb7Slm66018 * 79610a55fbb7Slm66018 * Arguments: 7962d10e4ef2Snarayan * vdc - the vDisk client 79630a55fbb7Slm66018 * from - Buffer with data 79640a55fbb7Slm66018 * to - Buffer where data is to be copied to 79650a55fbb7Slm66018 * mode - flags passed to ioctl 79660a55fbb7Slm66018 * dir - direction of copy (in or out) 79670a55fbb7Slm66018 * 79680a55fbb7Slm66018 * Return Code: 79690a55fbb7Slm66018 * 0 - Success 79700a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 79710a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 79720a55fbb7Slm66018 */ 79730a55fbb7Slm66018 static int 7974d10e4ef2Snarayan vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 79750a55fbb7Slm66018 { 7976d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7977d10e4ef2Snarayan 79780a55fbb7Slm66018 vd_geom_t vdgeom; 79790a55fbb7Slm66018 void *tmp_mem = NULL; 79800a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 79810a55fbb7Slm66018 int rv = 0; 79820a55fbb7Slm66018 79830a55fbb7Slm66018 if (dir != VD_COPYIN) 79840a55fbb7Slm66018 return (0); /* nothing to do */ 79850a55fbb7Slm66018 79860a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 79870a55fbb7Slm66018 return (ENXIO); 79880a55fbb7Slm66018 79890a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 79900a55fbb7Slm66018 79910a55fbb7Slm66018 rv 
= ddi_copyin(from, tmp_mem, copy_len, mode); 79920a55fbb7Slm66018 if (rv != 0) { 79930a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 79940a55fbb7Slm66018 return (EFAULT); 79950a55fbb7Slm66018 } 79960a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 79970a55fbb7Slm66018 bcopy(&vdgeom, to, sizeof (vdgeom)); 79980a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 79990a55fbb7Slm66018 80000a55fbb7Slm66018 return (0); 80010a55fbb7Slm66018 } 80020a55fbb7Slm66018 80034bac2208Snarayan static int 80044bac2208Snarayan vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 80054bac2208Snarayan { 80064bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 80074bac2208Snarayan 80084bac2208Snarayan vd_efi_t *vd_efi; 80094bac2208Snarayan dk_efi_t dk_efi; 80104bac2208Snarayan int rv = 0; 80114bac2208Snarayan void *uaddr; 80124bac2208Snarayan 80134bac2208Snarayan if ((from == NULL) || (to == NULL)) 80144bac2208Snarayan return (ENXIO); 80154bac2208Snarayan 80164bac2208Snarayan if (dir == VD_COPYIN) { 80174bac2208Snarayan 80184bac2208Snarayan vd_efi = (vd_efi_t *)to; 80194bac2208Snarayan 80204bac2208Snarayan rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 80214bac2208Snarayan if (rv != 0) 80224bac2208Snarayan return (EFAULT); 80234bac2208Snarayan 80244bac2208Snarayan vd_efi->lba = dk_efi.dki_lba; 80254bac2208Snarayan vd_efi->length = dk_efi.dki_length; 80264bac2208Snarayan bzero(vd_efi->data, vd_efi->length); 80274bac2208Snarayan 80284bac2208Snarayan } else { 80294bac2208Snarayan 80304bac2208Snarayan rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 80314bac2208Snarayan if (rv != 0) 80324bac2208Snarayan return (EFAULT); 80334bac2208Snarayan 80344bac2208Snarayan uaddr = dk_efi.dki_data; 80354bac2208Snarayan 80364bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 80374bac2208Snarayan 80384bac2208Snarayan VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 80394bac2208Snarayan 80404bac2208Snarayan rv = ddi_copyout(dk_efi.dki_data, uaddr, 
dk_efi.dki_length, 80414bac2208Snarayan mode); 80424bac2208Snarayan if (rv != 0) 80434bac2208Snarayan return (EFAULT); 80444bac2208Snarayan 80454bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 80464bac2208Snarayan } 80474bac2208Snarayan 80484bac2208Snarayan return (0); 80494bac2208Snarayan } 80504bac2208Snarayan 80514bac2208Snarayan static int 80524bac2208Snarayan vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 80534bac2208Snarayan { 80544bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 80554bac2208Snarayan 80564bac2208Snarayan dk_efi_t dk_efi; 80574bac2208Snarayan void *uaddr; 80584bac2208Snarayan 80592f5224aeSachartre if (dir == VD_COPYOUT) { 80602f5224aeSachartre /* 80612f5224aeSachartre * The disk label may have changed. Revalidate the disk 80625b98b509Sachartre * geometry. This will also update the device nodes. 80632f5224aeSachartre */ 80642f5224aeSachartre vdc_validate(vdc); 80652f5224aeSachartre return (0); 80662f5224aeSachartre } 80674bac2208Snarayan 80684bac2208Snarayan if ((from == NULL) || (to == NULL)) 80694bac2208Snarayan return (ENXIO); 80704bac2208Snarayan 80714bac2208Snarayan if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 80724bac2208Snarayan return (EFAULT); 80734bac2208Snarayan 80744bac2208Snarayan uaddr = dk_efi.dki_data; 80754bac2208Snarayan 80764bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 80774bac2208Snarayan 80784bac2208Snarayan if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 80794bac2208Snarayan return (EFAULT); 80804bac2208Snarayan 80814bac2208Snarayan DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 80824bac2208Snarayan 80834bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 80844bac2208Snarayan 80854bac2208Snarayan return (0); 80864bac2208Snarayan } 80874bac2208Snarayan 808817cadca8Slm66018 808917cadca8Slm66018 /* -------------------------------------------------------------------------- */ 809017cadca8Slm66018 80910a55fbb7Slm66018 /* 
80920a55fbb7Slm66018 * Function: 80931ae08745Sheppo * vdc_create_fake_geometry() 80941ae08745Sheppo * 80951ae08745Sheppo * Description: 809617cadca8Slm66018 * This routine fakes up the disk info needed for some DKIO ioctls such 809717cadca8Slm66018 * as DKIOCINFO and DKIOCGMEDIAINFO [just like lofi(7D) and ramdisk(7D) do] 80981ae08745Sheppo * 809917cadca8Slm66018 * Note: This function must not be called until the vDisk attributes have 810017cadca8Slm66018 * been exchanged as part of the handshake with the vDisk server. 81011ae08745Sheppo * 81021ae08745Sheppo * Arguments: 81031ae08745Sheppo * vdc - soft state pointer for this instance of the device driver. 81041ae08745Sheppo * 81051ae08745Sheppo * Return Code: 810678fcd0a1Sachartre * none. 81071ae08745Sheppo */ 810878fcd0a1Sachartre static void 81091ae08745Sheppo vdc_create_fake_geometry(vdc_t *vdc) 81101ae08745Sheppo { 81111ae08745Sheppo ASSERT(vdc != NULL); 811278fcd0a1Sachartre ASSERT(vdc->max_xfer_sz != 0); 81130d0c8d4bSnarayan 81140d0c8d4bSnarayan /* 81151ae08745Sheppo * DKIOCINFO support 81161ae08745Sheppo */ 811778fcd0a1Sachartre if (vdc->cinfo == NULL) 81181ae08745Sheppo vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 81191ae08745Sheppo 81201ae08745Sheppo (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 81211ae08745Sheppo (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 812265908c77Syu, larry liu - Sun Microsystems - Beijing China /* max_xfer_sz is #blocks so we don't need to divide by vdisk_bsize */ 81238e6a2a04Slm66018 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 81242f5224aeSachartre 812587a7269eSachartre /* 81262f5224aeSachartre * We set the controller type to DKC_SCSI_CCS only if the VD_OP_SCSICMD 81272f5224aeSachartre * operation is supported, otherwise the controller type is DKC_DIRECT. 81282f5224aeSachartre * Version 1.0 does not support the VD_OP_SCSICMD operation, so the 81292f5224aeSachartre * controller type is always DKC_DIRECT in that case. 
81302f5224aeSachartre * 813117cadca8Slm66018 * If the virtual disk is backed by a physical CD/DVD device or 813217cadca8Slm66018 * an ISO image, modify the controller type to indicate this 813387a7269eSachartre */ 813417cadca8Slm66018 switch (vdc->vdisk_media) { 813517cadca8Slm66018 case VD_MEDIA_CD: 813617cadca8Slm66018 case VD_MEDIA_DVD: 813717cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_CDROM; 813817cadca8Slm66018 break; 813917cadca8Slm66018 case VD_MEDIA_FIXED: 81402f5224aeSachartre if (VD_OP_SUPPORTED(vdc->operations, VD_OP_SCSICMD)) 81412f5224aeSachartre vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 81422f5224aeSachartre else 814387a7269eSachartre vdc->cinfo->dki_ctype = DKC_DIRECT; 814417cadca8Slm66018 break; 814517cadca8Slm66018 default: 814617cadca8Slm66018 /* in the case of v1.0 we default to a fixed disk */ 814717cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_DIRECT; 814817cadca8Slm66018 break; 814917cadca8Slm66018 } 81501ae08745Sheppo vdc->cinfo->dki_flags = DKI_FMTVOL; 81511ae08745Sheppo vdc->cinfo->dki_cnum = 0; 81521ae08745Sheppo vdc->cinfo->dki_addr = 0; 81531ae08745Sheppo vdc->cinfo->dki_space = 0; 81541ae08745Sheppo vdc->cinfo->dki_prio = 0; 81551ae08745Sheppo vdc->cinfo->dki_vec = 0; 81561ae08745Sheppo vdc->cinfo->dki_unit = vdc->instance; 81571ae08745Sheppo vdc->cinfo->dki_slave = 0; 81581ae08745Sheppo /* 81591ae08745Sheppo * The partition number will be created on the fly depending on the 81601ae08745Sheppo * actual slice (i.e. minor node) that is used to request the data. 
81611ae08745Sheppo */ 81621ae08745Sheppo vdc->cinfo->dki_partition = 0; 81631ae08745Sheppo 81641ae08745Sheppo /* 81651ae08745Sheppo * DKIOCGMEDIAINFO support 81661ae08745Sheppo */ 81670a55fbb7Slm66018 if (vdc->minfo == NULL) 81681ae08745Sheppo vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 816917cadca8Slm66018 817017cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) { 817117cadca8Slm66018 vdc->minfo->dki_media_type = 817217cadca8Slm66018 VD_MEDIATYPE2DK_MEDIATYPE(vdc->vdisk_media); 817317cadca8Slm66018 } else { 81741ae08745Sheppo vdc->minfo->dki_media_type = DK_FIXED_DISK; 817517cadca8Slm66018 } 817617cadca8Slm66018 81774bac2208Snarayan vdc->minfo->dki_capacity = vdc->vdisk_size; 817865908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->minfo->dki_lbsize = vdc->vdisk_bsize; 817978fcd0a1Sachartre } 81801ae08745Sheppo 818178fcd0a1Sachartre static ushort_t 818278fcd0a1Sachartre vdc_lbl2cksum(struct dk_label *label) 818378fcd0a1Sachartre { 818478fcd0a1Sachartre int count; 818578fcd0a1Sachartre ushort_t sum, *sp; 818678fcd0a1Sachartre 818778fcd0a1Sachartre count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 818878fcd0a1Sachartre sp = (ushort_t *)label; 818978fcd0a1Sachartre sum = 0; 819078fcd0a1Sachartre while (count--) { 819178fcd0a1Sachartre sum ^= *sp++; 819278fcd0a1Sachartre } 819378fcd0a1Sachartre 819478fcd0a1Sachartre return (sum); 81950a55fbb7Slm66018 } 81960a55fbb7Slm66018 8197de3a5331SRamesh Chitrothu static void 8198de3a5331SRamesh Chitrothu vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size) 8199de3a5331SRamesh Chitrothu { 8200de3a5331SRamesh Chitrothu vd_err_stats_t *stp; 8201de3a5331SRamesh Chitrothu 8202de3a5331SRamesh Chitrothu ASSERT(MUTEX_HELD(&vdc->lock)); 8203de3a5331SRamesh Chitrothu ASSERT(xfr_size != 0); 8204de3a5331SRamesh Chitrothu 8205de3a5331SRamesh Chitrothu /* 8206de3a5331SRamesh Chitrothu * If the disk size is unknown or sizes are unchanged then don't 8207de3a5331SRamesh 
Chitrothu * update anything. 8208de3a5331SRamesh Chitrothu */ 8209de3a5331SRamesh Chitrothu if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || 821065908c77Syu, larry liu - Sun Microsystems - Beijing China (blk_size == vdc->vdisk_bsize && dsk_size == vdc->vdisk_size && 8211de3a5331SRamesh Chitrothu xfr_size == vdc->max_xfer_sz)) 8212de3a5331SRamesh Chitrothu return; 8213de3a5331SRamesh Chitrothu 8214de3a5331SRamesh Chitrothu /* 8215de3a5331SRamesh Chitrothu * We don't know at compile time what the vDisk server will think 8216de3a5331SRamesh Chitrothu * are good values but we apply a large (arbitrary) upper bound to 8217de3a5331SRamesh Chitrothu * prevent memory exhaustion in vdc if it was allocating a DRing 8218de3a5331SRamesh Chitrothu * based of huge values sent by the server. We probably will never 8219de3a5331SRamesh Chitrothu * exceed this except if the message was garbage. 8220de3a5331SRamesh Chitrothu */ 8221de3a5331SRamesh Chitrothu if ((xfr_size * blk_size) > (PAGESIZE * DEV_BSIZE)) { 8222de3a5331SRamesh Chitrothu DMSG(vdc, 0, "[%d] vds block transfer size too big;" 8223de3a5331SRamesh Chitrothu " using max supported by vdc", vdc->instance); 822465908c77Syu, larry liu - Sun Microsystems - Beijing China xfr_size = maxphys / blk_size; 8225de3a5331SRamesh Chitrothu } 8226de3a5331SRamesh Chitrothu 8227de3a5331SRamesh Chitrothu vdc->max_xfer_sz = xfr_size; 822865908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vdisk_bsize = blk_size; 8229de3a5331SRamesh Chitrothu vdc->vdisk_size = dsk_size; 8230de3a5331SRamesh Chitrothu 8231de3a5331SRamesh Chitrothu stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 8232de3a5331SRamesh Chitrothu stp->vd_capacity.value.ui64 = dsk_size * blk_size; 8233de3a5331SRamesh Chitrothu 8234de3a5331SRamesh Chitrothu vdc->minfo->dki_capacity = dsk_size; 8235de3a5331SRamesh Chitrothu vdc->minfo->dki_lbsize = (uint_t)blk_size; 8236de3a5331SRamesh Chitrothu } 8237de3a5331SRamesh Chitrothu 82380a55fbb7Slm66018 /* 823965908c77Syu, 
 * Update information about the VIO block size. The VIO block size is the
 * same as the vdisk block size which is stored in vdc->vdisk_bsize so we
 * do not store that information again.
 *
 * However, buf structures will always use a logical block size of 512 bytes
 * (DEV_BSIZE) and we will need to convert logical block numbers to VIO block
 * numbers for each read or write operation using vdc_strategy(). To speed up
 * this conversion, we expect the VIO block size to be a power of 2 and a
 * multiple of 512 bytes (DEV_BSIZE), and we cache some useful information.
 *
 * The function returns EINVAL if the new VIO block size (blk_size) is not a
 * power of 2 or not a multiple of 512 bytes, otherwise it returns 0.
825165908c77Syu, larry liu - Sun Microsystems - Beijing China */ 825265908c77Syu, larry liu - Sun Microsystems - Beijing China static int 825365908c77Syu, larry liu - Sun Microsystems - Beijing China vdc_update_vio_bsize(vdc_t *vdc, uint32_t blk_size) 825465908c77Syu, larry liu - Sun Microsystems - Beijing China { 825565908c77Syu, larry liu - Sun Microsystems - Beijing China uint32_t ratio, n; 825665908c77Syu, larry liu - Sun Microsystems - Beijing China int nshift = 0; 825765908c77Syu, larry liu - Sun Microsystems - Beijing China 825865908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = 0; 825965908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bshift = 0; 826065908c77Syu, larry liu - Sun Microsystems - Beijing China 826165908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(blk_size > 0); 826265908c77Syu, larry liu - Sun Microsystems - Beijing China 826365908c77Syu, larry liu - Sun Microsystems - Beijing China if ((blk_size % DEV_BSIZE) != 0) 826465908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 826565908c77Syu, larry liu - Sun Microsystems - Beijing China 826665908c77Syu, larry liu - Sun Microsystems - Beijing China ratio = blk_size / DEV_BSIZE; 826765908c77Syu, larry liu - Sun Microsystems - Beijing China 826865908c77Syu, larry liu - Sun Microsystems - Beijing China for (n = ratio; n > 1; n >>= 1) { 826965908c77Syu, larry liu - Sun Microsystems - Beijing China if ((n & 0x1) != 0) { 827065908c77Syu, larry liu - Sun Microsystems - Beijing China /* blk_size is not a power of 2 */ 827165908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 827265908c77Syu, larry liu - Sun Microsystems - Beijing China } 827365908c77Syu, larry liu - Sun Microsystems - Beijing China nshift++; 827465908c77Syu, larry liu - Sun Microsystems - Beijing China } 827565908c77Syu, larry liu - Sun Microsystems - Beijing China 827665908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bshift = 
nshift; 827765908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = ratio - 1; 827865908c77Syu, larry liu - Sun Microsystems - Beijing China 827965908c77Syu, larry liu - Sun Microsystems - Beijing China return (0); 828065908c77Syu, larry liu - Sun Microsystems - Beijing China } 828165908c77Syu, larry liu - Sun Microsystems - Beijing China 828265908c77Syu, larry liu - Sun Microsystems - Beijing China /* 82830a55fbb7Slm66018 * Function: 828478fcd0a1Sachartre * vdc_validate_geometry 82850a55fbb7Slm66018 * 82860a55fbb7Slm66018 * Description: 828778fcd0a1Sachartre * This routine discovers the label and geometry of the disk. It stores 828878fcd0a1Sachartre * the disk label and related information in the vdc structure. If it 828978fcd0a1Sachartre * fails to validate the geometry or to discover the disk label then 829078fcd0a1Sachartre * the label is marked as unknown (VD_DISK_LABEL_UNK). 82910a55fbb7Slm66018 * 82920a55fbb7Slm66018 * Arguments: 82930a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 82940a55fbb7Slm66018 * 82950a55fbb7Slm66018 * Return Code: 829678fcd0a1Sachartre * 0 - success. 829778fcd0a1Sachartre * EINVAL - unknown disk label. 829878fcd0a1Sachartre * ENOTSUP - geometry not applicable (EFI label). 829978fcd0a1Sachartre * EIO - error accessing the disk. 
83000a55fbb7Slm66018 */ 83010a55fbb7Slm66018 static int 830278fcd0a1Sachartre vdc_validate_geometry(vdc_t *vdc) 83030a55fbb7Slm66018 { 83040a55fbb7Slm66018 dev_t dev; 83052f5224aeSachartre int rv, rval; 830665908c77Syu, larry liu - Sun Microsystems - Beijing China struct dk_label *label; 830778fcd0a1Sachartre struct dk_geom geom; 8308342440ecSPrasad Singamsetty struct extvtoc vtoc; 8309edcc0754Sachartre efi_gpt_t *gpt; 8310edcc0754Sachartre efi_gpe_t *gpe; 8311edcc0754Sachartre vd_efi_dev_t edev; 83120a55fbb7Slm66018 83130a55fbb7Slm66018 ASSERT(vdc != NULL); 831478fcd0a1Sachartre ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 831578fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 83160a55fbb7Slm66018 831778fcd0a1Sachartre mutex_exit(&vdc->lock); 8318de3a5331SRamesh Chitrothu /* 8319de3a5331SRamesh Chitrothu * Check the disk capacity in case it has changed. If that fails then 8320de3a5331SRamesh Chitrothu * we proceed and we will be using the disk size we currently have. 8321de3a5331SRamesh Chitrothu */ 8322de3a5331SRamesh Chitrothu (void) vdc_check_capacity(vdc); 83230a55fbb7Slm66018 dev = makedevice(ddi_driver_major(vdc->dip), 83240a55fbb7Slm66018 VD_MAKE_DEV(vdc->instance, 0)); 83254bac2208Snarayan 83262f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL, &rval); 832778fcd0a1Sachartre if (rv == 0) 8328342440ecSPrasad Singamsetty rv = vd_process_ioctl(dev, DKIOCGEXTVTOC, (caddr_t)&vtoc, 83292f5224aeSachartre FKIOCTL, &rval); 83300d0c8d4bSnarayan 83314bac2208Snarayan if (rv == ENOTSUP) { 83324bac2208Snarayan /* 83334bac2208Snarayan * If the device does not support VTOC then we try 83344bac2208Snarayan * to read an EFI label. 8335edcc0754Sachartre * 8336edcc0754Sachartre * We need to know the block size and the disk size to 8337edcc0754Sachartre * be able to read an EFI label. 
83384bac2208Snarayan */ 8339edcc0754Sachartre if (vdc->vdisk_size == 0) { 8340edcc0754Sachartre mutex_enter(&vdc->lock); 8341edcc0754Sachartre vdc_store_label_unk(vdc); 8342de3a5331SRamesh Chitrothu return (EIO); 8343edcc0754Sachartre } 83444bac2208Snarayan 834565908c77Syu, larry liu - Sun Microsystems - Beijing China VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 8346edcc0754Sachartre 8347edcc0754Sachartre rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe); 83484bac2208Snarayan 83494bac2208Snarayan if (rv) { 83503af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 83514bac2208Snarayan vdc->instance, rv); 835278fcd0a1Sachartre mutex_enter(&vdc->lock); 835378fcd0a1Sachartre vdc_store_label_unk(vdc); 835478fcd0a1Sachartre return (EIO); 835578fcd0a1Sachartre } 835678fcd0a1Sachartre 835778fcd0a1Sachartre mutex_enter(&vdc->lock); 8358edcc0754Sachartre vdc_store_label_efi(vdc, gpt, gpe); 8359edcc0754Sachartre vd_efi_free(&edev, gpt, gpe); 836078fcd0a1Sachartre return (ENOTSUP); 836178fcd0a1Sachartre } 836278fcd0a1Sachartre 836378fcd0a1Sachartre if (rv != 0) { 836478fcd0a1Sachartre DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 836578fcd0a1Sachartre vdc->instance, rv); 836678fcd0a1Sachartre mutex_enter(&vdc->lock); 836778fcd0a1Sachartre vdc_store_label_unk(vdc); 836878fcd0a1Sachartre if (rv != EINVAL) 836978fcd0a1Sachartre rv = EIO; 83704bac2208Snarayan return (rv); 83714bac2208Snarayan } 83724bac2208Snarayan 837378fcd0a1Sachartre /* check that geometry and vtoc are valid */ 837478fcd0a1Sachartre if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 837578fcd0a1Sachartre vtoc.v_sanity != VTOC_SANE) { 837678fcd0a1Sachartre mutex_enter(&vdc->lock); 837778fcd0a1Sachartre vdc_store_label_unk(vdc); 837878fcd0a1Sachartre return (EINVAL); 837978fcd0a1Sachartre } 83804bac2208Snarayan 838178fcd0a1Sachartre /* 838278fcd0a1Sachartre * We have a disk and a valid VTOC. However this does not mean 838378fcd0a1Sachartre * that the disk currently have a VTOC label. 
The returned VTOC may 838478fcd0a1Sachartre * be a default VTOC to be used for configuring the disk (this is 838578fcd0a1Sachartre * what is done for disk image). So we read the label from the 838678fcd0a1Sachartre * beginning of the disk to ensure we really have a VTOC label. 838778fcd0a1Sachartre * 838878fcd0a1Sachartre * FUTURE: This could be the default way for reading the VTOC 838978fcd0a1Sachartre * from the disk as opposed to sending the VD_OP_GET_VTOC 839078fcd0a1Sachartre * to the server. This will be the default if vdc is implemented 839178fcd0a1Sachartre * ontop of cmlb. 839278fcd0a1Sachartre */ 839378fcd0a1Sachartre 839478fcd0a1Sachartre /* 839578fcd0a1Sachartre * Single slice disk does not support read using an absolute disk 839678fcd0a1Sachartre * offset so we just rely on the DKIOCGVTOC ioctl in that case. 839778fcd0a1Sachartre */ 839878fcd0a1Sachartre if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 839978fcd0a1Sachartre mutex_enter(&vdc->lock); 840078fcd0a1Sachartre if (vtoc.v_nparts != 1) { 840178fcd0a1Sachartre vdc_store_label_unk(vdc); 840278fcd0a1Sachartre return (EINVAL); 840378fcd0a1Sachartre } 840478fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 84054bac2208Snarayan return (0); 84064bac2208Snarayan } 84074bac2208Snarayan 840878fcd0a1Sachartre if (vtoc.v_nparts != V_NUMPAR) { 840978fcd0a1Sachartre mutex_enter(&vdc->lock); 841078fcd0a1Sachartre vdc_store_label_unk(vdc); 841178fcd0a1Sachartre return (EINVAL); 84120a55fbb7Slm66018 } 8413d10e4ef2Snarayan 8414d10e4ef2Snarayan /* 8415c813bb04SGabriel Carrillo * Most CD/DVDs do not have a disk label and the label is 8416c813bb04SGabriel Carrillo * generated by the disk driver. So the on-disk label check 8417c813bb04SGabriel Carrillo * below may fail and we return now to avoid this problem. 
8418c813bb04SGabriel Carrillo */ 8419c813bb04SGabriel Carrillo if (vdc->vdisk_media == VD_MEDIA_CD || 8420c813bb04SGabriel Carrillo vdc->vdisk_media == VD_MEDIA_DVD) { 8421c813bb04SGabriel Carrillo mutex_enter(&vdc->lock); 8422c813bb04SGabriel Carrillo vdc_store_label_vtoc(vdc, &geom, &vtoc); 8423c813bb04SGabriel Carrillo return (0); 8424c813bb04SGabriel Carrillo } 8425c813bb04SGabriel Carrillo 8426c813bb04SGabriel Carrillo /* 8427d10e4ef2Snarayan * Read disk label from start of disk 8428d10e4ef2Snarayan */ 842965908c77Syu, larry liu - Sun Microsystems - Beijing China label = kmem_alloc(vdc->vdisk_bsize, KM_SLEEP); 84306ace3c90SAlexandre Chartre 84316ace3c90SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BREAD, (caddr_t)label, vdc->vdisk_bsize, 84326ace3c90SAlexandre Chartre VD_SLICE_NONE, 0, NULL, VIO_read_dir, VDC_OP_NORMAL); 84330a55fbb7Slm66018 843465908c77Syu, larry liu - Sun Microsystems - Beijing China if (rv != 0 || label->dkl_magic != DKL_MAGIC || 843565908c77Syu, larry liu - Sun Microsystems - Beijing China label->dkl_cksum != vdc_lbl2cksum(label)) { 843678fcd0a1Sachartre DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 843778fcd0a1Sachartre vdc->instance); 843865908c77Syu, larry liu - Sun Microsystems - Beijing China kmem_free(label, vdc->vdisk_bsize); 843978fcd0a1Sachartre mutex_enter(&vdc->lock); 844078fcd0a1Sachartre vdc_store_label_unk(vdc); 844178fcd0a1Sachartre return (EINVAL); 844278fcd0a1Sachartre } 844378fcd0a1Sachartre 844465908c77Syu, larry liu - Sun Microsystems - Beijing China kmem_free(label, vdc->vdisk_bsize); 844578fcd0a1Sachartre mutex_enter(&vdc->lock); 844678fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 844778fcd0a1Sachartre return (0); 844878fcd0a1Sachartre } 844978fcd0a1Sachartre 845078fcd0a1Sachartre /* 845178fcd0a1Sachartre * Function: 845278fcd0a1Sachartre * vdc_validate 845378fcd0a1Sachartre * 845478fcd0a1Sachartre * Description: 845578fcd0a1Sachartre * This routine discovers the label of the disk and create the 
845678fcd0a1Sachartre * appropriate device nodes if the label has changed. 845778fcd0a1Sachartre * 845878fcd0a1Sachartre * Arguments: 845978fcd0a1Sachartre * vdc - soft state pointer for this instance of the device driver. 846078fcd0a1Sachartre * 846178fcd0a1Sachartre * Return Code: 846278fcd0a1Sachartre * none. 846378fcd0a1Sachartre */ 846478fcd0a1Sachartre static void 846578fcd0a1Sachartre vdc_validate(vdc_t *vdc) 846678fcd0a1Sachartre { 846778fcd0a1Sachartre vd_disk_label_t old_label; 8468edcc0754Sachartre vd_slice_t old_slice[V_NUMPAR]; 846978fcd0a1Sachartre int rv; 847078fcd0a1Sachartre 847178fcd0a1Sachartre ASSERT(!MUTEX_HELD(&vdc->lock)); 847278fcd0a1Sachartre 847378fcd0a1Sachartre mutex_enter(&vdc->lock); 847478fcd0a1Sachartre 847578fcd0a1Sachartre /* save the current label and vtoc */ 847678fcd0a1Sachartre old_label = vdc->vdisk_label; 8477edcc0754Sachartre bcopy(vdc->slice, &old_slice, sizeof (vd_slice_t) * V_NUMPAR); 847878fcd0a1Sachartre 847978fcd0a1Sachartre /* check the geometry */ 848078fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 848178fcd0a1Sachartre 848278fcd0a1Sachartre /* if the disk label has changed, update device nodes */ 84836ace3c90SAlexandre Chartre if (vdc->vdisk_type == VD_DISK_TYPE_DISK && 84846ace3c90SAlexandre Chartre vdc->vdisk_label != old_label) { 848578fcd0a1Sachartre 848678fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 848778fcd0a1Sachartre rv = vdc_create_device_nodes_efi(vdc); 848878fcd0a1Sachartre else 848978fcd0a1Sachartre rv = vdc_create_device_nodes_vtoc(vdc); 849078fcd0a1Sachartre 849178fcd0a1Sachartre if (rv != 0) { 849278fcd0a1Sachartre DMSG(vdc, 0, "![%d] Failed to update device nodes", 849378fcd0a1Sachartre vdc->instance); 849478fcd0a1Sachartre } 849578fcd0a1Sachartre } 849678fcd0a1Sachartre 849778fcd0a1Sachartre mutex_exit(&vdc->lock); 849878fcd0a1Sachartre } 849978fcd0a1Sachartre 850078fcd0a1Sachartre static void 850178fcd0a1Sachartre vdc_validate_task(void *arg) 850278fcd0a1Sachartre { 
850378fcd0a1Sachartre vdc_t *vdc = (vdc_t *)arg; 850478fcd0a1Sachartre 850578fcd0a1Sachartre vdc_validate(vdc); 850678fcd0a1Sachartre 850778fcd0a1Sachartre mutex_enter(&vdc->lock); 850878fcd0a1Sachartre ASSERT(vdc->validate_pending > 0); 850978fcd0a1Sachartre vdc->validate_pending--; 851078fcd0a1Sachartre mutex_exit(&vdc->lock); 85111ae08745Sheppo } 85124bac2208Snarayan 85134bac2208Snarayan /* 85144bac2208Snarayan * Function: 85154bac2208Snarayan * vdc_setup_devid() 85164bac2208Snarayan * 85174bac2208Snarayan * Description: 85184bac2208Snarayan * This routine discovers the devid of a vDisk. It requests the devid of 85194bac2208Snarayan * the underlying device from the vDisk server, builds an encapsulated 85204bac2208Snarayan * devid based on the retrieved devid and registers that new devid to 85214bac2208Snarayan * the vDisk. 85224bac2208Snarayan * 85234bac2208Snarayan * Arguments: 85244bac2208Snarayan * vdc - soft state pointer for this instance of the device driver. 85254bac2208Snarayan * 85264bac2208Snarayan * Return Code: 85274bac2208Snarayan * 0 - A devid was succesfully registered for the vDisk 85284bac2208Snarayan */ 85294bac2208Snarayan static int 85304bac2208Snarayan vdc_setup_devid(vdc_t *vdc) 85314bac2208Snarayan { 85324bac2208Snarayan int rv; 85334bac2208Snarayan vd_devid_t *vd_devid; 85344bac2208Snarayan size_t bufsize, bufid_len; 85356ace3c90SAlexandre Chartre ddi_devid_t vdisk_devid; 85366ace3c90SAlexandre Chartre char *devid_str; 85374bac2208Snarayan 85384bac2208Snarayan /* 85394bac2208Snarayan * At first sight, we don't know the size of the devid that the 85404bac2208Snarayan * server will return but this size will be encoded into the 85414bac2208Snarayan * reply. So we do a first request using a default size then we 85424bac2208Snarayan * check if this size was large enough. If not then we do a second 85434bac2208Snarayan * request with the correct size returned by the server. 
Note that 85444bac2208Snarayan * ldc requires size to be 8-byte aligned. 85454bac2208Snarayan */ 85464bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 85474bac2208Snarayan sizeof (uint64_t)); 85484bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 85494bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 85504bac2208Snarayan 85516ace3c90SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 85526ace3c90SAlexandre Chartre bufsize, 0, 0, NULL, VIO_both_dir, 0); 85533af08d82Slm66018 85546ace3c90SAlexandre Chartre DMSG(vdc, 2, "do_op returned %d\n", rv); 85553af08d82Slm66018 85564bac2208Snarayan if (rv) { 85574bac2208Snarayan kmem_free(vd_devid, bufsize); 85584bac2208Snarayan return (rv); 85594bac2208Snarayan } 85604bac2208Snarayan 85614bac2208Snarayan if (vd_devid->length > bufid_len) { 85624bac2208Snarayan /* 85634bac2208Snarayan * The returned devid is larger than the buffer used. Try again 85644bac2208Snarayan * with a buffer with the right size. 
85654bac2208Snarayan */ 85664bac2208Snarayan kmem_free(vd_devid, bufsize); 85674bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 85684bac2208Snarayan sizeof (uint64_t)); 85694bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 85704bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 85714bac2208Snarayan 85726ace3c90SAlexandre Chartre rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 85736ace3c90SAlexandre Chartre bufsize, 0, 0, VIO_both_dir, B_TRUE); 85743af08d82Slm66018 85754bac2208Snarayan if (rv) { 85764bac2208Snarayan kmem_free(vd_devid, bufsize); 85774bac2208Snarayan return (rv); 85784bac2208Snarayan } 85794bac2208Snarayan } 85804bac2208Snarayan 85814bac2208Snarayan /* 85824bac2208Snarayan * The virtual disk should have the same device id as the one associated 85834bac2208Snarayan * with the physical disk it is mapped on, otherwise sharing a disk 85844bac2208Snarayan * between a LDom and a non-LDom may not work (for example for a shared 85854bac2208Snarayan * SVM disk set). 85864bac2208Snarayan * 85874bac2208Snarayan * The DDI framework does not allow creating a device id with any 85884bac2208Snarayan * type so we first create a device id of type DEVID_ENCAP and then 85894bac2208Snarayan * we restore the orignal type of the physical device. 
85904bac2208Snarayan */ 85914bac2208Snarayan 85923af08d82Slm66018 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 85933af08d82Slm66018 85944bac2208Snarayan /* build an encapsulated devid based on the returned devid */ 85954bac2208Snarayan if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 85966ace3c90SAlexandre Chartre vd_devid->id, &vdisk_devid) != DDI_SUCCESS) { 85973af08d82Slm66018 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 85984bac2208Snarayan kmem_free(vd_devid, bufsize); 85994bac2208Snarayan return (1); 86004bac2208Snarayan } 86014bac2208Snarayan 86026ace3c90SAlexandre Chartre DEVID_FORMTYPE((impl_devid_t *)vdisk_devid, vd_devid->type); 86034bac2208Snarayan 86046ace3c90SAlexandre Chartre ASSERT(ddi_devid_valid(vdisk_devid) == DDI_SUCCESS); 86054bac2208Snarayan 86064bac2208Snarayan kmem_free(vd_devid, bufsize); 86074bac2208Snarayan 86086ace3c90SAlexandre Chartre if (vdc->devid != NULL) { 86096ace3c90SAlexandre Chartre /* check that the devid hasn't changed */ 86106ace3c90SAlexandre Chartre if (ddi_devid_compare(vdisk_devid, vdc->devid) == 0) { 86116ace3c90SAlexandre Chartre ddi_devid_free(vdisk_devid); 86126ace3c90SAlexandre Chartre return (0); 86136ace3c90SAlexandre Chartre } 86146ace3c90SAlexandre Chartre 86156ace3c90SAlexandre Chartre cmn_err(CE_WARN, "vdisk@%d backend devid has changed", 86166ace3c90SAlexandre Chartre vdc->instance); 86176ace3c90SAlexandre Chartre 86186ace3c90SAlexandre Chartre devid_str = ddi_devid_str_encode(vdc->devid, NULL); 86196ace3c90SAlexandre Chartre 86206ace3c90SAlexandre Chartre cmn_err(CE_CONT, "vdisk@%d backend initial devid: %s", 86216ace3c90SAlexandre Chartre vdc->instance, 86226ace3c90SAlexandre Chartre (devid_str)? 
devid_str : "<encoding error>"); 86236ace3c90SAlexandre Chartre 86246ace3c90SAlexandre Chartre if (devid_str) 86256ace3c90SAlexandre Chartre ddi_devid_str_free(devid_str); 86266ace3c90SAlexandre Chartre 86276ace3c90SAlexandre Chartre devid_str = ddi_devid_str_encode(vdisk_devid, NULL); 86286ace3c90SAlexandre Chartre 86296ace3c90SAlexandre Chartre cmn_err(CE_CONT, "vdisk@%d backend current devid: %s", 86306ace3c90SAlexandre Chartre vdc->instance, 86316ace3c90SAlexandre Chartre (devid_str)? devid_str : "<encoding error>"); 86326ace3c90SAlexandre Chartre 86336ace3c90SAlexandre Chartre if (devid_str) 86346ace3c90SAlexandre Chartre ddi_devid_str_free(devid_str); 86356ace3c90SAlexandre Chartre 86366ace3c90SAlexandre Chartre ddi_devid_free(vdisk_devid); 863700e3a3e9SAlexandre Chartre return (1); 863800e3a3e9SAlexandre Chartre } 863900e3a3e9SAlexandre Chartre 86406ace3c90SAlexandre Chartre if (ddi_devid_register(vdc->dip, vdisk_devid) != DDI_SUCCESS) { 86416ace3c90SAlexandre Chartre DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 86426ace3c90SAlexandre Chartre ddi_devid_free(vdisk_devid); 86436ace3c90SAlexandre Chartre return (1); 86446ace3c90SAlexandre Chartre } 86456ace3c90SAlexandre Chartre 86466ace3c90SAlexandre Chartre vdc->devid = vdisk_devid; 86476ace3c90SAlexandre Chartre 86484bac2208Snarayan return (0); 86494bac2208Snarayan } 86504bac2208Snarayan 86514bac2208Snarayan static void 8652edcc0754Sachartre vdc_store_label_efi(vdc_t *vdc, efi_gpt_t *gpt, efi_gpe_t *gpe) 86534bac2208Snarayan { 8654edcc0754Sachartre int i, nparts; 86554bac2208Snarayan 865678fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 865778fcd0a1Sachartre 865878fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_EFI; 8659342440ecSPrasad Singamsetty bzero(vdc->vtoc, sizeof (struct extvtoc)); 866078fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 8661edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 8662edcc0754Sachartre 8663edcc0754Sachartre nparts = 
gpt->efi_gpt_NumberOfPartitionEntries; 8664edcc0754Sachartre 8665edcc0754Sachartre for (i = 0; i < nparts && i < VD_EFI_WD_SLICE; i++) { 8666edcc0754Sachartre 8667d84f0041SAlexandre Chartre if (gpe[i].efi_gpe_StartingLBA == 0 && 8668edcc0754Sachartre gpe[i].efi_gpe_EndingLBA == 0) { 8669edcc0754Sachartre continue; 86704bac2208Snarayan } 8671edcc0754Sachartre 8672edcc0754Sachartre vdc->slice[i].start = gpe[i].efi_gpe_StartingLBA; 8673edcc0754Sachartre vdc->slice[i].nblocks = gpe[i].efi_gpe_EndingLBA - 8674edcc0754Sachartre gpe[i].efi_gpe_StartingLBA + 1; 8675edcc0754Sachartre } 8676edcc0754Sachartre 8677edcc0754Sachartre ASSERT(vdc->vdisk_size != 0); 8678edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].start = 0; 8679edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].nblocks = vdc->vdisk_size; 8680edcc0754Sachartre 86814bac2208Snarayan } 868278fcd0a1Sachartre 868378fcd0a1Sachartre static void 8684342440ecSPrasad Singamsetty vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct extvtoc *vtoc) 868578fcd0a1Sachartre { 8686edcc0754Sachartre int i; 8687edcc0754Sachartre 868878fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 868965908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vdc->vdisk_bsize == vtoc->v_sectorsz); 869078fcd0a1Sachartre 869178fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_VTOC; 8692342440ecSPrasad Singamsetty bcopy(vtoc, vdc->vtoc, sizeof (struct extvtoc)); 869378fcd0a1Sachartre bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 8694edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 8695edcc0754Sachartre 8696edcc0754Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 8697edcc0754Sachartre vdc->slice[i].start = vtoc->v_part[i].p_start; 8698edcc0754Sachartre vdc->slice[i].nblocks = vtoc->v_part[i].p_size; 8699edcc0754Sachartre } 870078fcd0a1Sachartre } 870178fcd0a1Sachartre 870278fcd0a1Sachartre static void 870378fcd0a1Sachartre vdc_store_label_unk(vdc_t *vdc) 870478fcd0a1Sachartre { 870578fcd0a1Sachartre 
ASSERT(MUTEX_HELD(&vdc->lock)); 870678fcd0a1Sachartre 870778fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_UNK; 8708342440ecSPrasad Singamsetty bzero(vdc->vtoc, sizeof (struct extvtoc)); 870978fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 8710edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 871178fcd0a1Sachartre } 8712