11ae08745Sheppo /* 21ae08745Sheppo * CDDL HEADER START 31ae08745Sheppo * 41ae08745Sheppo * The contents of this file are subject to the terms of the 51ae08745Sheppo * Common Development and Distribution License (the "License"). 61ae08745Sheppo * You may not use this file except in compliance with the License. 71ae08745Sheppo * 81ae08745Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91ae08745Sheppo * or http://www.opensolaris.org/os/licensing. 101ae08745Sheppo * See the License for the specific language governing permissions 111ae08745Sheppo * and limitations under the License. 121ae08745Sheppo * 131ae08745Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141ae08745Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151ae08745Sheppo * If applicable, add the following below this CDDL HEADER, with the 161ae08745Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171ae08745Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181ae08745Sheppo * 191ae08745Sheppo * CDDL HEADER END 201ae08745Sheppo */ 211ae08745Sheppo 221ae08745Sheppo /* 231ae08745Sheppo * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 241ae08745Sheppo * Use is subject to license terms. 251ae08745Sheppo */ 261ae08745Sheppo 271ae08745Sheppo #pragma ident "%Z%%M% %I% %E% SMI" 281ae08745Sheppo 291ae08745Sheppo /* 301ae08745Sheppo * Virtual disk server 311ae08745Sheppo */ 321ae08745Sheppo 331ae08745Sheppo 341ae08745Sheppo #include <sys/types.h> 351ae08745Sheppo #include <sys/conf.h> 361ae08745Sheppo #include <sys/ddi.h> 371ae08745Sheppo #include <sys/dkio.h> 381ae08745Sheppo #include <sys/file.h> 391ae08745Sheppo #include <sys/mdeg.h> 401ae08745Sheppo #include <sys/modhash.h> 411ae08745Sheppo #include <sys/note.h> 421ae08745Sheppo #include <sys/pathname.h> 431ae08745Sheppo #include <sys/sunddi.h> 441ae08745Sheppo #include <sys/sunldi.h> 451ae08745Sheppo #include <sys/sysmacros.h> 461ae08745Sheppo #include <sys/vio_common.h> 471ae08745Sheppo #include <sys/vdsk_mailbox.h> 481ae08745Sheppo #include <sys/vdsk_common.h> 491ae08745Sheppo #include <sys/vtoc.h> 501ae08745Sheppo 511ae08745Sheppo 521ae08745Sheppo /* Virtual disk server initialization flags */ 531ae08745Sheppo #define VDS_LOCKING 0x01 541ae08745Sheppo #define VDS_LDI 0x02 551ae08745Sheppo #define VDS_MDEG 0x04 561ae08745Sheppo 571ae08745Sheppo /* Virtual disk server tunable parameters */ 581ae08745Sheppo #define VDS_LDC_RETRIES 3 591ae08745Sheppo #define VDS_NCHAINS 32 601ae08745Sheppo 611ae08745Sheppo /* Identification parameters for MD, synthetic dkio(7i) structures, etc. */ 621ae08745Sheppo #define VDS_NAME "virtual-disk-server" 631ae08745Sheppo 641ae08745Sheppo #define VD_NAME "vd" 651ae08745Sheppo #define VD_VOLUME_NAME "vdisk" 661ae08745Sheppo #define VD_ASCIILABEL "Virtual Disk" 671ae08745Sheppo 681ae08745Sheppo #define VD_CHANNEL_ENDPOINT "channel-endpoint" 691ae08745Sheppo #define VD_ID_PROP "id" 701ae08745Sheppo #define VD_BLOCK_DEVICE_PROP "vds-block-device" 711ae08745Sheppo 721ae08745Sheppo /* Virtual disk initialization flags */ 731ae08745Sheppo #define VD_LOCKING 0x01 741ae08745Sheppo #define VD_TASKQ 0x02 751ae08745Sheppo #define VD_LDC 0x04 761ae08745Sheppo #define VD_DRING 0x08 771ae08745Sheppo #define VD_SID 0x10 781ae08745Sheppo #define VD_SEQ_NUM 0x20 791ae08745Sheppo 801ae08745Sheppo /* Flags for opening/closing backing devices via LDI */ 811ae08745Sheppo #define VD_OPEN_FLAGS (FEXCL | FREAD | FWRITE) 821ae08745Sheppo 831ae08745Sheppo /* 841ae08745Sheppo * By Solaris convention, slice/partition 2 represents the entire disk; 851ae08745Sheppo * unfortunately, this convention does not appear to be codified. 861ae08745Sheppo */ 871ae08745Sheppo #define VD_ENTIRE_DISK_SLICE 2 881ae08745Sheppo 891ae08745Sheppo /* Return a cpp token as a string */ 901ae08745Sheppo #define STRINGIZE(token) #token 911ae08745Sheppo 921ae08745Sheppo /* 931ae08745Sheppo * Print a message prefixed with the current function name to the message log 941ae08745Sheppo * (and optionally to the console for verbose boots); these macros use cpp's 951ae08745Sheppo * concatenation of string literals and C99 variable-length-argument-list 961ae08745Sheppo * macros 971ae08745Sheppo */ 981ae08745Sheppo #define PRN(...) _PRN("?%s(): "__VA_ARGS__, "") 991ae08745Sheppo #define _PRN(format, ...) \ 1001ae08745Sheppo cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__) 1011ae08745Sheppo 1021ae08745Sheppo /* Return a pointer to the "i"th vdisk dring element */ 1031ae08745Sheppo #define VD_DRING_ELEM(i) ((vd_dring_entry_t *)(void *) \ 1041ae08745Sheppo (vd->dring + (i)*vd->descriptor_size)) 1051ae08745Sheppo 1061ae08745Sheppo /* Return the virtual disk client's type as a string (for use in messages) */ 1071ae08745Sheppo #define VD_CLIENT(vd) \ 1081ae08745Sheppo (((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" : \ 1091ae08745Sheppo (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" : \ 1101ae08745Sheppo (((vd)->xfer_mode == 0) ? "null client" : \ 1111ae08745Sheppo "unsupported client"))) 1121ae08745Sheppo 1131ae08745Sheppo /* Debugging macros */ 1141ae08745Sheppo #ifdef DEBUG 1151ae08745Sheppo #define PR0 if (vd_msglevel > 0) PRN 1161ae08745Sheppo #define PR1 if (vd_msglevel > 1) PRN 1171ae08745Sheppo #define PR2 if (vd_msglevel > 2) PRN 1181ae08745Sheppo 1191ae08745Sheppo #define VD_DUMP_DRING_ELEM(elem) \ 1201ae08745Sheppo PRN("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \ 1211ae08745Sheppo elem->hdr.dstate, \ 1221ae08745Sheppo elem->payload.operation, \ 1231ae08745Sheppo elem->payload.status, \ 1241ae08745Sheppo elem->payload.nbytes, \ 1251ae08745Sheppo elem->payload.addr, \ 1261ae08745Sheppo elem->payload.ncookies); 1271ae08745Sheppo 1281ae08745Sheppo #else /* !DEBUG */ 1291ae08745Sheppo #define PR0(...) 1301ae08745Sheppo #define PR1(...) 1311ae08745Sheppo #define PR2(...) 1321ae08745Sheppo 1331ae08745Sheppo #define VD_DUMP_DRING_ELEM(elem) 1341ae08745Sheppo 1351ae08745Sheppo #endif /* DEBUG */ 1361ae08745Sheppo 1371ae08745Sheppo 1381ae08745Sheppo typedef struct vds { 1391ae08745Sheppo uint_t initialized; /* driver inst initialization flags */ 1401ae08745Sheppo dev_info_t *dip; /* driver inst devinfo pointer */ 1411ae08745Sheppo kmutex_t lock; /* lock for this structure */ 1421ae08745Sheppo ldi_ident_t ldi_ident; /* driver's identifier for LDI */ 1431ae08745Sheppo mod_hash_t *vd_table; /* table of virtual disks served */ 1441ae08745Sheppo mdeg_handle_t mdeg; /* handle for MDEG operations */ 1451ae08745Sheppo } vds_t; 1461ae08745Sheppo 1471ae08745Sheppo typedef struct vd { 1481ae08745Sheppo uint_t initialized; /* vdisk initialization flags */ 1491ae08745Sheppo kmutex_t lock; /* lock for this structure */ 1501ae08745Sheppo vds_t *vds; /* server for this vdisk */ 1511ae08745Sheppo ddi_taskq_t *taskq; /* taskq for this vdisk */ 1521ae08745Sheppo ldi_handle_t ldi_handle[V_NUMPAR]; /* LDI slice handles */ 1531ae08745Sheppo dev_t dev[V_NUMPAR]; /* dev numbers for slices */ 1541ae08745Sheppo uint_t nslices; /* number for slices */ 1551ae08745Sheppo size_t vdisk_size; /* number of blocks in vdisk */ 1561ae08745Sheppo vd_disk_type_t vdisk_type; /* slice or entire disk */ 1571ae08745Sheppo boolean_t pseudo; /* underlying pseudo dev */ 1581ae08745Sheppo struct dk_geom dk_geom; /* synthetic for slice type */ 1591ae08745Sheppo struct vtoc vtoc; /* synthetic for slice type */ 1601ae08745Sheppo ldc_status_t ldc_state; /* LDC connection state */ 1611ae08745Sheppo ldc_handle_t ldc_handle; /* handle for LDC comm */ 1621ae08745Sheppo size_t max_msglen; /* largest LDC message len */ 1631ae08745Sheppo boolean_t enabled; /* whether vdisk is enabled */ 1641ae08745Sheppo vd_state_t state; /* client handshake state */ 1651ae08745Sheppo uint8_t xfer_mode; /* transfer mode with client */ 1661ae08745Sheppo uint32_t sid; /* client's session ID */ 1671ae08745Sheppo uint64_t seq_num; /* message sequence number */ 1681ae08745Sheppo uint64_t dring_ident; /* identifier of dring */ 1691ae08745Sheppo ldc_dring_handle_t dring_handle; /* handle for dring ops */ 1701ae08745Sheppo uint32_t descriptor_size; /* num bytes in desc */ 1711ae08745Sheppo uint32_t dring_len; /* number of dring elements */ 1721ae08745Sheppo caddr_t dring; /* address of dring */ 1731ae08745Sheppo } vd_t; 1741ae08745Sheppo 1751ae08745Sheppo typedef struct vds_operation { 1761ae08745Sheppo uint8_t operation; 1771ae08745Sheppo int (*function)(vd_t *vd, vd_dring_payload_t *request); 1781ae08745Sheppo } vds_operation_t; 1791ae08745Sheppo 180*0a55fbb7Slm66018 typedef struct vd_ioctl { 181*0a55fbb7Slm66018 uint8_t operation; /* vdisk operation */ 182*0a55fbb7Slm66018 const char *operation_name; /* vdisk operation name */ 183*0a55fbb7Slm66018 size_t nbytes; /* size of operation buffer */ 184*0a55fbb7Slm66018 int cmd; /* corresponding ioctl cmd */ 185*0a55fbb7Slm66018 const char *cmd_name; /* ioctl cmd name */ 186*0a55fbb7Slm66018 void *arg; /* ioctl cmd argument */ 187*0a55fbb7Slm66018 /* convert input vd_buf to output ioctl_arg */ 188*0a55fbb7Slm66018 void (*copyin)(void *vd_buf, void *ioctl_arg); 189*0a55fbb7Slm66018 /* convert input ioctl_arg to output vd_buf */ 190*0a55fbb7Slm66018 void (*copyout)(void *ioctl_arg, void *vd_buf); 191*0a55fbb7Slm66018 } vd_ioctl_t; 192*0a55fbb7Slm66018 193*0a55fbb7Slm66018 /* Define trivial copyin/copyout conversion function flag */ 194*0a55fbb7Slm66018 #define VD_IDENTITY ((void (*)(void *, void *))-1) 1951ae08745Sheppo 1961ae08745Sheppo 1971ae08745Sheppo static int vds_ldc_retries = VDS_LDC_RETRIES; 1981ae08745Sheppo static void *vds_state; 1991ae08745Sheppo static uint64_t vds_operations; /* see vds_operation[] definition below */ 2001ae08745Sheppo 2011ae08745Sheppo static int vd_open_flags = VD_OPEN_FLAGS; 2021ae08745Sheppo 203*0a55fbb7Slm66018 /* 204*0a55fbb7Slm66018 * Supported protocol version pairs, from highest (newest) to lowest (oldest) 205*0a55fbb7Slm66018 * 206*0a55fbb7Slm66018 * Each supported major version should appear only once, paired with (and only 207*0a55fbb7Slm66018 * with) its highest supported minor version number (as the protocol requires 208*0a55fbb7Slm66018 * supporting all lower minor version numbers as well) 209*0a55fbb7Slm66018 */ 210*0a55fbb7Slm66018 static const vio_ver_t vds_version[] = {{1, 0}}; 211*0a55fbb7Slm66018 static const size_t vds_num_versions = 212*0a55fbb7Slm66018 sizeof (vds_version)/sizeof (vds_version[0]); 213*0a55fbb7Slm66018 2141ae08745Sheppo #ifdef DEBUG 2151ae08745Sheppo static int vd_msglevel; 2161ae08745Sheppo #endif /* DEBUG */ 2171ae08745Sheppo 2181ae08745Sheppo 2191ae08745Sheppo static int 2201ae08745Sheppo vd_bread(vd_t *vd, vd_dring_payload_t *request) 2211ae08745Sheppo { 2221ae08745Sheppo int status; 2231ae08745Sheppo struct buf buf; 2241ae08745Sheppo 2251ae08745Sheppo PR1("Read %lu bytes at block %lu", request->nbytes, request->addr); 2261ae08745Sheppo if (request->nbytes == 0) 2271ae08745Sheppo return (EINVAL); /* no service for trivial requests */ 2281ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 2291ae08745Sheppo ASSERT(request->slice < vd->nslices); 2301ae08745Sheppo 2311ae08745Sheppo bioinit(&buf); 2321ae08745Sheppo buf.b_flags = B_BUSY | B_READ; 2331ae08745Sheppo buf.b_bcount = request->nbytes; 2341ae08745Sheppo buf.b_un.b_addr = kmem_alloc(buf.b_bcount, KM_SLEEP); 2351ae08745Sheppo buf.b_lblkno = request->addr; 2361ae08745Sheppo buf.b_edev = vd->dev[request->slice]; 2371ae08745Sheppo 2381ae08745Sheppo if ((status = ldi_strategy(vd->ldi_handle[request->slice], &buf)) == 0) 2391ae08745Sheppo status = biowait(&buf); 2401ae08745Sheppo biofini(&buf); 2411ae08745Sheppo if ((status == 0) && 2421ae08745Sheppo ((status = ldc_mem_copy(vd->ldc_handle, buf.b_un.b_addr, 0, 2431ae08745Sheppo &request->nbytes, request->cookie, request->ncookies, 2441ae08745Sheppo LDC_COPY_OUT)) != 0)) { 2451ae08745Sheppo PRN("ldc_mem_copy() returned errno %d copying to client", 2461ae08745Sheppo status); 2471ae08745Sheppo } 2481ae08745Sheppo kmem_free(buf.b_un.b_addr, buf.b_bcount); /* nbytes can change */ 2491ae08745Sheppo return (status); 2501ae08745Sheppo } 2511ae08745Sheppo 2521ae08745Sheppo static int 2531ae08745Sheppo vd_do_bwrite(vd_t *vd, uint_t slice, diskaddr_t block, size_t nbytes, 2541ae08745Sheppo ldc_mem_cookie_t *cookie, uint64_t ncookies, caddr_t data) 2551ae08745Sheppo { 2561ae08745Sheppo int status; 2571ae08745Sheppo struct buf buf; 2581ae08745Sheppo 2591ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 2601ae08745Sheppo ASSERT(slice < vd->nslices); 2611ae08745Sheppo ASSERT(nbytes != 0); 2621ae08745Sheppo ASSERT(data != NULL); 2631ae08745Sheppo 2641ae08745Sheppo /* Get data from client */ 2651ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, data, 0, &nbytes, 2661ae08745Sheppo cookie, ncookies, LDC_COPY_IN)) != 0) { 2671ae08745Sheppo PRN("ldc_mem_copy() returned errno %d copying from client", 2681ae08745Sheppo status); 2691ae08745Sheppo return (status); 2701ae08745Sheppo } 2711ae08745Sheppo 2721ae08745Sheppo bioinit(&buf); 2731ae08745Sheppo buf.b_flags = B_BUSY | B_WRITE; 2741ae08745Sheppo buf.b_bcount = nbytes; 2751ae08745Sheppo buf.b_un.b_addr = data; 2761ae08745Sheppo buf.b_lblkno = block; 2771ae08745Sheppo buf.b_edev = vd->dev[slice]; 2781ae08745Sheppo 2791ae08745Sheppo if ((status = ldi_strategy(vd->ldi_handle[slice], &buf)) == 0) 2801ae08745Sheppo status = biowait(&buf); 2811ae08745Sheppo biofini(&buf); 2821ae08745Sheppo return (status); 2831ae08745Sheppo } 2841ae08745Sheppo 2851ae08745Sheppo static int 2861ae08745Sheppo vd_bwrite(vd_t *vd, vd_dring_payload_t *request) 2871ae08745Sheppo { 2881ae08745Sheppo int status; 2891ae08745Sheppo caddr_t data; 2901ae08745Sheppo 2911ae08745Sheppo 2921ae08745Sheppo PR1("Write %ld bytes at block %lu", request->nbytes, request->addr); 2931ae08745Sheppo if (request->nbytes == 0) 2941ae08745Sheppo return (EINVAL); /* no service for trivial requests */ 2951ae08745Sheppo data = kmem_alloc(request->nbytes, KM_SLEEP); 2961ae08745Sheppo status = vd_do_bwrite(vd, request->slice, request->addr, 2971ae08745Sheppo request->nbytes, request->cookie, request->ncookies, data); 2981ae08745Sheppo kmem_free(data, request->nbytes); 2991ae08745Sheppo return (status); 3001ae08745Sheppo } 3011ae08745Sheppo 302*0a55fbb7Slm66018 static void 303*0a55fbb7Slm66018 vd_geom2dk_geom(void *vd_buf, void *ioctl_arg) 304*0a55fbb7Slm66018 { 305*0a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg); 306*0a55fbb7Slm66018 } 307*0a55fbb7Slm66018 308*0a55fbb7Slm66018 static void 309*0a55fbb7Slm66018 vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg) 310*0a55fbb7Slm66018 { 311*0a55fbb7Slm66018 VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg); 312*0a55fbb7Slm66018 } 313*0a55fbb7Slm66018 314*0a55fbb7Slm66018 static void 315*0a55fbb7Slm66018 dk_geom2vd_geom(void *ioctl_arg, void *vd_buf) 316*0a55fbb7Slm66018 { 317*0a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf); 318*0a55fbb7Slm66018 } 319*0a55fbb7Slm66018 320*0a55fbb7Slm66018 static void 321*0a55fbb7Slm66018 vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf) 322*0a55fbb7Slm66018 { 323*0a55fbb7Slm66018 VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf); 324*0a55fbb7Slm66018 } 325*0a55fbb7Slm66018 3261ae08745Sheppo static int 327*0a55fbb7Slm66018 vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 3281ae08745Sheppo { 3291ae08745Sheppo switch (cmd) { 3301ae08745Sheppo case DKIOCGGEOM: 331*0a55fbb7Slm66018 ASSERT(ioctl_arg != NULL); 332*0a55fbb7Slm66018 bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom)); 3331ae08745Sheppo return (0); 3341ae08745Sheppo case DKIOCGVTOC: 335*0a55fbb7Slm66018 ASSERT(ioctl_arg != NULL); 336*0a55fbb7Slm66018 bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc)); 3371ae08745Sheppo return (0); 3381ae08745Sheppo default: 3391ae08745Sheppo return (ENOTSUP); 3401ae08745Sheppo } 3411ae08745Sheppo } 3421ae08745Sheppo 3431ae08745Sheppo static int 344*0a55fbb7Slm66018 vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) 3451ae08745Sheppo { 3461ae08745Sheppo int rval = 0, status; 3471ae08745Sheppo size_t nbytes = request->nbytes; /* modifiable copy */ 3481ae08745Sheppo 3491ae08745Sheppo 3501ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 3511ae08745Sheppo ASSERT(request->slice < vd->nslices); 3521ae08745Sheppo PR0("Performing %s", ioctl->operation_name); 3531ae08745Sheppo 354*0a55fbb7Slm66018 /* Get data from client and convert, if necessary */ 355*0a55fbb7Slm66018 if (ioctl->copyin != NULL) { 3561ae08745Sheppo ASSERT(nbytes != 0 && buf != NULL); 3571ae08745Sheppo PR1("Getting \"arg\" data from client"); 3581ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 3591ae08745Sheppo request->cookie, request->ncookies, 3601ae08745Sheppo LDC_COPY_IN)) != 0) { 3611ae08745Sheppo PRN("ldc_mem_copy() returned errno %d " 3621ae08745Sheppo "copying from client", status); 3631ae08745Sheppo return (status); 3641ae08745Sheppo } 365*0a55fbb7Slm66018 366*0a55fbb7Slm66018 /* Convert client's data, if necessary */ 367*0a55fbb7Slm66018 if (ioctl->copyin == VD_IDENTITY) /* use client buffer */ 368*0a55fbb7Slm66018 ioctl->arg = buf; 369*0a55fbb7Slm66018 else /* convert client vdisk operation data to ioctl data */ 370*0a55fbb7Slm66018 (ioctl->copyin)(buf, (void *)ioctl->arg); 3711ae08745Sheppo } 3721ae08745Sheppo 3731ae08745Sheppo /* 3741ae08745Sheppo * Handle single-slice block devices internally; otherwise, have the 3751ae08745Sheppo * real driver perform the ioctl() 3761ae08745Sheppo */ 3771ae08745Sheppo if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) { 378*0a55fbb7Slm66018 if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, 379*0a55fbb7Slm66018 (void *)ioctl->arg)) != 0) 3801ae08745Sheppo return (status); 3811ae08745Sheppo } else if ((status = ldi_ioctl(vd->ldi_handle[request->slice], 382*0a55fbb7Slm66018 ioctl->cmd, (intptr_t)ioctl->arg, FKIOCTL, kcred, 383*0a55fbb7Slm66018 &rval)) != 0) { 3841ae08745Sheppo PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status); 3851ae08745Sheppo return (status); 3861ae08745Sheppo } 3871ae08745Sheppo #ifdef DEBUG 3881ae08745Sheppo if (rval != 0) { 3891ae08745Sheppo PRN("%s set rval = %d, which is not being returned to client", 3901ae08745Sheppo ioctl->cmd_name, rval); 3911ae08745Sheppo } 3921ae08745Sheppo #endif /* DEBUG */ 3931ae08745Sheppo 394*0a55fbb7Slm66018 /* Convert data and send to client, if necessary */ 395*0a55fbb7Slm66018 if (ioctl->copyout != NULL) { 3961ae08745Sheppo ASSERT(nbytes != 0 && buf != NULL); 3971ae08745Sheppo PR1("Sending \"arg\" data to client"); 398*0a55fbb7Slm66018 399*0a55fbb7Slm66018 /* Convert ioctl data to vdisk operation data, if necessary */ 400*0a55fbb7Slm66018 if (ioctl->copyout != VD_IDENTITY) 401*0a55fbb7Slm66018 (ioctl->copyout)((void *)ioctl->arg, buf); 402*0a55fbb7Slm66018 4031ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 4041ae08745Sheppo request->cookie, request->ncookies, 4051ae08745Sheppo LDC_COPY_OUT)) != 0) { 4061ae08745Sheppo PRN("ldc_mem_copy() returned errno %d " 4071ae08745Sheppo "copying to client", status); 4081ae08745Sheppo return (status); 4091ae08745Sheppo } 4101ae08745Sheppo } 4111ae08745Sheppo 4121ae08745Sheppo return (status); 4131ae08745Sheppo } 4141ae08745Sheppo 415*0a55fbb7Slm66018 /* 416*0a55fbb7Slm66018 * Open any slices which have become non-empty as a result of performing a 417*0a55fbb7Slm66018 * set-VTOC operation for the client. 418*0a55fbb7Slm66018 * 419*0a55fbb7Slm66018 * When serving a full disk, vds attempts to exclusively open all of the 420*0a55fbb7Slm66018 * disk's slices to prevent another thread or process in the service domain 421*0a55fbb7Slm66018 * from "stealing" a slice or from performing I/O to a slice while a vds 422*0a55fbb7Slm66018 * client is accessing it. Unfortunately, underlying drivers, such as sd(7d) 423*0a55fbb7Slm66018 * and cmdk(7d), return an error when attempting to open the device file for a 424*0a55fbb7Slm66018 * slice which is currently empty according to the VTOC. This driver behavior 425*0a55fbb7Slm66018 * means that vds must skip opening empty slices when initializing a vdisk for 426*0a55fbb7Slm66018 * full-disk service and try to open slices that become non-empty (via a 427*0a55fbb7Slm66018 * set-VTOC operation) during use of the full disk in order to begin serving 428*0a55fbb7Slm66018 * such slices to the client. This approach has an inherent (and therefore 429*0a55fbb7Slm66018 * unavoidable) race condition; it also means that failure to open a 430*0a55fbb7Slm66018 * newly-non-empty slice has different semantics than failure to open an 431*0a55fbb7Slm66018 * initially-non-empty slice: Due to driver bahavior, opening a 432*0a55fbb7Slm66018 * newly-non-empty slice is a necessary side effect of vds performing a 433*0a55fbb7Slm66018 * (successful) set-VTOC operation for a client on an in-service (and in-use) 434*0a55fbb7Slm66018 * disk in order to begin serving the slice; failure of this side-effect 435*0a55fbb7Slm66018 * operation does not mean that the client's set-VTOC operation failed or that 436*0a55fbb7Slm66018 * operations on other slices must fail. Therefore, this function prints an 437*0a55fbb7Slm66018 * error message on failure to open a slice, but does not return an error to 438*0a55fbb7Slm66018 * its caller--unlike failure to open a slice initially, which results in an 439*0a55fbb7Slm66018 * error that prevents serving the vdisk (and thereby requires an 440*0a55fbb7Slm66018 * administrator to resolve the problem). Note that, apart from another 441*0a55fbb7Slm66018 * thread or process opening a new slice during the race-condition window, 442*0a55fbb7Slm66018 * failure to open a slice in this function will likely indicate an underlying 443*0a55fbb7Slm66018 * drive problem, which will also likely become evident in errors returned by 444*0a55fbb7Slm66018 * operations on other slices, and which will require administrative 445*0a55fbb7Slm66018 * intervention and possibly servicing the drive. 446*0a55fbb7Slm66018 */ 447*0a55fbb7Slm66018 static void 448*0a55fbb7Slm66018 vd_open_new_slices(vd_t *vd) 449*0a55fbb7Slm66018 { 450*0a55fbb7Slm66018 int rval, status; 451*0a55fbb7Slm66018 struct vtoc vtoc; 452*0a55fbb7Slm66018 453*0a55fbb7Slm66018 454*0a55fbb7Slm66018 /* Get the (new) VTOC for updated slice sizes */ 455*0a55fbb7Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, (intptr_t)&vtoc, 456*0a55fbb7Slm66018 FKIOCTL, kcred, &rval)) != 0) { 457*0a55fbb7Slm66018 PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d", status); 458*0a55fbb7Slm66018 return; 459*0a55fbb7Slm66018 } 460*0a55fbb7Slm66018 461*0a55fbb7Slm66018 /* Open any newly-non-empty slices */ 462*0a55fbb7Slm66018 for (int slice = 0; slice < vd->nslices; slice++) { 463*0a55fbb7Slm66018 /* Skip zero-length slices */ 464*0a55fbb7Slm66018 if (vtoc.v_part[slice].p_size == 0) { 465*0a55fbb7Slm66018 if (vd->ldi_handle[slice] != NULL) 466*0a55fbb7Slm66018 PR0("Open slice %u now has zero length", slice); 467*0a55fbb7Slm66018 continue; 468*0a55fbb7Slm66018 } 469*0a55fbb7Slm66018 470*0a55fbb7Slm66018 /* Skip already-open slices */ 471*0a55fbb7Slm66018 if (vd->ldi_handle[slice] != NULL) 472*0a55fbb7Slm66018 continue; 473*0a55fbb7Slm66018 474*0a55fbb7Slm66018 PR0("Opening newly-non-empty slice %u", slice); 475*0a55fbb7Slm66018 if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 476*0a55fbb7Slm66018 vd_open_flags, kcred, &vd->ldi_handle[slice], 477*0a55fbb7Slm66018 vd->vds->ldi_ident)) != 0) { 478*0a55fbb7Slm66018 PRN("ldi_open_by_dev() returned errno %d " 479*0a55fbb7Slm66018 "for slice %u", status, slice); 480*0a55fbb7Slm66018 } 481*0a55fbb7Slm66018 } 482*0a55fbb7Slm66018 } 483*0a55fbb7Slm66018 4841ae08745Sheppo #define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t)) 4851ae08745Sheppo static int 4861ae08745Sheppo vd_ioctl(vd_t *vd, vd_dring_payload_t *request) 4871ae08745Sheppo { 4881ae08745Sheppo int i, status; 4891ae08745Sheppo void *buf = NULL; 490*0a55fbb7Slm66018 struct dk_geom dk_geom = {0}; 491*0a55fbb7Slm66018 struct vtoc vtoc = {0}; 492*0a55fbb7Slm66018 vd_ioctl_t ioctl[] = { 493*0a55fbb7Slm66018 /* Command (no-copy) operations */ 494*0a55fbb7Slm66018 {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0, 495*0a55fbb7Slm66018 DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE), 496*0a55fbb7Slm66018 NULL, NULL, NULL}, 497*0a55fbb7Slm66018 498*0a55fbb7Slm66018 /* "Get" (copy-out) operations */ 499*0a55fbb7Slm66018 {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), 500*0a55fbb7Slm66018 DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), 501*0a55fbb7Slm66018 NULL, NULL, VD_IDENTITY}, 502*0a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), 503*0a55fbb7Slm66018 RNDSIZE(vd_geom_t), 504*0a55fbb7Slm66018 DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), 505*0a55fbb7Slm66018 &dk_geom, NULL, dk_geom2vd_geom}, 506*0a55fbb7Slm66018 {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t), 507*0a55fbb7Slm66018 DKIOCGVTOC, STRINGIZE(DKIOCGVTOC), 508*0a55fbb7Slm66018 &vtoc, NULL, vtoc2vd_vtoc}, 509*0a55fbb7Slm66018 510*0a55fbb7Slm66018 /* "Set" (copy-in) operations */ 511*0a55fbb7Slm66018 {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), 512*0a55fbb7Slm66018 DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), 513*0a55fbb7Slm66018 NULL, VD_IDENTITY, NULL}, 514*0a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), 515*0a55fbb7Slm66018 RNDSIZE(vd_geom_t), 516*0a55fbb7Slm66018 DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), 517*0a55fbb7Slm66018 &dk_geom, vd_geom2dk_geom, NULL}, 518*0a55fbb7Slm66018 {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t), 519*0a55fbb7Slm66018 DKIOCSVTOC, STRINGIZE(DKIOCSVTOC), 520*0a55fbb7Slm66018 &vtoc, vd_vtoc2vtoc, NULL}, 521*0a55fbb7Slm66018 }; 5221ae08745Sheppo size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); 5231ae08745Sheppo 5241ae08745Sheppo 5251ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 5261ae08745Sheppo ASSERT(request->slice < vd->nslices); 5271ae08745Sheppo 5281ae08745Sheppo /* 5291ae08745Sheppo * Determine ioctl corresponding to caller's "operation" and 5301ae08745Sheppo * validate caller's "nbytes" 5311ae08745Sheppo */ 5321ae08745Sheppo for (i = 0; i < nioctls; i++) { 5331ae08745Sheppo if (request->operation == ioctl[i].operation) { 534*0a55fbb7Slm66018 /* LDC memory operations require 8-byte multiples */ 535*0a55fbb7Slm66018 ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); 536*0a55fbb7Slm66018 537*0a55fbb7Slm66018 if (request->nbytes != ioctl[i].nbytes) { 538*0a55fbb7Slm66018 PRN("%s: Expected nbytes = %lu, got %lu", 539*0a55fbb7Slm66018 ioctl[i].operation_name, ioctl[i].nbytes, 540*0a55fbb7Slm66018 request->nbytes); 5411ae08745Sheppo return (EINVAL); 5421ae08745Sheppo } 5431ae08745Sheppo 5441ae08745Sheppo break; 5451ae08745Sheppo } 5461ae08745Sheppo } 5471ae08745Sheppo ASSERT(i < nioctls); /* because "operation" already validated */ 5481ae08745Sheppo 5491ae08745Sheppo if (request->nbytes) 5501ae08745Sheppo buf = kmem_zalloc(request->nbytes, KM_SLEEP); 5511ae08745Sheppo status = vd_do_ioctl(vd, request, buf, &ioctl[i]); 5521ae08745Sheppo if (request->nbytes) 5531ae08745Sheppo kmem_free(buf, request->nbytes); 554*0a55fbb7Slm66018 if ((request->operation == VD_OP_SET_VTOC) && 555*0a55fbb7Slm66018 (vd->vdisk_type == VD_DISK_TYPE_DISK)) 556*0a55fbb7Slm66018 vd_open_new_slices(vd); 5571ae08745Sheppo return (status); 5581ae08745Sheppo } 5591ae08745Sheppo 5601ae08745Sheppo /* 5611ae08745Sheppo * Define the supported operations once the functions for performing them have 5621ae08745Sheppo * been defined 5631ae08745Sheppo */ 5641ae08745Sheppo static const vds_operation_t vds_operation[] = { 5651ae08745Sheppo {VD_OP_BREAD, vd_bread}, 5661ae08745Sheppo {VD_OP_BWRITE, vd_bwrite}, 5671ae08745Sheppo {VD_OP_FLUSH, vd_ioctl}, 5681ae08745Sheppo {VD_OP_GET_WCE, vd_ioctl}, 5691ae08745Sheppo {VD_OP_SET_WCE, vd_ioctl}, 5701ae08745Sheppo {VD_OP_GET_VTOC, vd_ioctl}, 5711ae08745Sheppo {VD_OP_SET_VTOC, vd_ioctl}, 5721ae08745Sheppo {VD_OP_GET_DISKGEOM, vd_ioctl}, 573*0a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, vd_ioctl} 5741ae08745Sheppo }; 5751ae08745Sheppo 5761ae08745Sheppo static const size_t vds_noperations = 5771ae08745Sheppo (sizeof (vds_operation))/(sizeof (vds_operation[0])); 5781ae08745Sheppo 5791ae08745Sheppo /* 5801ae08745Sheppo * Process a request using a defined operation 5811ae08745Sheppo */ 5821ae08745Sheppo static int 5831ae08745Sheppo vd_process_request(vd_t *vd, vd_dring_payload_t *request) 5841ae08745Sheppo { 5851ae08745Sheppo int i; 5861ae08745Sheppo 5871ae08745Sheppo 5881ae08745Sheppo PR1("Entered"); 5891ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 5901ae08745Sheppo 5911ae08745Sheppo /* Range-check slice */ 5921ae08745Sheppo if (request->slice >= vd->nslices) { 5931ae08745Sheppo PRN("Invalid \"slice\" %u (max %u) for virtual disk", 5941ae08745Sheppo request->slice, (vd->nslices - 1)); 5951ae08745Sheppo return (EINVAL); 5961ae08745Sheppo } 5971ae08745Sheppo 5981ae08745Sheppo /* Perform the requested operation */ 5991ae08745Sheppo for (i = 0; i < vds_noperations; i++) 6001ae08745Sheppo if (request->operation == vds_operation[i].operation) 6011ae08745Sheppo return (vds_operation[i].function(vd, request)); 6021ae08745Sheppo 6031ae08745Sheppo /* No matching operation found */ 6041ae08745Sheppo PRN("Unsupported operation %u", request->operation); 6051ae08745Sheppo return (ENOTSUP); 6061ae08745Sheppo } 6071ae08745Sheppo 6081ae08745Sheppo static int 6091ae08745Sheppo send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen) 6101ae08745Sheppo { 6111ae08745Sheppo int retry, status; 6121ae08745Sheppo size_t nbytes; 6131ae08745Sheppo 6141ae08745Sheppo 6151ae08745Sheppo for (retry = 0, status = EWOULDBLOCK; 6161ae08745Sheppo retry < vds_ldc_retries && status == EWOULDBLOCK; 6171ae08745Sheppo retry++) { 6181ae08745Sheppo PR1("ldc_write() attempt %d", (retry + 1)); 6191ae08745Sheppo nbytes = msglen; 6201ae08745Sheppo status = ldc_write(ldc_handle, msg, &nbytes); 6211ae08745Sheppo } 6221ae08745Sheppo 6231ae08745Sheppo if (status != 0) { 6241ae08745Sheppo PRN("ldc_write() returned errno %d", status); 6251ae08745Sheppo return (status); 6261ae08745Sheppo } else if (nbytes != msglen) { 6271ae08745Sheppo PRN("ldc_write() performed only partial write"); 6281ae08745Sheppo return (EIO); 6291ae08745Sheppo } 6301ae08745Sheppo 6311ae08745Sheppo PR1("SENT %lu bytes", msglen); 6321ae08745Sheppo return (0); 6331ae08745Sheppo } 6341ae08745Sheppo 6351ae08745Sheppo /* 636*0a55fbb7Slm66018 * Return true if the "type", "subtype", and "env" fields of the "tag" first 637*0a55fbb7Slm66018 * argument match the corresponding remaining arguments; otherwise, return false 6381ae08745Sheppo */ 639*0a55fbb7Slm66018 boolean_t 6401ae08745Sheppo vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) 6411ae08745Sheppo { 6421ae08745Sheppo return ((tag->vio_msgtype == type) && 6431ae08745Sheppo (tag->vio_subtype == subtype) && 644*0a55fbb7Slm66018 (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; 6451ae08745Sheppo } 6461ae08745Sheppo 647*0a55fbb7Slm66018 /* 648*0a55fbb7Slm66018 * Check whether the major/minor version specified in "ver_msg" is supported 649*0a55fbb7Slm66018 * by this server. 650*0a55fbb7Slm66018 */ 651*0a55fbb7Slm66018 static boolean_t 652*0a55fbb7Slm66018 vds_supported_version(vio_ver_msg_t *ver_msg) 653*0a55fbb7Slm66018 { 654*0a55fbb7Slm66018 for (int i = 0; i < vds_num_versions; i++) { 655*0a55fbb7Slm66018 ASSERT(vds_version[i].major > 0); 656*0a55fbb7Slm66018 ASSERT((i == 0) || 657*0a55fbb7Slm66018 (vds_version[i].major < vds_version[i-1].major)); 658*0a55fbb7Slm66018 659*0a55fbb7Slm66018 /* 660*0a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 661*0a55fbb7Slm66018 * necessary, down to the highest value supported by this 662*0a55fbb7Slm66018 * server and return true so this message will get "ack"ed; 663*0a55fbb7Slm66018 * the client should also support all minor versions lower 664*0a55fbb7Slm66018 * than the value it sent 665*0a55fbb7Slm66018 */ 666*0a55fbb7Slm66018 if (ver_msg->ver_major == vds_version[i].major) { 667*0a55fbb7Slm66018 if (ver_msg->ver_minor > vds_version[i].minor) { 668*0a55fbb7Slm66018 PR0("Adjusting minor version from %u to %u", 669*0a55fbb7Slm66018 ver_msg->ver_minor, vds_version[i].minor); 670*0a55fbb7Slm66018 ver_msg->ver_minor = vds_version[i].minor; 671*0a55fbb7Slm66018 } 672*0a55fbb7Slm66018 return (B_TRUE); 673*0a55fbb7Slm66018 } 674*0a55fbb7Slm66018 675*0a55fbb7Slm66018 /* 676*0a55fbb7Slm66018 * If the message contains a higher major version number, set 677*0a55fbb7Slm66018 * the message's major/minor versions to the current values 678*0a55fbb7Slm66018 * and return false, so this message will get "nack"ed with 679*0a55fbb7Slm66018 * these values, and the client will potentially try again 680*0a55fbb7Slm66018 * with the same or a lower version 681*0a55fbb7Slm66018 */ 682*0a55fbb7Slm66018 if (ver_msg->ver_major > vds_version[i].major) { 683*0a55fbb7Slm66018 ver_msg->ver_major = vds_version[i].major; 684*0a55fbb7Slm66018 ver_msg->ver_minor = vds_version[i].minor; 685*0a55fbb7Slm66018 return (B_FALSE); 686*0a55fbb7Slm66018 } 687*0a55fbb7Slm66018 688*0a55fbb7Slm66018 /* 689*0a55fbb7Slm66018 * Otherwise, the message's major version is less than the 690*0a55fbb7Slm66018 * current major version, so continue the loop to the next 691*0a55fbb7Slm66018 * (lower) supported version 692*0a55fbb7Slm66018 */ 693*0a55fbb7Slm66018 } 694*0a55fbb7Slm66018 695*0a55fbb7Slm66018 /* 696*0a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 697*0a55fbb7Slm66018 * message to terminate negotiation 698*0a55fbb7Slm66018 */ 699*0a55fbb7Slm66018 ver_msg->ver_major = 0; 700*0a55fbb7Slm66018 ver_msg->ver_minor = 0; 701*0a55fbb7Slm66018 return (B_FALSE); 702*0a55fbb7Slm66018 } 703*0a55fbb7Slm66018 704*0a55fbb7Slm66018 /* 705*0a55fbb7Slm66018 * Process a version message from a client. vds expects to receive version 706*0a55fbb7Slm66018 * messages from clients seeking service, but never issues version messages 707*0a55fbb7Slm66018 * itself; therefore, vds can ACK or NACK client version messages, but does 708*0a55fbb7Slm66018 * not expect to receive version-message ACKs or NACKs (and will treat such 709*0a55fbb7Slm66018 * messages as invalid). 710*0a55fbb7Slm66018 */ 7111ae08745Sheppo static int 712*0a55fbb7Slm66018 vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 7131ae08745Sheppo { 7141ae08745Sheppo vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg; 7151ae08745Sheppo 7161ae08745Sheppo 7171ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 7181ae08745Sheppo 7191ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 7201ae08745Sheppo VIO_VER_INFO)) { 7211ae08745Sheppo return (ENOMSG); /* not a version message */ 7221ae08745Sheppo } 7231ae08745Sheppo 7241ae08745Sheppo if (msglen != sizeof (*ver_msg)) { 7251ae08745Sheppo PRN("Expected %lu-byte version message; " 7261ae08745Sheppo "received %lu bytes", sizeof (*ver_msg), msglen); 7271ae08745Sheppo return (EBADMSG); 7281ae08745Sheppo } 7291ae08745Sheppo 7301ae08745Sheppo if (ver_msg->dev_class != VDEV_DISK) { 7311ae08745Sheppo PRN("Expected device class %u (disk); received %u", 7321ae08745Sheppo VDEV_DISK, ver_msg->dev_class); 7331ae08745Sheppo return (EBADMSG); 7341ae08745Sheppo } 7351ae08745Sheppo 736*0a55fbb7Slm66018 /* 737*0a55fbb7Slm66018 * We're talking to the expected kind of client; set our device class 738*0a55fbb7Slm66018 * for "ack/nack" back to the client 739*0a55fbb7Slm66018 */ 7401ae08745Sheppo ver_msg->dev_class = VDEV_DISK_SERVER; 741*0a55fbb7Slm66018 742*0a55fbb7Slm66018 /* 743*0a55fbb7Slm66018 * Check whether the (valid) version message specifies a version 744*0a55fbb7Slm66018 * supported by this server. If the version is not supported, return 745*0a55fbb7Slm66018 * EBADMSG so the message will get "nack"ed; vds_supported_version() 746*0a55fbb7Slm66018 * will have updated the message with a supported version for the 747*0a55fbb7Slm66018 * client to consider 748*0a55fbb7Slm66018 */ 749*0a55fbb7Slm66018 if (!vds_supported_version(ver_msg)) 750*0a55fbb7Slm66018 return (EBADMSG); 751*0a55fbb7Slm66018 752*0a55fbb7Slm66018 753*0a55fbb7Slm66018 /* 754*0a55fbb7Slm66018 * A version has been agreed upon; use the client's SID for 755*0a55fbb7Slm66018 * communication on this channel now 756*0a55fbb7Slm66018 */ 757*0a55fbb7Slm66018 ASSERT(!(vd->initialized & VD_SID)); 758*0a55fbb7Slm66018 vd->sid = ver_msg->tag.vio_sid; 759*0a55fbb7Slm66018 vd->initialized |= VD_SID; 760*0a55fbb7Slm66018 761*0a55fbb7Slm66018 /* 762*0a55fbb7Slm66018 * When multiple versions are supported, this function should store 763*0a55fbb7Slm66018 * the negotiated major and minor version values in the "vd" data 764*0a55fbb7Slm66018 * structure to govern further communication; in particular, note that 765*0a55fbb7Slm66018 * the client might have specified a lower minor version for the 766*0a55fbb7Slm66018 * agreed major version than specifed in the vds_version[] array. The 767*0a55fbb7Slm66018 * following assertions should help remind future maintainers to make 768*0a55fbb7Slm66018 * the appropriate changes to support multiple versions. 769*0a55fbb7Slm66018 */ 770*0a55fbb7Slm66018 ASSERT(vds_num_versions == 1); 771*0a55fbb7Slm66018 ASSERT(ver_msg->ver_major == vds_version[0].major); 772*0a55fbb7Slm66018 ASSERT(ver_msg->ver_minor == vds_version[0].minor); 773*0a55fbb7Slm66018 774*0a55fbb7Slm66018 PR0("Using major version %u, minor version %u", 775*0a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 7761ae08745Sheppo return (0); 7771ae08745Sheppo } 7781ae08745Sheppo 7791ae08745Sheppo static int 7801ae08745Sheppo vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 7811ae08745Sheppo { 7821ae08745Sheppo vd_attr_msg_t *attr_msg = (vd_attr_msg_t *)msg; 7831ae08745Sheppo 7841ae08745Sheppo 7851ae08745Sheppo PR0("Entered"); 7861ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 7871ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 7881ae08745Sheppo 7891ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 7901ae08745Sheppo VIO_ATTR_INFO)) { 7911ae08745Sheppo return (ENOMSG); /* not an attribute message */ 7921ae08745Sheppo } 7931ae08745Sheppo 7941ae08745Sheppo if (msglen != sizeof (*attr_msg)) { 7951ae08745Sheppo PRN("Expected %lu-byte attribute message; " 7961ae08745Sheppo "received %lu bytes", sizeof (*attr_msg), msglen); 7971ae08745Sheppo return (EBADMSG); 7981ae08745Sheppo } 7991ae08745Sheppo 8001ae08745Sheppo if (attr_msg->max_xfer_sz == 0) { 8011ae08745Sheppo PRN("Received maximum transfer size of 0 from client"); 8021ae08745Sheppo return (EBADMSG); 8031ae08745Sheppo } 8041ae08745Sheppo 8051ae08745Sheppo if ((attr_msg->xfer_mode != VIO_DESC_MODE) && 8061ae08745Sheppo (attr_msg->xfer_mode != VIO_DRING_MODE)) { 8071ae08745Sheppo PRN("Client requested unsupported transfer mode"); 8081ae08745Sheppo return (EBADMSG); 8091ae08745Sheppo } 8101ae08745Sheppo 8111ae08745Sheppo 8121ae08745Sheppo /* Success: valid message and transfer mode */ 8131ae08745Sheppo vd->xfer_mode = attr_msg->xfer_mode; 8141ae08745Sheppo if (vd->xfer_mode == VIO_DESC_MODE) { 8151ae08745Sheppo /* 8161ae08745Sheppo * The vd_dring_inband_msg_t contains one cookie; need room 8171ae08745Sheppo * for up to n-1 more cookies, where "n" is the number of full 8181ae08745Sheppo * pages plus possibly one partial page required to cover 8191ae08745Sheppo * "max_xfer_sz". Add room for one more cookie if 8201ae08745Sheppo * "max_xfer_sz" isn't an integral multiple of the page size. 8211ae08745Sheppo * Must first get the maximum transfer size in bytes. 8221ae08745Sheppo */ 8231ae08745Sheppo size_t max_xfer_bytes = attr_msg->vdisk_block_size ? 8241ae08745Sheppo attr_msg->vdisk_block_size*attr_msg->max_xfer_sz : 8251ae08745Sheppo attr_msg->max_xfer_sz; 8261ae08745Sheppo size_t max_inband_msglen = 8271ae08745Sheppo sizeof (vd_dring_inband_msg_t) + 8281ae08745Sheppo ((max_xfer_bytes/PAGESIZE + 8291ae08745Sheppo ((max_xfer_bytes % PAGESIZE) ? 1 : 0))* 8301ae08745Sheppo (sizeof (ldc_mem_cookie_t))); 8311ae08745Sheppo 8321ae08745Sheppo /* 8331ae08745Sheppo * Set the maximum expected message length to 8341ae08745Sheppo * accommodate in-band-descriptor messages with all 8351ae08745Sheppo * their cookies 8361ae08745Sheppo */ 8371ae08745Sheppo vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen); 8381ae08745Sheppo } 8391ae08745Sheppo 8401ae08745Sheppo attr_msg->vdisk_size = vd->vdisk_size; 8411ae08745Sheppo attr_msg->vdisk_type = vd->vdisk_type; 8421ae08745Sheppo attr_msg->operations = vds_operations; 8431ae08745Sheppo PR0("%s", VD_CLIENT(vd)); 8441ae08745Sheppo return (0); 8451ae08745Sheppo } 8461ae08745Sheppo 8471ae08745Sheppo static int 8481ae08745Sheppo vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 8491ae08745Sheppo { 8501ae08745Sheppo int status; 8511ae08745Sheppo size_t expected; 8521ae08745Sheppo ldc_mem_info_t dring_minfo; 8531ae08745Sheppo vio_dring_reg_msg_t *reg_msg = (vio_dring_reg_msg_t *)msg; 8541ae08745Sheppo 8551ae08745Sheppo 8561ae08745Sheppo PR0("Entered"); 8571ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 8581ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 8591ae08745Sheppo 8601ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 8611ae08745Sheppo VIO_DRING_REG)) { 8621ae08745Sheppo return (ENOMSG); /* not a register-dring message */ 8631ae08745Sheppo } 8641ae08745Sheppo 8651ae08745Sheppo if (msglen < sizeof (*reg_msg)) { 8661ae08745Sheppo PRN("Expected at least %lu-byte register-dring message; " 8671ae08745Sheppo "received %lu bytes", sizeof (*reg_msg), msglen); 8681ae08745Sheppo return (EBADMSG); 8691ae08745Sheppo } 8701ae08745Sheppo 8711ae08745Sheppo expected = sizeof (*reg_msg) + 8721ae08745Sheppo (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0])); 8731ae08745Sheppo if (msglen != expected) { 8741ae08745Sheppo PRN("Expected %lu-byte register-dring message; " 8751ae08745Sheppo "received %lu bytes", expected, msglen); 8761ae08745Sheppo return (EBADMSG); 8771ae08745Sheppo } 8781ae08745Sheppo 8791ae08745Sheppo if (vd->initialized & VD_DRING) { 8801ae08745Sheppo PRN("A dring was previously registered; only support one"); 8811ae08745Sheppo return (EBADMSG); 8821ae08745Sheppo } 8831ae08745Sheppo 8841ae08745Sheppo if (reg_msg->ncookies != 1) { 8851ae08745Sheppo /* 8861ae08745Sheppo * In addition to fixing the assertion in the success case 8871ae08745Sheppo * below, supporting drings which require more than one 8881ae08745Sheppo * "cookie" requires increasing the value of vd->max_msglen 8891ae08745Sheppo * somewhere in the code path prior to receiving the message 8901ae08745Sheppo * which results in calling this function. Note that without 8911ae08745Sheppo * making this change, the larger message size required to 8921ae08745Sheppo * accommodate multiple cookies cannot be successfully 8931ae08745Sheppo * received, so this function will not even get called. 8941ae08745Sheppo * Gracefully accommodating more dring cookies might 8951ae08745Sheppo * reasonably demand exchanging an additional attribute or 8961ae08745Sheppo * making a minor protocol adjustment 8971ae08745Sheppo */ 8981ae08745Sheppo PRN("reg_msg->ncookies = %u != 1", reg_msg->ncookies); 8991ae08745Sheppo return (EBADMSG); 9001ae08745Sheppo } 9011ae08745Sheppo 9021ae08745Sheppo status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie, 9031ae08745Sheppo reg_msg->ncookies, reg_msg->num_descriptors, 9041ae08745Sheppo reg_msg->descriptor_size, LDC_SHADOW_MAP, &vd->dring_handle); 9051ae08745Sheppo if (status != 0) { 9061ae08745Sheppo PRN("ldc_mem_dring_map() returned errno %d", status); 9071ae08745Sheppo return (status); 9081ae08745Sheppo } 9091ae08745Sheppo 9101ae08745Sheppo /* 9111ae08745Sheppo * To remove the need for this assertion, must call 9121ae08745Sheppo * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a 9131ae08745Sheppo * successful call to ldc_mem_dring_map() 9141ae08745Sheppo */ 9151ae08745Sheppo ASSERT(reg_msg->ncookies == 1); 9161ae08745Sheppo 9171ae08745Sheppo if ((status = 9181ae08745Sheppo ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { 9191ae08745Sheppo PRN("ldc_mem_dring_info() returned errno %d", status); 9201ae08745Sheppo if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0) 9211ae08745Sheppo PRN("ldc_mem_dring_unmap() returned errno %d", status); 9221ae08745Sheppo return (status); 9231ae08745Sheppo } 9241ae08745Sheppo 9251ae08745Sheppo if (dring_minfo.vaddr == NULL) { 9261ae08745Sheppo PRN("Descriptor ring virtual address is NULL"); 927*0a55fbb7Slm66018 return (ENXIO); 9281ae08745Sheppo } 9291ae08745Sheppo 9301ae08745Sheppo 9311ae08745Sheppo /* Valid message and dring mapped */ 9321ae08745Sheppo PR1("descriptor size = %u, dring length = %u", 9331ae08745Sheppo vd->descriptor_size, vd->dring_len); 9341ae08745Sheppo vd->initialized |= VD_DRING; 9351ae08745Sheppo vd->dring_ident = 1; /* "There Can Be Only One" */ 9361ae08745Sheppo vd->dring = dring_minfo.vaddr; 9371ae08745Sheppo vd->descriptor_size = reg_msg->descriptor_size; 9381ae08745Sheppo vd->dring_len = reg_msg->num_descriptors; 9391ae08745Sheppo reg_msg->dring_ident = vd->dring_ident; 9401ae08745Sheppo return (0); 9411ae08745Sheppo } 9421ae08745Sheppo 9431ae08745Sheppo static int 9441ae08745Sheppo vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 9451ae08745Sheppo { 9461ae08745Sheppo vio_dring_unreg_msg_t *unreg_msg = (vio_dring_unreg_msg_t *)msg; 9471ae08745Sheppo 9481ae08745Sheppo 9491ae08745Sheppo PR0("Entered"); 9501ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 9511ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 9521ae08745Sheppo 9531ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 9541ae08745Sheppo VIO_DRING_UNREG)) { 9551ae08745Sheppo return (ENOMSG); /* not an unregister-dring message */ 9561ae08745Sheppo } 9571ae08745Sheppo 9581ae08745Sheppo if (msglen != sizeof (*unreg_msg)) { 9591ae08745Sheppo PRN("Expected %lu-byte unregister-dring message; " 9601ae08745Sheppo "received %lu bytes", sizeof (*unreg_msg), msglen); 9611ae08745Sheppo return (EBADMSG); 9621ae08745Sheppo } 9631ae08745Sheppo 9641ae08745Sheppo if (unreg_msg->dring_ident != vd->dring_ident) { 9651ae08745Sheppo PRN("Expected dring ident %lu; received %lu", 9661ae08745Sheppo vd->dring_ident, unreg_msg->dring_ident); 9671ae08745Sheppo return (EBADMSG); 9681ae08745Sheppo } 9691ae08745Sheppo 9701ae08745Sheppo return (0); 9711ae08745Sheppo } 9721ae08745Sheppo 9731ae08745Sheppo static int 9741ae08745Sheppo process_rdx_msg(vio_msg_t *msg, size_t msglen) 9751ae08745Sheppo { 9761ae08745Sheppo PR0("Entered"); 9771ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 9781ae08745Sheppo 9791ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX)) 9801ae08745Sheppo return (ENOMSG); /* not an RDX message */ 9811ae08745Sheppo 9821ae08745Sheppo if (msglen != sizeof (vio_rdx_msg_t)) { 9831ae08745Sheppo PRN("Expected %lu-byte RDX message; received %lu bytes", 9841ae08745Sheppo sizeof (vio_rdx_msg_t), msglen); 9851ae08745Sheppo return (EBADMSG); 9861ae08745Sheppo } 9871ae08745Sheppo 9881ae08745Sheppo return (0); 9891ae08745Sheppo } 9901ae08745Sheppo 9911ae08745Sheppo static void 9921ae08745Sheppo vd_reset_connection(vd_t *vd, boolean_t reset_ldc) 9931ae08745Sheppo { 9941ae08745Sheppo int status = 0; 9951ae08745Sheppo 9961ae08745Sheppo 9971ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 9981ae08745Sheppo PR0("Resetting connection with %s", VD_CLIENT(vd)); 9991ae08745Sheppo if ((vd->initialized & VD_DRING) && 10001ae08745Sheppo ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)) 10011ae08745Sheppo PRN("ldc_mem_dring_unmap() returned errno %d", status); 10021ae08745Sheppo if ((reset_ldc == B_TRUE) && 10031ae08745Sheppo ((status = ldc_reset(vd->ldc_handle)) != 0)) 10041ae08745Sheppo PRN("ldc_reset() returned errno %d", status); 10051ae08745Sheppo vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING); 10061ae08745Sheppo vd->state = VD_STATE_INIT; 10071ae08745Sheppo vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 10081ae08745Sheppo } 10091ae08745Sheppo 10101ae08745Sheppo static int 10111ae08745Sheppo vd_check_seq_num(vd_t *vd, uint64_t seq_num) 10121ae08745Sheppo { 10131ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 10141ae08745Sheppo if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) { 10151ae08745Sheppo PRN("Received seq_num %lu; expected %lu", 10161ae08745Sheppo seq_num, (vd->seq_num + 1)); 10171ae08745Sheppo vd_reset_connection(vd, B_FALSE); 10181ae08745Sheppo return (1); 10191ae08745Sheppo } 10201ae08745Sheppo 10211ae08745Sheppo vd->seq_num = seq_num; 10221ae08745Sheppo vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... */ 10231ae08745Sheppo return (0); 10241ae08745Sheppo } 10251ae08745Sheppo 10261ae08745Sheppo /* 10271ae08745Sheppo * Return the expected size of an inband-descriptor message with all the 10281ae08745Sheppo * cookies it claims to include 10291ae08745Sheppo */ 10301ae08745Sheppo static size_t 10311ae08745Sheppo expected_inband_size(vd_dring_inband_msg_t *msg) 10321ae08745Sheppo { 10331ae08745Sheppo return ((sizeof (*msg)) + 10341ae08745Sheppo (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0]))); 10351ae08745Sheppo } 10361ae08745Sheppo 10371ae08745Sheppo /* 10381ae08745Sheppo * Process an in-band descriptor message: used with clients like OBP, with 10391ae08745Sheppo * which vds exchanges descriptors within VIO message payloads, rather than 10401ae08745Sheppo * operating on them within a descriptor ring 10411ae08745Sheppo */ 10421ae08745Sheppo static int 10431ae08745Sheppo vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 10441ae08745Sheppo { 10451ae08745Sheppo size_t expected; 10461ae08745Sheppo vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg; 10471ae08745Sheppo 10481ae08745Sheppo 10491ae08745Sheppo PR1("Entered"); 10501ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 10511ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 10521ae08745Sheppo 10531ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 10541ae08745Sheppo VIO_DESC_DATA)) 10551ae08745Sheppo return (ENOMSG); /* not an in-band-descriptor message */ 10561ae08745Sheppo 10571ae08745Sheppo if (msglen < sizeof (*desc_msg)) { 10581ae08745Sheppo PRN("Expected at least %lu-byte descriptor message; " 10591ae08745Sheppo "received %lu bytes", sizeof (*desc_msg), msglen); 10601ae08745Sheppo return (EBADMSG); 10611ae08745Sheppo } 10621ae08745Sheppo 10631ae08745Sheppo if (msglen != (expected = expected_inband_size(desc_msg))) { 10641ae08745Sheppo PRN("Expected %lu-byte descriptor message; " 10651ae08745Sheppo "received %lu bytes", expected, msglen); 10661ae08745Sheppo return (EBADMSG); 10671ae08745Sheppo } 10681ae08745Sheppo 10691ae08745Sheppo if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) { 10701ae08745Sheppo return (EBADMSG); 10711ae08745Sheppo } 10721ae08745Sheppo 10731ae08745Sheppo /* Valid message; process the request */ 10741ae08745Sheppo desc_msg->payload.status = vd_process_request(vd, &desc_msg->payload); 10751ae08745Sheppo return (0); 10761ae08745Sheppo } 10771ae08745Sheppo 10781ae08745Sheppo static boolean_t 10791ae08745Sheppo vd_accept_dring_elems(vd_t *vd, uint32_t start, uint32_t ndesc) 10801ae08745Sheppo { 10811ae08745Sheppo uint32_t i, n; 10821ae08745Sheppo 10831ae08745Sheppo 10841ae08745Sheppo /* Check descriptor states */ 10851ae08745Sheppo for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len) { 10861ae08745Sheppo if (VD_DRING_ELEM(i)->hdr.dstate != VIO_DESC_READY) { 10871ae08745Sheppo PRN("descriptor %u not ready", i); 10881ae08745Sheppo VD_DUMP_DRING_ELEM(VD_DRING_ELEM(i)); 10891ae08745Sheppo return (B_FALSE); 10901ae08745Sheppo } 10911ae08745Sheppo } 10921ae08745Sheppo 10931ae08745Sheppo /* Descriptors are valid; accept them */ 10941ae08745Sheppo for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len) 10951ae08745Sheppo VD_DRING_ELEM(i)->hdr.dstate = VIO_DESC_ACCEPTED; 10961ae08745Sheppo 10971ae08745Sheppo return (B_TRUE); 10981ae08745Sheppo } 10991ae08745Sheppo 11001ae08745Sheppo static int 11011ae08745Sheppo vd_process_dring(vd_t *vd, uint32_t start, uint32_t end) 11021ae08745Sheppo { 11031ae08745Sheppo int status; 11041ae08745Sheppo boolean_t accepted; 11051ae08745Sheppo uint32_t i, io_status, n, ndesc; 11061ae08745Sheppo 11071ae08745Sheppo 11081ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 11091ae08745Sheppo PR1("start = %u, end = %u", start, end); 11101ae08745Sheppo 11111ae08745Sheppo /* Validate descriptor range */ 11121ae08745Sheppo if ((start >= vd->dring_len) || (end >= vd->dring_len)) { 11131ae08745Sheppo PRN("\"start\" = %u, \"end\" = %u; both must be less than %u", 11141ae08745Sheppo start, end, vd->dring_len); 11151ae08745Sheppo return (EINVAL); 11161ae08745Sheppo } 11171ae08745Sheppo 11181ae08745Sheppo /* Acquire updated dring elements */ 11191ae08745Sheppo if ((status = ldc_mem_dring_acquire(vd->dring_handle, 11201ae08745Sheppo start, end)) != 0) { 11211ae08745Sheppo PRN("ldc_mem_dring_acquire() returned errno %d", status); 11221ae08745Sheppo return (status); 11231ae08745Sheppo } 11241ae08745Sheppo /* Accept updated dring elements */ 11251ae08745Sheppo ndesc = ((end < start) ? end + vd->dring_len : end) - start + 1; 11261ae08745Sheppo PR1("ndesc = %u", ndesc); 11271ae08745Sheppo accepted = vd_accept_dring_elems(vd, start, ndesc); 11281ae08745Sheppo /* Release dring elements */ 11291ae08745Sheppo if ((status = ldc_mem_dring_release(vd->dring_handle, 11301ae08745Sheppo start, end)) != 0) { 11311ae08745Sheppo PRN("ldc_mem_dring_release() returned errno %d", status); 11321ae08745Sheppo return (status); 11331ae08745Sheppo } 11341ae08745Sheppo /* If a descriptor was in the wrong state, return an error */ 11351ae08745Sheppo if (!accepted) 11361ae08745Sheppo return (EINVAL); 11371ae08745Sheppo 11381ae08745Sheppo 11391ae08745Sheppo /* Process accepted dring elements */ 11401ae08745Sheppo for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len) { 11411ae08745Sheppo vd_dring_entry_t *elem = VD_DRING_ELEM(i); 11421ae08745Sheppo 11431ae08745Sheppo /* Process descriptor outside acquire/release bracket */ 11441ae08745Sheppo PR1("Processing dring element %u", i); 11451ae08745Sheppo io_status = vd_process_request(vd, &elem->payload); 11461ae08745Sheppo 11471ae08745Sheppo /* Re-acquire client's dring element */ 11481ae08745Sheppo if ((status = ldc_mem_dring_acquire(vd->dring_handle, 11491ae08745Sheppo i, i)) != 0) { 11501ae08745Sheppo PRN("ldc_mem_dring_acquire() returned errno %d", 11511ae08745Sheppo status); 11521ae08745Sheppo return (status); 11531ae08745Sheppo } 11541ae08745Sheppo /* Update processed element */ 11551ae08745Sheppo if (elem->hdr.dstate == VIO_DESC_ACCEPTED) { 11561ae08745Sheppo elem->payload.status = io_status; 11571ae08745Sheppo elem->hdr.dstate = VIO_DESC_DONE; 11581ae08745Sheppo } else { 11591ae08745Sheppo /* Perhaps client timed out waiting for I/O... */ 11601ae08745Sheppo accepted = B_FALSE; 11611ae08745Sheppo PRN("element %u no longer \"accepted\"", i); 11621ae08745Sheppo VD_DUMP_DRING_ELEM(elem); 11631ae08745Sheppo } 11641ae08745Sheppo /* Release updated processed element */ 11651ae08745Sheppo if ((status = ldc_mem_dring_release(vd->dring_handle, 11661ae08745Sheppo i, i)) != 0) { 11671ae08745Sheppo PRN("ldc_mem_dring_release() returned errno %d", 11681ae08745Sheppo status); 11691ae08745Sheppo return (status); 11701ae08745Sheppo } 11711ae08745Sheppo /* If the descriptor was in the wrong state, return an error */ 11721ae08745Sheppo if (!accepted) 11731ae08745Sheppo return (EINVAL); 11741ae08745Sheppo } 11751ae08745Sheppo 11761ae08745Sheppo return (0); 11771ae08745Sheppo } 11781ae08745Sheppo 11791ae08745Sheppo static int 11801ae08745Sheppo vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 11811ae08745Sheppo { 11821ae08745Sheppo vio_dring_msg_t *dring_msg = (vio_dring_msg_t *)msg; 11831ae08745Sheppo 11841ae08745Sheppo 11851ae08745Sheppo PR1("Entered"); 11861ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 11871ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 11881ae08745Sheppo 11891ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 11901ae08745Sheppo VIO_DRING_DATA)) { 11911ae08745Sheppo return (ENOMSG); /* not a dring-data message */ 11921ae08745Sheppo } 11931ae08745Sheppo 11941ae08745Sheppo if (msglen != sizeof (*dring_msg)) { 11951ae08745Sheppo PRN("Expected %lu-byte dring message; received %lu bytes", 11961ae08745Sheppo sizeof (*dring_msg), msglen); 11971ae08745Sheppo return (EBADMSG); 11981ae08745Sheppo } 11991ae08745Sheppo 12001ae08745Sheppo if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) { 12011ae08745Sheppo return (EBADMSG); 12021ae08745Sheppo } 12031ae08745Sheppo 12041ae08745Sheppo if (dring_msg->dring_ident != vd->dring_ident) { 12051ae08745Sheppo PRN("Expected dring ident %lu; received ident %lu", 12061ae08745Sheppo vd->dring_ident, dring_msg->dring_ident); 12071ae08745Sheppo return (EBADMSG); 12081ae08745Sheppo } 12091ae08745Sheppo 12101ae08745Sheppo 12111ae08745Sheppo /* Valid message; process dring */ 12121ae08745Sheppo dring_msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 12131ae08745Sheppo return (vd_process_dring(vd, dring_msg->start_idx, dring_msg->end_idx)); 12141ae08745Sheppo } 12151ae08745Sheppo 12161ae08745Sheppo static int 12171ae08745Sheppo recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes) 12181ae08745Sheppo { 12191ae08745Sheppo int retry, status; 12201ae08745Sheppo size_t size = *nbytes; 12211ae08745Sheppo 12221ae08745Sheppo 12231ae08745Sheppo for (retry = 0, status = ETIMEDOUT; 12241ae08745Sheppo retry < vds_ldc_retries && status == ETIMEDOUT; 12251ae08745Sheppo retry++) { 12261ae08745Sheppo PR1("ldc_read() attempt %d", (retry + 1)); 12271ae08745Sheppo *nbytes = size; 12281ae08745Sheppo status = ldc_read(ldc_handle, msg, nbytes); 12291ae08745Sheppo } 12301ae08745Sheppo 12311ae08745Sheppo if (status != 0) { 12321ae08745Sheppo PRN("ldc_read() returned errno %d", status); 12331ae08745Sheppo return (status); 12341ae08745Sheppo } else if (*nbytes == 0) { 12351ae08745Sheppo PR1("ldc_read() returned 0 and no message read"); 12361ae08745Sheppo return (ENOMSG); 12371ae08745Sheppo } 12381ae08745Sheppo 12391ae08745Sheppo PR1("RCVD %lu-byte message", *nbytes); 12401ae08745Sheppo return (0); 12411ae08745Sheppo } 12421ae08745Sheppo 12431ae08745Sheppo static int 12441ae08745Sheppo vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 12451ae08745Sheppo { 12461ae08745Sheppo int status; 12471ae08745Sheppo 12481ae08745Sheppo 12491ae08745Sheppo PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype, 12501ae08745Sheppo msg->tag.vio_subtype, msg->tag.vio_subtype_env); 12511ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 12521ae08745Sheppo 12531ae08745Sheppo /* 12541ae08745Sheppo * Validate session ID up front, since it applies to all messages 12551ae08745Sheppo * once set 12561ae08745Sheppo */ 12571ae08745Sheppo if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) { 12581ae08745Sheppo PRN("Expected SID %u, received %u", vd->sid, 12591ae08745Sheppo msg->tag.vio_sid); 12601ae08745Sheppo return (EBADMSG); 12611ae08745Sheppo } 12621ae08745Sheppo 12631ae08745Sheppo 12641ae08745Sheppo /* 12651ae08745Sheppo * Process the received message based on connection state 12661ae08745Sheppo */ 12671ae08745Sheppo switch (vd->state) { 12681ae08745Sheppo case VD_STATE_INIT: /* expect version message */ 1269*0a55fbb7Slm66018 if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0) 12701ae08745Sheppo return (status); 12711ae08745Sheppo 12721ae08745Sheppo /* Version negotiated, move to that state */ 12731ae08745Sheppo vd->state = VD_STATE_VER; 12741ae08745Sheppo return (0); 12751ae08745Sheppo 12761ae08745Sheppo case VD_STATE_VER: /* expect attribute message */ 12771ae08745Sheppo if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0) 12781ae08745Sheppo return (status); 12791ae08745Sheppo 12801ae08745Sheppo /* Attributes exchanged, move to that state */ 12811ae08745Sheppo vd->state = VD_STATE_ATTR; 12821ae08745Sheppo return (0); 12831ae08745Sheppo 12841ae08745Sheppo case VD_STATE_ATTR: 12851ae08745Sheppo switch (vd->xfer_mode) { 12861ae08745Sheppo case VIO_DESC_MODE: /* expect RDX message */ 12871ae08745Sheppo if ((status = process_rdx_msg(msg, msglen)) != 0) 12881ae08745Sheppo return (status); 12891ae08745Sheppo 12901ae08745Sheppo /* Ready to receive in-band descriptors */ 12911ae08745Sheppo vd->state = VD_STATE_DATA; 12921ae08745Sheppo return (0); 12931ae08745Sheppo 12941ae08745Sheppo case VIO_DRING_MODE: /* expect register-dring message */ 12951ae08745Sheppo if ((status = 12961ae08745Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != 0) 12971ae08745Sheppo return (status); 12981ae08745Sheppo 12991ae08745Sheppo /* One dring negotiated, move to that state */ 13001ae08745Sheppo vd->state = VD_STATE_DRING; 13011ae08745Sheppo return (0); 13021ae08745Sheppo 13031ae08745Sheppo default: 13041ae08745Sheppo ASSERT("Unsupported transfer mode"); 13051ae08745Sheppo PRN("Unsupported transfer mode"); 13061ae08745Sheppo return (ENOTSUP); 13071ae08745Sheppo } 13081ae08745Sheppo 13091ae08745Sheppo case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */ 13101ae08745Sheppo if ((status = process_rdx_msg(msg, msglen)) == 0) { 13111ae08745Sheppo /* Ready to receive data */ 13121ae08745Sheppo vd->state = VD_STATE_DATA; 13131ae08745Sheppo return (0); 13141ae08745Sheppo } else if (status != ENOMSG) { 13151ae08745Sheppo return (status); 13161ae08745Sheppo } 13171ae08745Sheppo 13181ae08745Sheppo 13191ae08745Sheppo /* 13201ae08745Sheppo * If another register-dring message is received, stay in 13211ae08745Sheppo * dring state in case the client sends RDX; although the 13221ae08745Sheppo * protocol allows multiple drings, this server does not 13231ae08745Sheppo * support using more than one 13241ae08745Sheppo */ 13251ae08745Sheppo if ((status = 13261ae08745Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) 13271ae08745Sheppo return (status); 13281ae08745Sheppo 13291ae08745Sheppo /* 13301ae08745Sheppo * Acknowledge an unregister-dring message, but reset the 13311ae08745Sheppo * connection anyway: Although the protocol allows 13321ae08745Sheppo * unregistering drings, this server cannot serve a vdisk 13331ae08745Sheppo * without its only dring 13341ae08745Sheppo */ 13351ae08745Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 13361ae08745Sheppo return ((status == 0) ? ENOTSUP : status); 13371ae08745Sheppo 13381ae08745Sheppo case VD_STATE_DATA: 13391ae08745Sheppo switch (vd->xfer_mode) { 13401ae08745Sheppo case VIO_DESC_MODE: /* expect in-band-descriptor message */ 13411ae08745Sheppo return (vd_process_desc_msg(vd, msg, msglen)); 13421ae08745Sheppo 13431ae08745Sheppo case VIO_DRING_MODE: /* expect dring-data or unreg-dring */ 13441ae08745Sheppo /* 13451ae08745Sheppo * Typically expect dring-data messages, so handle 13461ae08745Sheppo * them first 13471ae08745Sheppo */ 13481ae08745Sheppo if ((status = vd_process_dring_msg(vd, msg, 13491ae08745Sheppo msglen)) != ENOMSG) 13501ae08745Sheppo return (status); 13511ae08745Sheppo 13521ae08745Sheppo /* 13531ae08745Sheppo * Acknowledge an unregister-dring message, but reset 13541ae08745Sheppo * the connection anyway: Although the protocol 13551ae08745Sheppo * allows unregistering drings, this server cannot 13561ae08745Sheppo * serve a vdisk without its only dring 13571ae08745Sheppo */ 13581ae08745Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 13591ae08745Sheppo return ((status == 0) ? ENOTSUP : status); 13601ae08745Sheppo 13611ae08745Sheppo default: 13621ae08745Sheppo ASSERT("Unsupported transfer mode"); 13631ae08745Sheppo PRN("Unsupported transfer mode"); 13641ae08745Sheppo return (ENOTSUP); 13651ae08745Sheppo } 13661ae08745Sheppo 13671ae08745Sheppo default: 13681ae08745Sheppo ASSERT("Invalid client connection state"); 13691ae08745Sheppo PRN("Invalid client connection state"); 13701ae08745Sheppo return (ENOTSUP); 13711ae08745Sheppo } 13721ae08745Sheppo } 13731ae08745Sheppo 13741ae08745Sheppo static void 13751ae08745Sheppo vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 13761ae08745Sheppo { 13771ae08745Sheppo int status; 13781ae08745Sheppo boolean_t reset_ldc = B_FALSE; 13791ae08745Sheppo 13801ae08745Sheppo 13811ae08745Sheppo ASSERT(mutex_owned(&vd->lock)); 13821ae08745Sheppo 13831ae08745Sheppo /* 13841ae08745Sheppo * Check that the message is at least big enough for a "tag", so that 13851ae08745Sheppo * message processing can proceed based on tag-specified message type 13861ae08745Sheppo */ 13871ae08745Sheppo if (msglen < sizeof (vio_msg_tag_t)) { 13881ae08745Sheppo PRN("Received short (%lu-byte) message", msglen); 13891ae08745Sheppo /* Can't "nack" short message, so drop the big hammer */ 13901ae08745Sheppo vd_reset_connection(vd, B_TRUE); 13911ae08745Sheppo return; 13921ae08745Sheppo } 13931ae08745Sheppo 13941ae08745Sheppo /* 13951ae08745Sheppo * Process the message 13961ae08745Sheppo */ 13971ae08745Sheppo switch (status = vd_do_process_msg(vd, msg, msglen)) { 13981ae08745Sheppo case 0: 13991ae08745Sheppo /* "ack" valid, successfully-processed messages */ 14001ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 14011ae08745Sheppo break; 14021ae08745Sheppo 14031ae08745Sheppo case ENOMSG: 14041ae08745Sheppo PRN("Received unexpected message"); 14051ae08745Sheppo _NOTE(FALLTHROUGH); 14061ae08745Sheppo case EBADMSG: 14071ae08745Sheppo case ENOTSUP: 14081ae08745Sheppo /* "nack" invalid messages */ 14091ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 14101ae08745Sheppo break; 14111ae08745Sheppo 14121ae08745Sheppo default: 14131ae08745Sheppo /* "nack" failed messages */ 14141ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 14151ae08745Sheppo /* An LDC error probably occurred, so try resetting it */ 14161ae08745Sheppo reset_ldc = B_TRUE; 14171ae08745Sheppo break; 14181ae08745Sheppo } 14191ae08745Sheppo 14201ae08745Sheppo /* "ack" or "nack" the message */ 14211ae08745Sheppo PR1("Sending %s", 14221ae08745Sheppo (msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK"); 14231ae08745Sheppo if (send_msg(vd->ldc_handle, msg, msglen) != 0) 14241ae08745Sheppo reset_ldc = B_TRUE; 14251ae08745Sheppo 14261ae08745Sheppo /* Reset the connection for nack'ed or failed messages */ 14271ae08745Sheppo if ((status != 0) || reset_ldc) 14281ae08745Sheppo vd_reset_connection(vd, reset_ldc); 14291ae08745Sheppo } 14301ae08745Sheppo 14311ae08745Sheppo static void 1432*0a55fbb7Slm66018 vd_recv_msg(void *arg) 14331ae08745Sheppo { 14341ae08745Sheppo vd_t *vd = (vd_t *)arg; 1435*0a55fbb7Slm66018 int status = 0; 14361ae08745Sheppo 14371ae08745Sheppo 14381ae08745Sheppo PR2("Entered"); 14391ae08745Sheppo ASSERT(vd != NULL); 14401ae08745Sheppo mutex_enter(&vd->lock); 1441*0a55fbb7Slm66018 /* 1442*0a55fbb7Slm66018 * Receive and process any messages in the LDC queue; max_msglen is 1443*0a55fbb7Slm66018 * reset each time through the loop, as vd->max_msglen can increase 1444*0a55fbb7Slm66018 * during connection handshake 1445*0a55fbb7Slm66018 */ 1446*0a55fbb7Slm66018 for (size_t max_msglen = vd->max_msglen; 1447*0a55fbb7Slm66018 vd->enabled && status == 0; 1448*0a55fbb7Slm66018 max_msglen = vd->max_msglen) { 1449*0a55fbb7Slm66018 size_t msglen = max_msglen; 1450*0a55fbb7Slm66018 vio_msg_t *vio_msg = kmem_alloc(max_msglen, KM_SLEEP); 1451*0a55fbb7Slm66018 1452*0a55fbb7Slm66018 if ((status = recv_msg(vd->ldc_handle, vio_msg, &msglen)) == 0) 1453*0a55fbb7Slm66018 vd_process_msg(vd, vio_msg, msglen); 1454*0a55fbb7Slm66018 else if (status != ENOMSG) 1455*0a55fbb7Slm66018 vd_reset_connection(vd, B_TRUE); 14561ae08745Sheppo kmem_free(vio_msg, max_msglen); 1457*0a55fbb7Slm66018 } 14581ae08745Sheppo mutex_exit(&vd->lock); 14591ae08745Sheppo PR2("Returning"); 14601ae08745Sheppo } 14611ae08745Sheppo 14621ae08745Sheppo static uint_t 1463*0a55fbb7Slm66018 vd_do_handle_ldc_events(vd_t *vd, uint64_t event) 1464*0a55fbb7Slm66018 { 1465*0a55fbb7Slm66018 ASSERT(mutex_owned(&vd->lock)); 1466*0a55fbb7Slm66018 1467*0a55fbb7Slm66018 if (!vd->enabled) 1468*0a55fbb7Slm66018 return (LDC_SUCCESS); 1469*0a55fbb7Slm66018 1470*0a55fbb7Slm66018 if (event & LDC_EVT_RESET) { 1471*0a55fbb7Slm66018 PR0("Channel was reset"); 1472*0a55fbb7Slm66018 return (LDC_SUCCESS); 1473*0a55fbb7Slm66018 } 1474*0a55fbb7Slm66018 1475*0a55fbb7Slm66018 if (event & LDC_EVT_UP) { 1476*0a55fbb7Slm66018 /* Reset the connection state when channel comes (back) up */ 1477*0a55fbb7Slm66018 vd_reset_connection(vd, B_FALSE); 1478*0a55fbb7Slm66018 } 1479*0a55fbb7Slm66018 1480*0a55fbb7Slm66018 if (event & LDC_EVT_READ) { 1481*0a55fbb7Slm66018 PR1("New data available"); 1482*0a55fbb7Slm66018 /* Queue a task to receive the new data */ 1483*0a55fbb7Slm66018 if (ddi_taskq_dispatch(vd->taskq, vd_recv_msg, vd, DDI_SLEEP) != 1484*0a55fbb7Slm66018 DDI_SUCCESS) 1485*0a55fbb7Slm66018 PRN("Unable to dispatch vd_recv_msg()"); 1486*0a55fbb7Slm66018 } 1487*0a55fbb7Slm66018 1488*0a55fbb7Slm66018 return (LDC_SUCCESS); 1489*0a55fbb7Slm66018 } 1490*0a55fbb7Slm66018 1491*0a55fbb7Slm66018 static uint_t 14921ae08745Sheppo vd_handle_ldc_events(uint64_t event, caddr_t arg) 14931ae08745Sheppo { 14941ae08745Sheppo uint_t status; 14951ae08745Sheppo vd_t *vd = (vd_t *)(void *)arg; 14961ae08745Sheppo 14971ae08745Sheppo 14981ae08745Sheppo ASSERT(vd != NULL); 14991ae08745Sheppo mutex_enter(&vd->lock); 1500*0a55fbb7Slm66018 status = vd_do_handle_ldc_events(vd, event); 15011ae08745Sheppo mutex_exit(&vd->lock); 1502*0a55fbb7Slm66018 return (status); 15031ae08745Sheppo } 15041ae08745Sheppo 15051ae08745Sheppo static uint_t 15061ae08745Sheppo vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 15071ae08745Sheppo { 15081ae08745Sheppo _NOTE(ARGUNUSED(key, val)) 15091ae08745Sheppo (*((uint_t *)arg))++; 15101ae08745Sheppo return (MH_WALK_TERMINATE); 15111ae08745Sheppo } 15121ae08745Sheppo 15131ae08745Sheppo 15141ae08745Sheppo static int 15151ae08745Sheppo vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 15161ae08745Sheppo { 15171ae08745Sheppo uint_t vd_present = 0; 15181ae08745Sheppo minor_t instance; 15191ae08745Sheppo vds_t *vds; 15201ae08745Sheppo 15211ae08745Sheppo 15221ae08745Sheppo PR0("Entered"); 15231ae08745Sheppo switch (cmd) { 15241ae08745Sheppo case DDI_DETACH: 15251ae08745Sheppo /* the real work happens below */ 15261ae08745Sheppo break; 15271ae08745Sheppo case DDI_SUSPEND: 15281ae08745Sheppo /* nothing to do for this non-device */ 15291ae08745Sheppo return (DDI_SUCCESS); 15301ae08745Sheppo default: 15311ae08745Sheppo return (DDI_FAILURE); 15321ae08745Sheppo } 15331ae08745Sheppo 15341ae08745Sheppo ASSERT(cmd == DDI_DETACH); 15351ae08745Sheppo instance = ddi_get_instance(dip); 15361ae08745Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 15371ae08745Sheppo PRN("Could not get state for instance %u", instance); 15381ae08745Sheppo ddi_soft_state_free(vds_state, instance); 15391ae08745Sheppo return (DDI_FAILURE); 15401ae08745Sheppo } 15411ae08745Sheppo 15421ae08745Sheppo /* Do no detach when serving any vdisks */ 15431ae08745Sheppo mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present); 15441ae08745Sheppo if (vd_present) { 15451ae08745Sheppo PR0("Not detaching because serving vdisks"); 15461ae08745Sheppo return (DDI_FAILURE); 15471ae08745Sheppo } 15481ae08745Sheppo 15491ae08745Sheppo PR0("Detaching"); 15501ae08745Sheppo if (vds->initialized & VDS_MDEG) 15511ae08745Sheppo (void) mdeg_unregister(vds->mdeg); 15521ae08745Sheppo if (vds->initialized & VDS_LDI) 15531ae08745Sheppo (void) ldi_ident_release(vds->ldi_ident); 15541ae08745Sheppo mod_hash_destroy_hash(vds->vd_table); 15551ae08745Sheppo if (vds->initialized & VDS_LOCKING) 15561ae08745Sheppo mutex_destroy(&vds->lock); 15571ae08745Sheppo ddi_soft_state_free(vds_state, instance); 15581ae08745Sheppo return (DDI_SUCCESS); 15591ae08745Sheppo } 15601ae08745Sheppo 15611ae08745Sheppo static boolean_t 15621ae08745Sheppo is_pseudo_device(dev_info_t *dip) 15631ae08745Sheppo { 15641ae08745Sheppo dev_info_t *parent, *root = ddi_root_node(); 15651ae08745Sheppo 15661ae08745Sheppo 15671ae08745Sheppo for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root); 15681ae08745Sheppo parent = ddi_get_parent(parent)) { 15691ae08745Sheppo if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0) 15701ae08745Sheppo return (B_TRUE); 15711ae08745Sheppo } 15721ae08745Sheppo 15731ae08745Sheppo return (B_FALSE); 15741ae08745Sheppo } 15751ae08745Sheppo 15761ae08745Sheppo static int 1577*0a55fbb7Slm66018 vd_setup_full_disk(vd_t *vd) 1578*0a55fbb7Slm66018 { 1579*0a55fbb7Slm66018 int rval, status; 1580*0a55fbb7Slm66018 major_t major = getmajor(vd->dev[0]); 1581*0a55fbb7Slm66018 minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; 1582*0a55fbb7Slm66018 struct vtoc vtoc; 1583*0a55fbb7Slm66018 1584*0a55fbb7Slm66018 1585*0a55fbb7Slm66018 /* Get the VTOC for slice sizes */ 1586*0a55fbb7Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, (intptr_t)&vtoc, 1587*0a55fbb7Slm66018 FKIOCTL, kcred, &rval)) != 0) { 1588*0a55fbb7Slm66018 PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d", status); 1589*0a55fbb7Slm66018 return (status); 1590*0a55fbb7Slm66018 } 1591*0a55fbb7Slm66018 1592*0a55fbb7Slm66018 /* Set full-disk parameters */ 1593*0a55fbb7Slm66018 vd->vdisk_type = VD_DISK_TYPE_DISK; 1594*0a55fbb7Slm66018 vd->nslices = (sizeof (vd->dev))/(sizeof (vd->dev[0])); 1595*0a55fbb7Slm66018 1596*0a55fbb7Slm66018 /* Move dev number and LDI handle to entire-disk-slice array elements */ 1597*0a55fbb7Slm66018 vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; 1598*0a55fbb7Slm66018 vd->dev[0] = 0; 1599*0a55fbb7Slm66018 vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; 1600*0a55fbb7Slm66018 vd->ldi_handle[0] = NULL; 1601*0a55fbb7Slm66018 1602*0a55fbb7Slm66018 /* Initialize device numbers for remaining slices and open them */ 1603*0a55fbb7Slm66018 for (int slice = 0; slice < vd->nslices; slice++) { 1604*0a55fbb7Slm66018 /* 1605*0a55fbb7Slm66018 * Skip the entire-disk slice, as it's already open and its 1606*0a55fbb7Slm66018 * device known 1607*0a55fbb7Slm66018 */ 1608*0a55fbb7Slm66018 if (slice == VD_ENTIRE_DISK_SLICE) 1609*0a55fbb7Slm66018 continue; 1610*0a55fbb7Slm66018 ASSERT(vd->dev[slice] == 0); 1611*0a55fbb7Slm66018 ASSERT(vd->ldi_handle[slice] == NULL); 1612*0a55fbb7Slm66018 1613*0a55fbb7Slm66018 /* 1614*0a55fbb7Slm66018 * Construct the device number for the current slice 1615*0a55fbb7Slm66018 */ 1616*0a55fbb7Slm66018 vd->dev[slice] = makedevice(major, (minor + slice)); 1617*0a55fbb7Slm66018 1618*0a55fbb7Slm66018 /* 1619*0a55fbb7Slm66018 * At least some underlying drivers refuse to open 1620*0a55fbb7Slm66018 * devices for (currently) zero-length slices, so skip 1621*0a55fbb7Slm66018 * them for now 1622*0a55fbb7Slm66018 */ 1623*0a55fbb7Slm66018 if (vtoc.v_part[slice].p_size == 0) { 1624*0a55fbb7Slm66018 PR0("Skipping zero-length slice %u", slice); 1625*0a55fbb7Slm66018 continue; 1626*0a55fbb7Slm66018 } 1627*0a55fbb7Slm66018 1628*0a55fbb7Slm66018 /* 1629*0a55fbb7Slm66018 * Open all non-empty slices of the disk to serve them to the 1630*0a55fbb7Slm66018 * client. Slices are opened exclusively to prevent other 1631*0a55fbb7Slm66018 * threads or processes in the service domain from performing 1632*0a55fbb7Slm66018 * I/O to slices being accessed by a client. Failure to open 1633*0a55fbb7Slm66018 * a slice results in vds not serving this disk, as the client 1634*0a55fbb7Slm66018 * could attempt (and should be able) to access any non-empty 1635*0a55fbb7Slm66018 * slice immediately. Any slices successfully opened before a 1636*0a55fbb7Slm66018 * failure will get closed by vds_destroy_vd() as a result of 1637*0a55fbb7Slm66018 * the error returned by this function. 1638*0a55fbb7Slm66018 */ 1639*0a55fbb7Slm66018 PR0("Opening device major %u, minor %u = slice %u", 1640*0a55fbb7Slm66018 major, minor, slice); 1641*0a55fbb7Slm66018 if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 1642*0a55fbb7Slm66018 vd_open_flags, kcred, &vd->ldi_handle[slice], 1643*0a55fbb7Slm66018 vd->vds->ldi_ident)) != 0) { 1644*0a55fbb7Slm66018 PRN("ldi_open_by_dev() returned errno %d " 1645*0a55fbb7Slm66018 "for slice %u", status, slice); 1646*0a55fbb7Slm66018 /* vds_destroy_vd() will close any open slices */ 1647*0a55fbb7Slm66018 return (status); 1648*0a55fbb7Slm66018 } 1649*0a55fbb7Slm66018 } 1650*0a55fbb7Slm66018 1651*0a55fbb7Slm66018 return (0); 1652*0a55fbb7Slm66018 } 1653*0a55fbb7Slm66018 1654*0a55fbb7Slm66018 static int 1655*0a55fbb7Slm66018 vd_setup_vd(char *block_device, vd_t *vd) 16561ae08745Sheppo { 16571ae08745Sheppo int otyp, rval, status; 16581ae08745Sheppo dev_info_t *dip; 16591ae08745Sheppo struct dk_cinfo dk_cinfo; 16601ae08745Sheppo 16611ae08745Sheppo 1662*0a55fbb7Slm66018 if ((status = ldi_open_by_name(block_device, vd_open_flags, kcred, 1663*0a55fbb7Slm66018 &vd->ldi_handle[0], vd->vds->ldi_ident)) != 0) { 1664*0a55fbb7Slm66018 PRN("ldi_open_by_name(%s) = errno %d", block_device, status); 1665*0a55fbb7Slm66018 return (status); 1666*0a55fbb7Slm66018 } 1667*0a55fbb7Slm66018 16681ae08745Sheppo /* Get block device's device number, otyp, and size */ 1669*0a55fbb7Slm66018 if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { 16701ae08745Sheppo PRN("ldi_get_dev() returned errno %d for %s", 16711ae08745Sheppo status, block_device); 16721ae08745Sheppo return (status); 16731ae08745Sheppo } 1674*0a55fbb7Slm66018 if ((status = ldi_get_otyp(vd->ldi_handle[0], &otyp)) != 0) { 16751ae08745Sheppo PRN("ldi_get_otyp() returned errno %d for %s", 16761ae08745Sheppo status, block_device); 16771ae08745Sheppo return (status); 16781ae08745Sheppo } 16791ae08745Sheppo if (otyp != OTYP_BLK) { 16801ae08745Sheppo PRN("Cannot serve non-block device %s", block_device); 16811ae08745Sheppo return (ENOTBLK); 16821ae08745Sheppo } 1683*0a55fbb7Slm66018 if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { 16841ae08745Sheppo PRN("ldi_get_size() failed for %s", block_device); 16851ae08745Sheppo return (EIO); 16861ae08745Sheppo } 16871ae08745Sheppo 16881ae08745Sheppo /* Determine if backing block device is a pseudo device */ 16891ae08745Sheppo if ((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]), 16901ae08745Sheppo dev_to_instance(vd->dev[0]), 0)) == NULL) { 16911ae08745Sheppo PRN("%s is no longer accessible", block_device); 16921ae08745Sheppo return (EIO); 16931ae08745Sheppo } 16941ae08745Sheppo vd->pseudo = is_pseudo_device(dip); 16951ae08745Sheppo ddi_release_devi(dip); 16961ae08745Sheppo if (vd->pseudo) { 16971ae08745Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 16981ae08745Sheppo vd->nslices = 1; 16991ae08745Sheppo return (0); /* ...and we're done */ 17001ae08745Sheppo } 17011ae08745Sheppo 17021ae08745Sheppo /* Get dk_cinfo to determine slice of backing block device */ 1703*0a55fbb7Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 1704*0a55fbb7Slm66018 (intptr_t)&dk_cinfo, FKIOCTL, kcred, &rval)) != 0) { 17051ae08745Sheppo PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 17061ae08745Sheppo status, block_device); 17071ae08745Sheppo return (status); 17081ae08745Sheppo } 17091ae08745Sheppo 17101ae08745Sheppo if (dk_cinfo.dki_partition >= V_NUMPAR) { 17111ae08745Sheppo PRN("slice %u >= maximum slice %u for %s", 17121ae08745Sheppo dk_cinfo.dki_partition, V_NUMPAR, block_device); 17131ae08745Sheppo return (EIO); 17141ae08745Sheppo } 17151ae08745Sheppo 17161ae08745Sheppo 1717*0a55fbb7Slm66018 /* If slice is entire-disk slice, initialize for full disk */ 1718*0a55fbb7Slm66018 if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) 1719*0a55fbb7Slm66018 return (vd_setup_full_disk(vd)); 17201ae08745Sheppo 1721*0a55fbb7Slm66018 1722*0a55fbb7Slm66018 /* Otherwise, we have a non-entire slice of a block device */ 17231ae08745Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 17241ae08745Sheppo vd->nslices = 1; 17251ae08745Sheppo 17261ae08745Sheppo 17271ae08745Sheppo /* Initialize dk_geom structure for single-slice block device */ 1728*0a55fbb7Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, 1729*0a55fbb7Slm66018 (intptr_t)&vd->dk_geom, FKIOCTL, kcred, &rval)) != 0) { 17301ae08745Sheppo PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", 17311ae08745Sheppo status, block_device); 17321ae08745Sheppo return (status); 17331ae08745Sheppo } 17341ae08745Sheppo if (vd->dk_geom.dkg_nsect == 0) { 17351ae08745Sheppo PRN("%s geometry claims 0 sectors per track", block_device); 17361ae08745Sheppo return (EIO); 17371ae08745Sheppo } 17381ae08745Sheppo if (vd->dk_geom.dkg_nhead == 0) { 17391ae08745Sheppo PRN("%s geometry claims 0 heads", block_device); 17401ae08745Sheppo return (EIO); 17411ae08745Sheppo } 17421ae08745Sheppo vd->dk_geom.dkg_ncyl = 17431ae08745Sheppo lbtodb(vd->vdisk_size)/vd->dk_geom.dkg_nsect/vd->dk_geom.dkg_nhead; 17441ae08745Sheppo vd->dk_geom.dkg_acyl = 0; 17451ae08745Sheppo vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; 17461ae08745Sheppo 17471ae08745Sheppo 17481ae08745Sheppo /* Initialize vtoc structure for single-slice block device */ 1749*0a55fbb7Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, 1750*0a55fbb7Slm66018 (intptr_t)&vd->vtoc, FKIOCTL, kcred, &rval)) != 0) { 17511ae08745Sheppo PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d for %s", 17521ae08745Sheppo status, block_device); 17531ae08745Sheppo return (status); 17541ae08745Sheppo } 17551ae08745Sheppo bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, 17561ae08745Sheppo MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); 17571ae08745Sheppo bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); 17581ae08745Sheppo vd->vtoc.v_nparts = 1; 17591ae08745Sheppo vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; 17601ae08745Sheppo vd->vtoc.v_part[0].p_flag = 0; 17611ae08745Sheppo vd->vtoc.v_part[0].p_start = 0; 17621ae08745Sheppo vd->vtoc.v_part[0].p_size = lbtodb(vd->vdisk_size); 17631ae08745Sheppo bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, 17641ae08745Sheppo MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); 17651ae08745Sheppo 17661ae08745Sheppo 17671ae08745Sheppo return (0); 17681ae08745Sheppo } 17691ae08745Sheppo 17701ae08745Sheppo static int 17711ae08745Sheppo vds_do_init_vd(vds_t *vds, uint64_t id, char *block_device, uint64_t ldc_id, 17721ae08745Sheppo vd_t **vdp) 17731ae08745Sheppo { 17741ae08745Sheppo char tq_name[TASKQ_NAMELEN]; 1775*0a55fbb7Slm66018 int status; 17761ae08745Sheppo ddi_iblock_cookie_t iblock = NULL; 17771ae08745Sheppo ldc_attr_t ldc_attr; 17781ae08745Sheppo vd_t *vd; 17791ae08745Sheppo 17801ae08745Sheppo 17811ae08745Sheppo ASSERT(vds != NULL); 17821ae08745Sheppo ASSERT(block_device != NULL); 17831ae08745Sheppo ASSERT(vdp != NULL); 17841ae08745Sheppo PR0("Adding vdisk for %s", block_device); 17851ae08745Sheppo 17861ae08745Sheppo if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) { 17871ae08745Sheppo PRN("No memory for virtual disk"); 17881ae08745Sheppo return (EAGAIN); 17891ae08745Sheppo } 17901ae08745Sheppo *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */ 17911ae08745Sheppo vd->vds = vds; 17921ae08745Sheppo 17931ae08745Sheppo 1794*0a55fbb7Slm66018 /* Open vdisk and initialize parameters */ 1795*0a55fbb7Slm66018 if ((status = vd_setup_vd(block_device, vd)) != 0) 17961ae08745Sheppo return (status); 17971ae08745Sheppo ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 17981ae08745Sheppo PR0("vdisk_type = %s, pseudo = %s, nslices = %u", 17991ae08745Sheppo ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 18001ae08745Sheppo (vd->pseudo ? "yes" : "no"), vd->nslices); 18011ae08745Sheppo 18021ae08745Sheppo 18031ae08745Sheppo /* Initialize locking */ 18041ae08745Sheppo if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, 18051ae08745Sheppo &iblock) != DDI_SUCCESS) { 18061ae08745Sheppo PRN("Could not get iblock cookie."); 18071ae08745Sheppo return (EIO); 18081ae08745Sheppo } 18091ae08745Sheppo 18101ae08745Sheppo mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock); 18111ae08745Sheppo vd->initialized |= VD_LOCKING; 18121ae08745Sheppo 18131ae08745Sheppo 18141ae08745Sheppo /* Create the task queue for the vdisk */ 18151ae08745Sheppo (void) snprintf(tq_name, sizeof (tq_name), "vd%lu", id); 18161ae08745Sheppo PR1("tq_name = %s", tq_name); 18171ae08745Sheppo if ((vd->taskq = ddi_taskq_create(vds->dip, tq_name, 1, 18181ae08745Sheppo TASKQ_DEFAULTPRI, 0)) == NULL) { 18191ae08745Sheppo PRN("Could not create task queue"); 18201ae08745Sheppo return (EIO); 18211ae08745Sheppo } 18221ae08745Sheppo vd->initialized |= VD_TASKQ; 18231ae08745Sheppo vd->enabled = 1; /* before callback can dispatch to taskq */ 18241ae08745Sheppo 18251ae08745Sheppo 18261ae08745Sheppo /* Bring up LDC */ 18271ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK_SVC; 18281ae08745Sheppo ldc_attr.instance = ddi_get_instance(vds->dip); 18291ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; 18301ae08745Sheppo ldc_attr.qlen = VD_LDC_QLEN; 18311ae08745Sheppo if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) { 18321ae08745Sheppo PRN("ldc_init(%lu) = errno %d", ldc_id, status); 18331ae08745Sheppo return (status); 18341ae08745Sheppo } 18351ae08745Sheppo vd->initialized |= VD_LDC; 18361ae08745Sheppo 18371ae08745Sheppo if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, 18381ae08745Sheppo (caddr_t)vd)) != 0) { 18391ae08745Sheppo PRN("ldc_reg_callback() returned errno %d", status); 18401ae08745Sheppo return (status); 18411ae08745Sheppo } 18421ae08745Sheppo 18431ae08745Sheppo if ((status = ldc_open(vd->ldc_handle)) != 0) { 18441ae08745Sheppo PRN("ldc_open() returned errno %d", status); 18451ae08745Sheppo return (status); 18461ae08745Sheppo } 18471ae08745Sheppo 18481ae08745Sheppo 18491ae08745Sheppo /* Add the successfully-initialized vdisk to the server's table */ 18501ae08745Sheppo if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { 18511ae08745Sheppo PRN("Error adding vdisk ID %lu to table", id); 18521ae08745Sheppo return (EIO); 18531ae08745Sheppo } 18541ae08745Sheppo 18551ae08745Sheppo return (0); 18561ae08745Sheppo } 18571ae08745Sheppo 18581ae08745Sheppo /* 18591ae08745Sheppo * Destroy the state associated with a virtual disk 18601ae08745Sheppo */ 18611ae08745Sheppo static void 18621ae08745Sheppo vds_destroy_vd(void *arg) 18631ae08745Sheppo { 18641ae08745Sheppo vd_t *vd = (vd_t *)arg; 18651ae08745Sheppo 18661ae08745Sheppo 18671ae08745Sheppo PR0("Entered"); 18681ae08745Sheppo if (vd == NULL) 18691ae08745Sheppo return; 18701ae08745Sheppo 18711ae08745Sheppo /* Disable queuing requests for the vdisk */ 18721ae08745Sheppo if (vd->initialized & VD_LOCKING) { 18731ae08745Sheppo mutex_enter(&vd->lock); 18741ae08745Sheppo vd->enabled = 0; 18751ae08745Sheppo mutex_exit(&vd->lock); 18761ae08745Sheppo } 18771ae08745Sheppo 18781ae08745Sheppo /* Drain and destroy the task queue (*before* shutting down LDC) */ 18791ae08745Sheppo if (vd->initialized & VD_TASKQ) 18801ae08745Sheppo ddi_taskq_destroy(vd->taskq); /* waits for queued tasks */ 18811ae08745Sheppo 18821ae08745Sheppo /* Shut down LDC */ 18831ae08745Sheppo if (vd->initialized & VD_LDC) { 18841ae08745Sheppo if (vd->initialized & VD_DRING) 18851ae08745Sheppo (void) ldc_mem_dring_unmap(vd->dring_handle); 18861ae08745Sheppo (void) ldc_unreg_callback(vd->ldc_handle); 18871ae08745Sheppo (void) ldc_close(vd->ldc_handle); 18881ae08745Sheppo (void) ldc_fini(vd->ldc_handle); 18891ae08745Sheppo } 18901ae08745Sheppo 18911ae08745Sheppo /* Close any open backing-device slices */ 18921ae08745Sheppo for (uint_t slice = 0; slice < vd->nslices; slice++) { 18931ae08745Sheppo if (vd->ldi_handle[slice] != NULL) { 18941ae08745Sheppo PR0("Closing slice %u", slice); 18951ae08745Sheppo (void) ldi_close(vd->ldi_handle[slice], 18961ae08745Sheppo vd_open_flags, kcred); 18971ae08745Sheppo } 18981ae08745Sheppo } 18991ae08745Sheppo 19001ae08745Sheppo /* Free lock */ 19011ae08745Sheppo if (vd->initialized & VD_LOCKING) 19021ae08745Sheppo mutex_destroy(&vd->lock); 19031ae08745Sheppo 19041ae08745Sheppo /* Finally, free the vdisk structure itself */ 19051ae08745Sheppo kmem_free(vd, sizeof (*vd)); 19061ae08745Sheppo } 19071ae08745Sheppo 19081ae08745Sheppo static int 19091ae08745Sheppo vds_init_vd(vds_t *vds, uint64_t id, char *block_device, uint64_t ldc_id) 19101ae08745Sheppo { 19111ae08745Sheppo int status; 19121ae08745Sheppo vd_t *vd = NULL; 19131ae08745Sheppo 19141ae08745Sheppo 19151ae08745Sheppo #ifdef lint 19161ae08745Sheppo (void) vd; 19171ae08745Sheppo #endif /* lint */ 19181ae08745Sheppo 19191ae08745Sheppo if ((status = vds_do_init_vd(vds, id, block_device, ldc_id, &vd)) != 0) 19201ae08745Sheppo vds_destroy_vd(vd); 19211ae08745Sheppo 19221ae08745Sheppo return (status); 19231ae08745Sheppo } 19241ae08745Sheppo 19251ae08745Sheppo static int 19261ae08745Sheppo vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel, 19271ae08745Sheppo uint64_t *ldc_id) 19281ae08745Sheppo { 19291ae08745Sheppo int num_channels; 19301ae08745Sheppo 19311ae08745Sheppo 19321ae08745Sheppo /* Look for channel endpoint child(ren) of the vdisk MD node */ 19331ae08745Sheppo if ((num_channels = md_scan_dag(md, vd_node, 19341ae08745Sheppo md_find_name(md, VD_CHANNEL_ENDPOINT), 19351ae08745Sheppo md_find_name(md, "fwd"), channel)) <= 0) { 19361ae08745Sheppo PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); 19371ae08745Sheppo return (-1); 19381ae08745Sheppo } 19391ae08745Sheppo 19401ae08745Sheppo /* Get the "id" value for the first channel endpoint node */ 19411ae08745Sheppo if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) { 19421ae08745Sheppo PRN("No \"%s\" property found for \"%s\" of vdisk", 19431ae08745Sheppo VD_ID_PROP, VD_CHANNEL_ENDPOINT); 19441ae08745Sheppo return (-1); 19451ae08745Sheppo } 19461ae08745Sheppo 19471ae08745Sheppo if (num_channels > 1) { 19481ae08745Sheppo PRN("Using ID of first of multiple channels for this vdisk"); 19491ae08745Sheppo } 19501ae08745Sheppo 19511ae08745Sheppo return (0); 19521ae08745Sheppo } 19531ae08745Sheppo 19541ae08745Sheppo static int 19551ae08745Sheppo vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id) 19561ae08745Sheppo { 19571ae08745Sheppo int num_nodes, status; 19581ae08745Sheppo size_t size; 19591ae08745Sheppo mde_cookie_t *channel; 19601ae08745Sheppo 19611ae08745Sheppo 19621ae08745Sheppo if ((num_nodes = md_node_count(md)) <= 0) { 19631ae08745Sheppo PRN("Invalid node count in Machine Description subtree"); 19641ae08745Sheppo return (-1); 19651ae08745Sheppo } 19661ae08745Sheppo size = num_nodes*(sizeof (*channel)); 19671ae08745Sheppo channel = kmem_zalloc(size, KM_SLEEP); 19681ae08745Sheppo status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id); 19691ae08745Sheppo kmem_free(channel, size); 19701ae08745Sheppo 19711ae08745Sheppo return (status); 19721ae08745Sheppo } 19731ae08745Sheppo 19741ae08745Sheppo static void 19751ae08745Sheppo vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 19761ae08745Sheppo { 19771ae08745Sheppo char *block_device = NULL; 19781ae08745Sheppo uint64_t id = 0, ldc_id = 0; 19791ae08745Sheppo 19801ae08745Sheppo 19811ae08745Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 19821ae08745Sheppo PRN("Error getting vdisk \"%s\"", VD_ID_PROP); 19831ae08745Sheppo return; 19841ae08745Sheppo } 19851ae08745Sheppo PR0("Adding vdisk ID %lu", id); 19861ae08745Sheppo if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, 19871ae08745Sheppo &block_device) != 0) { 19881ae08745Sheppo PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 19891ae08745Sheppo return; 19901ae08745Sheppo } 19911ae08745Sheppo 19921ae08745Sheppo if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) { 19931ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", id); 19941ae08745Sheppo return; 19951ae08745Sheppo } 19961ae08745Sheppo 19971ae08745Sheppo if (vds_init_vd(vds, id, block_device, ldc_id) != 0) { 19981ae08745Sheppo PRN("Failed to add vdisk ID %lu", id); 19991ae08745Sheppo return; 20001ae08745Sheppo } 20011ae08745Sheppo } 20021ae08745Sheppo 20031ae08745Sheppo static void 20041ae08745Sheppo vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 20051ae08745Sheppo { 20061ae08745Sheppo uint64_t id = 0; 20071ae08745Sheppo 20081ae08745Sheppo 20091ae08745Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 20101ae08745Sheppo PRN("Unable to get \"%s\" property from vdisk's MD node", 20111ae08745Sheppo VD_ID_PROP); 20121ae08745Sheppo return; 20131ae08745Sheppo } 20141ae08745Sheppo PR0("Removing vdisk ID %lu", id); 20151ae08745Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 20161ae08745Sheppo PRN("No vdisk entry found for vdisk ID %lu", id); 20171ae08745Sheppo } 20181ae08745Sheppo 20191ae08745Sheppo static void 20201ae08745Sheppo vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node, 20211ae08745Sheppo md_t *curr_md, mde_cookie_t curr_vd_node) 20221ae08745Sheppo { 20231ae08745Sheppo char *curr_dev, *prev_dev; 20241ae08745Sheppo uint64_t curr_id = 0, curr_ldc_id = 0; 20251ae08745Sheppo uint64_t prev_id = 0, prev_ldc_id = 0; 20261ae08745Sheppo size_t len; 20271ae08745Sheppo 20281ae08745Sheppo 20291ae08745Sheppo /* Validate that vdisk ID has not changed */ 20301ae08745Sheppo if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) { 20311ae08745Sheppo PRN("Error getting previous vdisk \"%s\" property", 20321ae08745Sheppo VD_ID_PROP); 20331ae08745Sheppo return; 20341ae08745Sheppo } 20351ae08745Sheppo if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) { 20361ae08745Sheppo PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP); 20371ae08745Sheppo return; 20381ae08745Sheppo } 20391ae08745Sheppo if (curr_id != prev_id) { 20401ae08745Sheppo PRN("Not changing vdisk: ID changed from %lu to %lu", 20411ae08745Sheppo prev_id, curr_id); 20421ae08745Sheppo return; 20431ae08745Sheppo } 20441ae08745Sheppo 20451ae08745Sheppo /* Validate that LDC ID has not changed */ 20461ae08745Sheppo if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) { 20471ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", prev_id); 20481ae08745Sheppo return; 20491ae08745Sheppo } 20501ae08745Sheppo 20511ae08745Sheppo if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) { 20521ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", curr_id); 20531ae08745Sheppo return; 20541ae08745Sheppo } 20551ae08745Sheppo if (curr_ldc_id != prev_ldc_id) { 2056*0a55fbb7Slm66018 _NOTE(NOTREACHED); /* lint is confused */ 20571ae08745Sheppo PRN("Not changing vdisk: " 20581ae08745Sheppo "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id); 20591ae08745Sheppo return; 20601ae08745Sheppo } 20611ae08745Sheppo 20621ae08745Sheppo /* Determine whether device path has changed */ 20631ae08745Sheppo if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP, 20641ae08745Sheppo &prev_dev) != 0) { 20651ae08745Sheppo PRN("Error getting previous vdisk \"%s\"", 20661ae08745Sheppo VD_BLOCK_DEVICE_PROP); 20671ae08745Sheppo return; 20681ae08745Sheppo } 20691ae08745Sheppo if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP, 20701ae08745Sheppo &curr_dev) != 0) { 20711ae08745Sheppo PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 20721ae08745Sheppo return; 20731ae08745Sheppo } 20741ae08745Sheppo if (((len = strlen(curr_dev)) == strlen(prev_dev)) && 20751ae08745Sheppo (strncmp(curr_dev, prev_dev, len) == 0)) 20761ae08745Sheppo return; /* no relevant (supported) change */ 20771ae08745Sheppo 20781ae08745Sheppo PR0("Changing vdisk ID %lu", prev_id); 20791ae08745Sheppo /* Remove old state, which will close vdisk and reset */ 20801ae08745Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0) 20811ae08745Sheppo PRN("No entry found for vdisk ID %lu", prev_id); 20821ae08745Sheppo /* Re-initialize vdisk with new state */ 20831ae08745Sheppo if (vds_init_vd(vds, curr_id, curr_dev, curr_ldc_id) != 0) { 20841ae08745Sheppo PRN("Failed to change vdisk ID %lu", curr_id); 20851ae08745Sheppo return; 20861ae08745Sheppo } 20871ae08745Sheppo } 20881ae08745Sheppo 20891ae08745Sheppo static int 20901ae08745Sheppo vds_process_md(void *arg, mdeg_result_t *md) 20911ae08745Sheppo { 20921ae08745Sheppo int i; 20931ae08745Sheppo vds_t *vds = arg; 20941ae08745Sheppo 20951ae08745Sheppo 20961ae08745Sheppo if (md == NULL) 20971ae08745Sheppo return (MDEG_FAILURE); 20981ae08745Sheppo ASSERT(vds != NULL); 20991ae08745Sheppo 21001ae08745Sheppo for (i = 0; i < md->removed.nelem; i++) 21011ae08745Sheppo vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]); 21021ae08745Sheppo for (i = 0; i < md->match_curr.nelem; i++) 21031ae08745Sheppo vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i], 21041ae08745Sheppo md->match_curr.mdp, md->match_curr.mdep[i]); 21051ae08745Sheppo for (i = 0; i < md->added.nelem; i++) 21061ae08745Sheppo vds_add_vd(vds, md->added.mdp, md->added.mdep[i]); 21071ae08745Sheppo 21081ae08745Sheppo return (MDEG_SUCCESS); 21091ae08745Sheppo } 21101ae08745Sheppo 21111ae08745Sheppo static int 21121ae08745Sheppo vds_do_attach(dev_info_t *dip) 21131ae08745Sheppo { 21141ae08745Sheppo static char reg_prop[] = "reg"; /* devinfo ID prop */ 21151ae08745Sheppo 21161ae08745Sheppo /* MDEG specification for a (particular) vds node */ 21171ae08745Sheppo static mdeg_prop_spec_t vds_prop_spec[] = { 21181ae08745Sheppo {MDET_PROP_STR, "name", {VDS_NAME}}, 21191ae08745Sheppo {MDET_PROP_VAL, "cfg-handle", {0}}, 21201ae08745Sheppo {MDET_LIST_END, NULL, {0}}}; 21211ae08745Sheppo static mdeg_node_spec_t vds_spec = {"virtual-device", vds_prop_spec}; 21221ae08745Sheppo 21231ae08745Sheppo /* MDEG specification for matching a vd node */ 21241ae08745Sheppo static md_prop_match_t vd_prop_spec[] = { 21251ae08745Sheppo {MDET_PROP_VAL, VD_ID_PROP}, 21261ae08745Sheppo {MDET_LIST_END, NULL}}; 21271ae08745Sheppo static mdeg_node_match_t vd_spec = {"virtual-device-port", 21281ae08745Sheppo vd_prop_spec}; 21291ae08745Sheppo 21301ae08745Sheppo int status; 21311ae08745Sheppo uint64_t cfg_handle; 21321ae08745Sheppo minor_t instance = ddi_get_instance(dip); 21331ae08745Sheppo vds_t *vds; 21341ae08745Sheppo 21351ae08745Sheppo 21361ae08745Sheppo /* 21371ae08745Sheppo * The "cfg-handle" property of a vds node in an MD contains the MD's 21381ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 21391ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 21401ae08745Sheppo * the "reg" property on the node in the device tree it builds from 21411ae08745Sheppo * the MD and passes to Solaris. Thus, we look up the devinfo node's 21421ae08745Sheppo * "reg" property value to uniquely identify this device instance when 21431ae08745Sheppo * registering with the MD event-generation framework. If the "reg" 21441ae08745Sheppo * property cannot be found, the device tree state is presumably so 21451ae08745Sheppo * broken that there is no point in continuing. 21461ae08745Sheppo */ 21471ae08745Sheppo if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, reg_prop)) { 21481ae08745Sheppo PRN("vds \"%s\" property does not exist", reg_prop); 21491ae08745Sheppo return (DDI_FAILURE); 21501ae08745Sheppo } 21511ae08745Sheppo 21521ae08745Sheppo /* Get the MD instance for later MDEG registration */ 21531ae08745Sheppo cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 21541ae08745Sheppo reg_prop, -1); 21551ae08745Sheppo 21561ae08745Sheppo if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) { 21571ae08745Sheppo PRN("Could not allocate state for instance %u", instance); 21581ae08745Sheppo return (DDI_FAILURE); 21591ae08745Sheppo } 21601ae08745Sheppo 21611ae08745Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 21621ae08745Sheppo PRN("Could not get state for instance %u", instance); 21631ae08745Sheppo ddi_soft_state_free(vds_state, instance); 21641ae08745Sheppo return (DDI_FAILURE); 21651ae08745Sheppo } 21661ae08745Sheppo 21671ae08745Sheppo 21681ae08745Sheppo vds->dip = dip; 21691ae08745Sheppo vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS, 21701ae08745Sheppo vds_destroy_vd, 21711ae08745Sheppo sizeof (void *)); 21721ae08745Sheppo ASSERT(vds->vd_table != NULL); 21731ae08745Sheppo 21741ae08745Sheppo mutex_init(&vds->lock, NULL, MUTEX_DRIVER, NULL); 21751ae08745Sheppo vds->initialized |= VDS_LOCKING; 21761ae08745Sheppo 21771ae08745Sheppo if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) { 21781ae08745Sheppo PRN("ldi_ident_from_dip() returned errno %d", status); 21791ae08745Sheppo return (DDI_FAILURE); 21801ae08745Sheppo } 21811ae08745Sheppo vds->initialized |= VDS_LDI; 21821ae08745Sheppo 21831ae08745Sheppo /* Register for MD updates */ 21841ae08745Sheppo vds_prop_spec[1].ps_val = cfg_handle; 21851ae08745Sheppo if (mdeg_register(&vds_spec, &vd_spec, vds_process_md, vds, 21861ae08745Sheppo &vds->mdeg) != MDEG_SUCCESS) { 21871ae08745Sheppo PRN("Unable to register for MD updates"); 21881ae08745Sheppo return (DDI_FAILURE); 21891ae08745Sheppo } 21901ae08745Sheppo vds->initialized |= VDS_MDEG; 21911ae08745Sheppo 2192*0a55fbb7Slm66018 /* Prevent auto-detaching so driver is available whenever MD changes */ 2193*0a55fbb7Slm66018 if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != 2194*0a55fbb7Slm66018 DDI_PROP_SUCCESS) { 2195*0a55fbb7Slm66018 PRN("failed to set \"%s\" property for instance %u", 2196*0a55fbb7Slm66018 DDI_NO_AUTODETACH, instance); 2197*0a55fbb7Slm66018 } 2198*0a55fbb7Slm66018 21991ae08745Sheppo ddi_report_dev(dip); 22001ae08745Sheppo return (DDI_SUCCESS); 22011ae08745Sheppo } 22021ae08745Sheppo 22031ae08745Sheppo static int 22041ae08745Sheppo vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 22051ae08745Sheppo { 22061ae08745Sheppo int status; 22071ae08745Sheppo 22081ae08745Sheppo PR0("Entered"); 22091ae08745Sheppo switch (cmd) { 22101ae08745Sheppo case DDI_ATTACH: 22111ae08745Sheppo if ((status = vds_do_attach(dip)) != DDI_SUCCESS) 22121ae08745Sheppo (void) vds_detach(dip, DDI_DETACH); 22131ae08745Sheppo return (status); 22141ae08745Sheppo case DDI_RESUME: 22151ae08745Sheppo /* nothing to do for this non-device */ 22161ae08745Sheppo return (DDI_SUCCESS); 22171ae08745Sheppo default: 22181ae08745Sheppo return (DDI_FAILURE); 22191ae08745Sheppo } 22201ae08745Sheppo } 22211ae08745Sheppo 22221ae08745Sheppo static struct dev_ops vds_ops = { 22231ae08745Sheppo DEVO_REV, /* devo_rev */ 22241ae08745Sheppo 0, /* devo_refcnt */ 22251ae08745Sheppo ddi_no_info, /* devo_getinfo */ 22261ae08745Sheppo nulldev, /* devo_identify */ 22271ae08745Sheppo nulldev, /* devo_probe */ 22281ae08745Sheppo vds_attach, /* devo_attach */ 22291ae08745Sheppo vds_detach, /* devo_detach */ 22301ae08745Sheppo nodev, /* devo_reset */ 22311ae08745Sheppo NULL, /* devo_cb_ops */ 22321ae08745Sheppo NULL, /* devo_bus_ops */ 22331ae08745Sheppo nulldev /* devo_power */ 22341ae08745Sheppo }; 22351ae08745Sheppo 22361ae08745Sheppo static struct modldrv modldrv = { 22371ae08745Sheppo &mod_driverops, 22381ae08745Sheppo "virtual disk server v%I%", 22391ae08745Sheppo &vds_ops, 22401ae08745Sheppo }; 22411ae08745Sheppo 22421ae08745Sheppo static struct modlinkage modlinkage = { 22431ae08745Sheppo MODREV_1, 22441ae08745Sheppo &modldrv, 22451ae08745Sheppo NULL 22461ae08745Sheppo }; 22471ae08745Sheppo 22481ae08745Sheppo 22491ae08745Sheppo int 22501ae08745Sheppo _init(void) 22511ae08745Sheppo { 22521ae08745Sheppo int i, status; 22531ae08745Sheppo 22541ae08745Sheppo 22551ae08745Sheppo PR0("Built %s %s", __DATE__, __TIME__); 22561ae08745Sheppo if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0) 22571ae08745Sheppo return (status); 22581ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) { 22591ae08745Sheppo ddi_soft_state_fini(&vds_state); 22601ae08745Sheppo return (status); 22611ae08745Sheppo } 22621ae08745Sheppo 22631ae08745Sheppo /* Fill in the bit-mask of server-supported operations */ 22641ae08745Sheppo for (i = 0; i < vds_noperations; i++) 22651ae08745Sheppo vds_operations |= 1 << (vds_operation[i].operation - 1); 22661ae08745Sheppo 22671ae08745Sheppo return (0); 22681ae08745Sheppo } 22691ae08745Sheppo 22701ae08745Sheppo int 22711ae08745Sheppo _info(struct modinfo *modinfop) 22721ae08745Sheppo { 22731ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 22741ae08745Sheppo } 22751ae08745Sheppo 22761ae08745Sheppo int 22771ae08745Sheppo _fini(void) 22781ae08745Sheppo { 22791ae08745Sheppo int status; 22801ae08745Sheppo 22811ae08745Sheppo 22821ae08745Sheppo PR0("Entered"); 22831ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 22841ae08745Sheppo return (status); 22851ae08745Sheppo ddi_soft_state_fini(&vds_state); 22861ae08745Sheppo return (0); 22871ae08745Sheppo } 2288