/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Virtual disk server
 */


#include <sys/types.h>
#include <sys/conf.h>
#include <sys/crc32.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/file.h>
#include <sys/mdeg.h>
#include <sys/modhash.h>
#include <sys/note.h>
#include <sys/pathname.h>
#include <sys/sdt.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
#include <sys/sysmacros.h>
#include <sys/vio_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vtoc.h>
#include <sys/vfs.h>
#include <sys/stat.h>
#include <sys/scsi/impl/uscsi.h>
#include <vm/seg_map.h>

/* Virtual disk server initialization flags */
#define	VDS_LDI			0x01
#define	VDS_MDEG		0x02

/* Virtual disk server tunable parameters */
#define	VDS_RETRIES		5
#define	VDS_LDC_DELAY		1000	/* 1 msec */
#define	VDS_DEV_DELAY		10000000 /* 10 secs */
#define	VDS_NCHAINS		32

/* Identification parameters for MD, synthetic dkio(7i) structures, etc. */
#define	VDS_NAME		"virtual-disk-server"

#define	VD_NAME			"vd"
#define	VD_VOLUME_NAME		"vdisk"
#define	VD_ASCIILABEL		"Virtual Disk"

#define	VD_CHANNEL_ENDPOINT	"channel-endpoint"
#define	VD_ID_PROP		"id"
#define	VD_BLOCK_DEVICE_PROP	"vds-block-device"
#define	VD_REG_PROP		"reg"

/* Virtual disk initialization flags */
#define	VD_DISK_READY		0x01
#define	VD_LOCKING		0x02
#define	VD_LDC			0x04
#define	VD_DRING		0x08
#define	VD_SID			0x10
#define	VD_SEQ_NUM		0x20

/* Flags for opening/closing backing devices via LDI */
#define	VD_OPEN_FLAGS		(FEXCL | FREAD | FWRITE)

/* Flags for writing to a vdisk which is a file */
#define	VD_FILE_WRITE_FLAGS	SM_ASYNC

/* Number of backup labels */
#define	VD_FILE_NUM_BACKUP	5

/* Timeout for SCSI I/O */
#define	VD_SCSI_RDWR_TIMEOUT	30	/* 30 secs */

/*
 * By Solaris convention, slice/partition 2 represents the entire disk;
 * unfortunately, this convention does not appear to be codified.
 */
#define	VD_ENTIRE_DISK_SLICE	2

/* Return a cpp token as a string */
#define	STRINGIZE(token)	#token

/*
 * Print a message prefixed with the current function name to the message log
 * (and optionally to the console for verbose boots); these macros use cpp's
 * concatenation of string literals and C99 variable-length-argument-list
 * macros.
 */
#define	PRN(...)	_PRN("?%s(): "__VA_ARGS__, "")
#define	_PRN(format, ...)					\
	cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__)
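/*
 * For example, PRN("%d bytes left", n) first expands to
 * _PRN("?%s(): ""%d bytes left", n, "") and then, via string-literal
 * concatenation, to
 *
 *	cmn_err(CE_CONT, "?%s(): %d bytes left%s", __func__, n, "");
 *
 * The trailing "" argument keeps _PRN's variadic list non-empty even when
 * PRN is invoked with a format string alone.
 */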

/* Return a pointer to the "i"th vdisk dring element */
#define	VD_DRING_ELEM(i)	((vd_dring_entry_t *)(void *)	\
	    (vd->dring + (i)*vd->descriptor_size))

/* Return the virtual disk client's type as a string (for use in messages) */
#define	VD_CLIENT(vd)							\
	(((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" :	\
	    (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" :	\
		(((vd)->xfer_mode == 0) ? "null client" :		\
		    "unsupported client")))

/* Read disk label from a disk on file */
#define	VD_FILE_LABEL_READ(vd, labelp) \
	vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)labelp, \
	    0, sizeof (struct dk_label))

/* Write disk label to a disk on file */
#define	VD_FILE_LABEL_WRITE(vd, labelp)	\
	vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)labelp, \
	    0, sizeof (struct dk_label))

/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vds instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t	vds_prop_template[] = {
	{ MDET_PROP_STR,	"name",		VDS_NAME },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
	{ MDET_LIST_END,	NULL,		NULL }
};

#define	VDS_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val);

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t	vd_prop_match[] = {
	{ MDET_PROP_VAL, VD_ID_PROP },
	{ MDET_LIST_END, NULL }
};

static mdeg_node_match_t vd_match = {"virtual-device-port",
				vd_prop_match};

/* Debugging macros */
#ifdef DEBUG

static int	vd_msglevel = 0;

#define	PR0 if (vd_msglevel > 0)	PRN
#define	PR1 if (vd_msglevel > 1)	PRN
#define	PR2 if (vd_msglevel > 2)	PRN

#define	VD_DUMP_DRING_ELEM(elem)					\
	PR0("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n",		\
	    elem->hdr.dstate,						\
	    elem->payload.operation,					\
	    elem->payload.status,					\
	    elem->payload.nbytes,					\
	    elem->payload.addr,						\
	    elem->payload.ncookies);

char *
vd_decode_state(int state)
{
	char *str;

#define	CASE_STATE(_s)	case _s: str = #_s; break;

	switch (state) {
	CASE_STATE(VD_STATE_INIT)
	CASE_STATE(VD_STATE_VER)
	CASE_STATE(VD_STATE_ATTR)
	CASE_STATE(VD_STATE_DRING)
	CASE_STATE(VD_STATE_RDX)
	CASE_STATE(VD_STATE_DATA)
	default: str = "unknown"; break;
	}

#undef CASE_STATE

	return (str);
}

void
vd_decode_tag(vio_msg_t *msg)
{
	char *tstr, *sstr, *estr;

#define	CASE_TYPE(_s)	case _s: tstr = #_s; break;

	switch (msg->tag.vio_msgtype) {
	CASE_TYPE(VIO_TYPE_CTRL)
	CASE_TYPE(VIO_TYPE_DATA)
	CASE_TYPE(VIO_TYPE_ERR)
	default: tstr = "unknown"; break;
	}

#undef CASE_TYPE

#define	CASE_SUBTYPE(_s) case _s: sstr = #_s; break;

	switch (msg->tag.vio_subtype) {
	CASE_SUBTYPE(VIO_SUBTYPE_INFO)
	CASE_SUBTYPE(VIO_SUBTYPE_ACK)
	CASE_SUBTYPE(VIO_SUBTYPE_NACK)
	default: sstr = "unknown"; break;
	}

#undef CASE_SUBTYPE

#define	CASE_ENV(_s)	case _s: estr = #_s; break;

	switch (msg->tag.vio_subtype_env) {
	CASE_ENV(VIO_VER_INFO)
	CASE_ENV(VIO_ATTR_INFO)
	CASE_ENV(VIO_DRING_REG)
	CASE_ENV(VIO_DRING_UNREG)
	CASE_ENV(VIO_RDX)
	CASE_ENV(VIO_PKT_DATA)
	CASE_ENV(VIO_DESC_DATA)
	CASE_ENV(VIO_DRING_DATA)
	default: estr = "unknown"; break;
	}

#undef CASE_ENV

	PR1("(%x/%x/%x) message : (%s/%s/%s)",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, tstr, sstr, estr);
}
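/*
 * For example, vd_decode_state(VD_STATE_VER) returns the string
 * "VD_STATE_VER": each CASE_STATE(_s) use above expands to a case label
 * whose body stringizes the constant with the cpp "#" operator, and the
 * CASE_TYPE/CASE_SUBTYPE/CASE_ENV macros follow the same pattern.
 */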

#else	/* !DEBUG */

#define	PR0(...)
#define	PR1(...)
#define	PR2(...)

#define	VD_DUMP_DRING_ELEM(elem)

#define	vd_decode_state(_s)	(NULL)
#define	vd_decode_tag(_s)	(NULL)

#endif	/* DEBUG */


/*
 * Soft state structure for a vds instance
 */
typedef struct vds {
	uint_t		initialized;	/* driver inst initialization flags */
	dev_info_t	*dip;		/* driver inst devinfo pointer */
	ldi_ident_t	ldi_ident;	/* driver's identifier for LDI */
	mod_hash_t	*vd_table;	/* table of virtual disks served */
	mdeg_node_spec_t *ispecp;	/* mdeg node specification */
	mdeg_handle_t	mdeg;		/* handle for MDEG operations */
} vds_t;

/*
 * Types of descriptor-processing tasks
 */
typedef enum vd_task_type {
	VD_NONFINAL_RANGE_TASK,	/* task for intermediate descriptor in range */
	VD_FINAL_RANGE_TASK,	/* task for last in a range of descriptors */
} vd_task_type_t;

/*
 * Structure describing the task for processing a descriptor
 */
typedef struct vd_task {
	struct vd		*vd;		/* vd instance task is for */
	vd_task_type_t		type;		/* type of descriptor task */
	int			index;		/* dring elem index for task */
	vio_msg_t		*msg;		/* VIO message task is for */
	size_t			msglen;		/* length of message content */
	vd_dring_payload_t	*request;	/* request task will perform */
	struct buf		buf;		/* buf(9s) for I/O request */
	ldc_mem_handle_t	mhdl;		/* task memory handle */
	int			status;		/* status of processing task */
	int	(*completef)(struct vd_task *task); /* completion func ptr */
} vd_task_t;
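/*
 * Informal sketch of the task lifecycle implemented by this driver: a
 * start function (e.g. vd_start_bio(), below) runs first and either
 * finishes the request or returns EINPROGRESS; in the EINPROGRESS case
 * the completion function saved in "completef" (e.g. vd_complete_bio())
 * is invoked later from the completion task queue (see vd_complete(),
 * below) before the client is notified.
 */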

/*
 * Soft state structure for a virtual disk instance
 */
typedef struct vd {
	uint_t			initialized;	/* vdisk initialization flags */
	vds_t			*vds;		/* server for this vdisk */
	ddi_taskq_t		*startq;	/* queue for I/O start tasks */
	ddi_taskq_t		*completionq;	/* queue for completion tasks */
	ldi_handle_t		ldi_handle[V_NUMPAR];	/* LDI slice handles */
	char			device_path[MAXPATHLEN + 1]; /* vdisk device */
	dev_t			dev[V_NUMPAR];	/* dev numbers for slices */
	uint_t			nslices;	/* number of slices */
	size_t			vdisk_size;	/* number of blocks in vdisk */
	vd_disk_type_t		vdisk_type;	/* slice or entire disk */
	vd_disk_label_t		vdisk_label;	/* EFI or VTOC label */
	ushort_t		max_xfer_sz;	/* max xfer size in DEV_BSIZE */
	boolean_t		pseudo;		/* underlying pseudo dev */
	boolean_t		file;		/* underlying file */
	vnode_t			*file_vnode;	/* file vnode */
	size_t			file_size;	/* file size */
	ddi_devid_t		file_devid;	/* devid for disk image */
	struct dk_efi		dk_efi;		/* synthetic for slice type */
	struct dk_geom		dk_geom;	/* synthetic for slice type */
	struct vtoc		vtoc;		/* synthetic for slice type */
	ldc_status_t		ldc_state;	/* LDC connection state */
	ldc_handle_t		ldc_handle;	/* handle for LDC comm */
	size_t			max_msglen;	/* largest LDC message len */
	vd_state_t		state;		/* client handshake state */
	uint8_t			xfer_mode;	/* transfer mode with client */
	uint32_t		sid;		/* client's session ID */
	uint64_t		seq_num;	/* message sequence number */
	uint64_t		dring_ident;	/* identifier of dring */
	ldc_dring_handle_t	dring_handle;	/* handle for dring ops */
	uint32_t		descriptor_size;	/* num bytes in desc */
	uint32_t		dring_len;	/* number of dring elements */
	caddr_t			dring;		/* address of dring */
	caddr_t			vio_msgp;	/* vio msg staging buffer */
	vd_task_t		inband_task;	/* task for inband descriptor */
	vd_task_t		*dring_task;	/* tasks for dring elements */

	kmutex_t		lock;		/* protects variables below */
	boolean_t		enabled;	/* is vdisk enabled? */
	boolean_t		reset_state;	/* reset connection state? */
	boolean_t		reset_ldc;	/* reset LDC channel? */
} vd_t;

typedef struct vds_operation {
	char	*namep;
	uint8_t	operation;
	int	(*start)(vd_task_t *task);
	int	(*complete)(vd_task_t *task);
} vds_operation_t;

typedef struct vd_ioctl {
	uint8_t		operation;	/* vdisk operation */
	const char	*operation_name;	/* vdisk operation name */
	size_t		nbytes;		/* size of operation buffer */
	int		cmd;		/* corresponding ioctl cmd */
	const char	*cmd_name;	/* ioctl cmd name */
	void		*arg;		/* ioctl cmd argument */
	/* convert input vd_buf to output ioctl_arg */
	void		(*copyin)(void *vd_buf, void *ioctl_arg);
	/* convert input ioctl_arg to output vd_buf */
	void		(*copyout)(void *ioctl_arg, void *vd_buf);
} vd_ioctl_t;

/* Define trivial copyin/copyout conversion function flag */
#define	VD_IDENTITY	((void (*)(void *, void *))-1)


static int	vds_ldc_retries = VDS_RETRIES;
static int	vds_ldc_delay = VDS_LDC_DELAY;
static int	vds_dev_retries = VDS_RETRIES;
static int	vds_dev_delay = VDS_DEV_DELAY;
static void	*vds_state;
static uint64_t	vds_operations;	/* see vds_operation[] definition below */

static int	vd_open_flags = VD_OPEN_FLAGS;

static uint_t	vd_file_write_flags = VD_FILE_WRITE_FLAGS;

static short	vd_scsi_rdwr_timeout = VD_SCSI_RDWR_TIMEOUT;

/*
 * Supported protocol version pairs, from highest (newest) to lowest (oldest)
 *
 * Each supported major version should appear only once, paired with (and only
 * with) its highest supported minor version number (as the protocol requires
 * supporting all lower minor version numbers as well)
 */
static const vio_ver_t	vds_version[] = {{1, 0}};
static const size_t	vds_num_versions =
    sizeof (vds_version)/sizeof (vds_version[0]);
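/*
 * For instance, if a hypothetical 2.x protocol revision were ever added
 * alongside 1.0, the rule above would give a table listing each major
 * version once, with its highest minor, newest first:
 *
 *	static const vio_ver_t	vds_version[] = {{2, 1}, {1, 0}};
 *
 * (Illustrative only; this driver currently supports just {1, 0}.)
 */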
static void vd_free_dring_task(vd_t *vdp);
static int vd_setup_vd(vd_t *vd);
static boolean_t vd_enabled(vd_t *vd);
static ushort_t vd_lbl2cksum(struct dk_label *label);
static int vd_file_validate_geometry(vd_t *vd);

/*
 * Function:
 *	vd_file_rw
 *
 * Description:
 *	Read or write to a disk on file.
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
 *	slice		- slice on which the operation is performed,
 *			  VD_SLICE_NONE indicates that the operation
 *			  is done using an absolute disk offset.
 *	operation	- operation to execute: read (VD_OP_BREAD) or
 *			  write (VD_OP_BWRITE).
 *	data		- buffer where data are read to or written from.
 *	blk		- starting block for the operation.
 *	len		- number of bytes to read or write.
 *
 * Return Code:
 *	n >= 0		- success, n indicates the number of bytes read
 *			  or written.
 *	-1		- error.
 */
static ssize_t
vd_file_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t blk,
    size_t len)
{
	caddr_t	maddr;
	size_t offset, maxlen, moffset, mlen, n;
	uint_t smflags;
	enum seg_rw srw;

	ASSERT(vd->file);
	ASSERT(len > 0);

	if (slice == VD_SLICE_NONE) {
		/* raw disk access */
		offset = blk * DEV_BSIZE;
	} else {
		ASSERT(slice >= 0 && slice < V_NUMPAR);

		if (vd->vdisk_label == VD_DISK_LABEL_UNK &&
		    vd_file_validate_geometry(vd) != 0) {
			PR0("Unknown disk label, can't do I/O from slice %d",
			    slice);
			return (-1);
		}

		if (blk >= vd->vtoc.v_part[slice].p_size) {
			/* address past the end of the slice */
			PR0("req_addr (0x%lx) > psize (0x%lx)",
			    blk, vd->vtoc.v_part[slice].p_size);
			return (0);
		}

		offset = (vd->vtoc.v_part[slice].p_start + blk) * DEV_BSIZE;

		/*
		 * If the requested size is greater than the size
		 * of the partition, truncate the read/write.
		 */
		maxlen = (vd->vtoc.v_part[slice].p_size - blk) * DEV_BSIZE;

		if (len > maxlen) {
			PR0("I/O size truncated to %lu bytes from %lu bytes",
			    maxlen, len);
			len = maxlen;
		}
	}

	/*
	 * We have to ensure that we are reading/writing into the mmap
	 * range. If we have a partial disk image (e.g. an image of
	 * s0 instead of s2) the system can try to access slices that
	 * are not included in the disk image.
	 */
	if ((offset + len) >= vd->file_size) {
		PR0("offset + nbytes (0x%lx + 0x%lx) >= "
		    "file_size (0x%lx)", offset, len, vd->file_size);
		return (-1);
	}

	srw = (operation == VD_OP_BREAD)? S_READ : S_WRITE;
	smflags = (operation == VD_OP_BREAD)? 0 :
	    (SM_WRITE | vd_file_write_flags);
	n = len;

	do {
		/*
		 * segmap_getmapflt() returns a MAXBSIZE chunk which is
		 * MAXBSIZE aligned.
		 */
		moffset = offset & MAXBOFFSET;
		mlen = MIN(MAXBSIZE - moffset, n);
		maddr = segmap_getmapflt(segkmap, vd->file_vnode, offset,
		    mlen, 1, srw);
		/*
		 * Fault in the pages so we can check for errors and ensure
		 * that we can safely use the mapped address.
		 */
		if (segmap_fault(kas.a_hat, segkmap, maddr, mlen,
		    F_SOFTLOCK, srw) != 0) {
			(void) segmap_release(segkmap, maddr, 0);
			return (-1);
		}

		if (operation == VD_OP_BREAD)
			bcopy(maddr + moffset, data, mlen);
		else
			bcopy(data, maddr + moffset, mlen);

		if (segmap_fault(kas.a_hat, segkmap, maddr, mlen,
		    F_SOFTUNLOCK, srw) != 0) {
			(void) segmap_release(segkmap, maddr, 0);
			return (-1);
		}
		if (segmap_release(segkmap, maddr, smflags) != 0)
			return (-1);
		n -= mlen;
		offset += mlen;
		data += mlen;

	} while (n > 0);

	return (len);
}
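/*
 * Worked example of the chunking above, assuming the common MAXBSIZE of
 * 8K (so MAXBOFFSET == 0x1fff): a 0x3000-byte request starting at file
 * offset 0x1a00 is split into a first chunk of 0x2000 - 0x1a00 = 0x0600
 * bytes (up to the next 8K boundary), then a full 0x2000-byte chunk, then
 * a final 0x0a00-byte chunk, so that each segmap mapping stays within a
 * single MAXBSIZE-aligned window.
 */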

/*
 * Function:
 *	vd_file_build_default_label
 *
 * Description:
 *	Return a default label for the given disk. This is used when the disk
 *	does not have a valid VTOC so that the user can get a valid default
 *	configuration. The default label has all slice sizes set to 0 (except
 *	slice 2 which is the entire disk) to force the user to write a valid
 *	label onto the disk image.
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
 *	label		- the returned default label.
 *
 * Return Code:
 *	none.
 */
static void
vd_file_build_default_label(vd_t *vd, struct dk_label *label)
{
	size_t size;
	char prefix;

	ASSERT(vd->file);

	/*
	 * We must have a reasonable number of cylinders and sectors so
	 * that newfs can run using default values.
	 *
	 * if (disk_size < 2MB)
	 *	phys_cylinders = disk_size / 100K
	 * else
	 *	phys_cylinders = disk_size / 300K
	 *
	 * phys_cylinders = (phys_cylinders == 0) ? 1 : phys_cylinders
	 * alt_cylinders = (phys_cylinders > 2) ? 2 : 0;
	 * data_cylinders = phys_cylinders - alt_cylinders
	 *
	 * sectors = disk_size / (phys_cylinders * blk_size)
	 *
	 * The file size test is an attempt to not have too few cylinders
	 * for a small file, or so many on a big file that you waste space
	 * for backup superblocks or cylinder group structures.
	 */
	if (vd->file_size < (2 * 1024 * 1024))
		label->dkl_pcyl = vd->file_size / (100 * 1024);
	else
		label->dkl_pcyl = vd->file_size / (300 * 1024);

	if (label->dkl_pcyl == 0)
		label->dkl_pcyl = 1;

	if (label->dkl_pcyl > 2)
		label->dkl_acyl = 2;
	else
		label->dkl_acyl = 0;

	label->dkl_nsect = vd->file_size /
	    (DEV_BSIZE * label->dkl_pcyl);
	label->dkl_ncyl = label->dkl_pcyl - label->dkl_acyl;
	label->dkl_nhead = 1;
	label->dkl_write_reinstruct = 0;
	label->dkl_read_reinstruct = 0;
	label->dkl_rpm = 7200;
	label->dkl_apc = 0;
	label->dkl_intrlv = 0;

	PR0("requested disk size: %ld bytes\n", vd->file_size);
	PR0("setup: ncyl=%d nhead=%d nsec=%d\n", label->dkl_pcyl,
	    label->dkl_nhead, label->dkl_nsect);
	PR0("provided disk size: %ld bytes\n", (uint64_t)
	    (label->dkl_pcyl * label->dkl_nhead *
	    label->dkl_nsect * DEV_BSIZE));

	if (vd->file_size < (1ULL << 20)) {
		size = vd->file_size >> 10;
		prefix = 'K'; /* Kilobyte */
	} else if (vd->file_size < (1ULL << 30)) {
		size = vd->file_size >> 20;
		prefix = 'M'; /* Megabyte */
	} else if (vd->file_size < (1ULL << 40)) {
		size = vd->file_size >> 30;
		prefix = 'G'; /* Gigabyte */
	} else {
		size = vd->file_size >> 40;
		prefix = 'T'; /* Terabyte */
	}

	/*
	 * We must have a correct label name otherwise format(1m) will
	 * not recognize the disk as labeled.
	 */
	(void) snprintf(label->dkl_asciilabel, LEN_DKL_ASCII,
	    "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d",
	    size, prefix,
	    label->dkl_ncyl, label->dkl_acyl, label->dkl_nhead,
	    label->dkl_nsect);

	/* default VTOC */
	label->dkl_vtoc.v_version = V_VERSION;
	label->dkl_vtoc.v_nparts = V_NUMPAR;
	label->dkl_vtoc.v_sanity = VTOC_SANE;
	label->dkl_vtoc.v_part[2].p_tag = V_BACKUP;
	label->dkl_map[2].dkl_cylno = 0;
	label->dkl_map[2].dkl_nblk = label->dkl_ncyl *
	    label->dkl_nhead * label->dkl_nsect;
	label->dkl_cksum = vd_lbl2cksum(label);
}
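/*
 * For example (illustrative arithmetic only): a 100MB disk image yields
 * pcyl = 104857600 / 307200 = 341, acyl = 2, ncyl = 339, nhead = 1 and
 * nsect = 104857600 / (512 * 341) = 600, giving a usable capacity of
 * 339 * 1 * 600 * 512 = 104140800 bytes, slightly under the file size as
 * expected from the integer truncation.
 */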

/*
 * Function:
 *	vd_file_set_vtoc
 *
 * Description:
 *	Set the vtoc of a disk image by writing the label and backup
 *	labels into the disk image backend.
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
 *	label		- the data to be written.
 *
 * Return Code:
 *	0		- success.
 *	n > 0		- error, n indicates the errno code.
 */
static int
vd_file_set_vtoc(vd_t *vd, struct dk_label *label)
{
	int blk, sec, cyl, head, cnt;

	ASSERT(vd->file);

	if (VD_FILE_LABEL_WRITE(vd, label) < 0) {
		PR0("failed to write disk label");
		return (EIO);
	}

	/*
	 * Backup labels are on the last alternate cylinder's
	 * first five odd sectors.
	 */
	if (label->dkl_acyl == 0) {
		PR0("no alternate cylinder, cannot store backup labels");
		return (0);
	}

	cyl = label->dkl_ncyl + label->dkl_acyl - 1;
	head = label->dkl_nhead - 1;

	blk = (cyl * ((label->dkl_nhead * label->dkl_nsect) - label->dkl_apc)) +
	    (head * label->dkl_nsect);

	/*
	 * Write the backup labels. Make sure we don't try to write past
	 * the last cylinder.
	 */
	sec = 1;

	for (cnt = 0; cnt < VD_FILE_NUM_BACKUP; cnt++) {

		if (sec >= label->dkl_nsect) {
			PR0("not enough sectors to store all backup labels");
			return (0);
		}

		if (vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)label,
		    blk + sec, sizeof (struct dk_label)) < 0) {
			PR0("error writing backup label at block %d\n",
			    blk + sec);
			return (EIO);
		}

		PR1("wrote backup label at block %d\n", blk + sec);

		sec += 2;
	}

	return (0);
}
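/*
 * Placement example (using the default label built above, with ncyl = 339,
 * acyl = 2, nhead = 1, nsect = 600 and apc = 0): the last track starts at
 * block (339 + 2 - 1) * 600 = 204000, so the five backup labels land on
 * the odd sectors 204001, 204003, 204005, 204007 and 204009.
 */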

/*
 * Function:
 *	vd_file_get_devid_block
 *
 * Description:
 *	Return the block number where the device id is stored.
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
 *	blkp		- pointer to the block number
 *
 * Return Code:
 *	0		- success
 *	ENOSPC		- disk has no space to store a device id
 */
static int
vd_file_get_devid_block(vd_t *vd, size_t *blkp)
{
	diskaddr_t spc, head, cyl;

	ASSERT(vd->file);
	ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC);

	/* this geometry doesn't allow us to have a devid */
	if (vd->dk_geom.dkg_acyl < 2) {
		PR0("not enough alternate cylinders available for devid "
		    "(acyl=%u)", vd->dk_geom.dkg_acyl);
		return (ENOSPC);
	}

	/* the devid is on the track next to the last cylinder */
	cyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl - 2;
	spc = vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect;
	head = vd->dk_geom.dkg_nhead - 1;

	*blkp = (cyl * (spc - vd->dk_geom.dkg_apc)) +
	    (head * vd->dk_geom.dkg_nsect) + 1;

	return (0);
}

/*
 * Return the checksum of a disk block containing an on-disk devid.
 */
static uint_t
vd_dkdevid2cksum(struct dk_devid *dkdevid)
{
	uint_t chksum, *ip;
	int i;

	chksum = 0;
	ip = (uint_t *)dkdevid;
	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++)
		chksum ^= ip[i];

	return (chksum);
}

/*
 * Function:
 *	vd_file_read_devid
 *
 * Description:
 *	Read the device id stored on a disk image.
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
 *	devid		- the return address of the device ID.
 *
 * Return Code:
 *	0		- success
 *	EIO		- I/O error while trying to access the disk image
 *	EINVAL		- no valid device id was found
 *	ENOSPC		- disk has no space to store a device id
 */
static int
vd_file_read_devid(vd_t *vd, ddi_devid_t *devid)
{
	struct dk_devid *dkdevid;
	size_t blk;
	uint_t chksum;
	int status, sz;

	if ((status = vd_file_get_devid_block(vd, &blk)) != 0)
		return (status);

	dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP);

	/* get the devid */
	if ((vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)dkdevid, blk,
	    DEV_BSIZE)) < 0) {
		PR0("error reading devid block at %lu", blk);
		status = EIO;
		goto done;
	}

	/* validate the revision */
	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
		PR0("invalid devid found at block %lu (bad revision)", blk);
		status = EINVAL;
		goto done;
	}

	/* compute checksum */
	chksum = vd_dkdevid2cksum(dkdevid);

	/* compare the checksums */
	if (DKD_GETCHKSUM(dkdevid) != chksum) {
		PR0("invalid devid found at block %lu (bad checksum)", blk);
		status = EINVAL;
		goto done;
	}

	/* validate the device id */
	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
		PR0("invalid devid found at block %lu", blk);
		status = EINVAL;
		goto done;
	}

	PR1("devid read at block %lu", blk);

	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
	*devid = kmem_alloc(sz, KM_SLEEP);
	bcopy(&dkdevid->dkd_devid, *devid, sz);

done:
	kmem_free(dkdevid, DEV_BSIZE);
	return (status);

}

/*
 * Function:
 *	vd_file_write_devid
 *
 * Description:
 *	Write a device id into the disk image.
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
 *	devid		- the device ID to store.
 *
 * Return Code:
 *	0		- success
 *	EIO		- I/O error while trying to access the disk image
 *	ENOSPC		- disk has no space to store a device id
 */
static int
vd_file_write_devid(vd_t *vd, ddi_devid_t devid)
{
	struct dk_devid *dkdevid;
	uint_t chksum;
	size_t blk;
	int status;

	if ((status = vd_file_get_devid_block(vd, &blk)) != 0)
		return (status);

	dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP);

	/* set revision */
	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;

	/* copy devid */
	bcopy(devid, &dkdevid->dkd_devid, ddi_devid_sizeof(devid));

	/* compute checksum */
	chksum = vd_dkdevid2cksum(dkdevid);

	/* set checksum */
	DKD_FORMCHKSUM(chksum, dkdevid);

	/* store the devid */
	if ((status = vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE,
	    (caddr_t)dkdevid, blk, DEV_BSIZE)) < 0) {
		PR0("Error writing devid block at %lu", blk);
		status = EIO;
	} else {
		PR1("devid written at block %lu", blk);
		status = 0;
	}

	kmem_free(dkdevid, DEV_BSIZE);
	return (status);
}
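/*
 * Loose sketch of the on-disk devid block handled by the two routines
 * above: a single DEV_BSIZE (512-byte) sector holding a revision
 * (dkd_rev_hi/dkd_rev_lo), the devid payload (dkd_devid) and, in the last
 * 32-bit word, a checksum covering everything before it;
 * vd_dkdevid2cksum() XORs the first
 * (DEV_BSIZE - sizeof (int)) / sizeof (int) = 127 words to compute it.
 */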

/*
 * Function:
 *	vd_scsi_rdwr
 *
 * Description:
 *	Read or write to a SCSI disk using an absolute disk offset.
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
 *	operation	- operation to execute: read (VD_OP_BREAD) or
 *			  write (VD_OP_BWRITE).
 *	data		- buffer where data are read to or written from.
 *	blk		- starting block for the operation.
 *	len		- number of bytes to read or write.
 *
 * Return Code:
 *	0		- success
 *	n != 0		- error.
 */
static int
vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len)
{
	struct uscsi_cmd ucmd;
	union scsi_cdb cdb;
	int nsectors, nblk;
	int max_sectors;
	int status, rval;

	ASSERT(!vd->file);

	max_sectors = vd->max_xfer_sz;
	nblk = (len / DEV_BSIZE);

	if (len % DEV_BSIZE != 0)
		return (EINVAL);

	/*
	 * Build and execute the uscsi ioctl. We build a group0, group1
	 * or group4 command as necessary, since some targets
	 * do not support group1 commands.
	 */
	while (nblk) {

		bzero(&ucmd, sizeof (ucmd));
		bzero(&cdb, sizeof (cdb));

		nsectors = (max_sectors < nblk) ? max_sectors : nblk;

		if (blk < (2 << 20) && nsectors <= 0xff) {
			FORMG0ADDR(&cdb, blk);
			FORMG0COUNT(&cdb, nsectors);
			ucmd.uscsi_cdblen = CDB_GROUP0;
		} else if (blk > 0xffffffff) {
			FORMG4LONGADDR(&cdb, blk);
			FORMG4COUNT(&cdb, nsectors);
			ucmd.uscsi_cdblen = CDB_GROUP4;
			cdb.scc_cmd |= SCMD_GROUP4;
		} else {
			FORMG1ADDR(&cdb, blk);
			FORMG1COUNT(&cdb, nsectors);
			ucmd.uscsi_cdblen = CDB_GROUP1;
			cdb.scc_cmd |= SCMD_GROUP1;
		}

		ucmd.uscsi_cdb = (caddr_t)&cdb;
		ucmd.uscsi_bufaddr = data;
		ucmd.uscsi_buflen = nsectors * DEV_BSIZE;
		ucmd.uscsi_timeout = vd_scsi_rdwr_timeout;
		/*
		 * Set flags so that the command is isolated from normal
		 * commands and no error message is printed.
		 */
		ucmd.uscsi_flags = USCSI_ISOLATE | USCSI_SILENT;

		if (operation == VD_OP_BREAD) {
			cdb.scc_cmd |= SCMD_READ;
			ucmd.uscsi_flags |= USCSI_READ;
		} else {
			cdb.scc_cmd |= SCMD_WRITE;
		}

		status = ldi_ioctl(vd->ldi_handle[VD_ENTIRE_DISK_SLICE],
		    USCSICMD, (intptr_t)&ucmd, (vd_open_flags | FKIOCTL),
		    kcred, &rval);

		if (status == 0)
			status = ucmd.uscsi_status;

		if (status != 0)
			break;

		/*
		 * Check if partial DMA breakup is required. If so, reduce
		 * the request size by half and retry the last request.
		 */
		if (ucmd.uscsi_resid == ucmd.uscsi_buflen) {
			max_sectors >>= 1;
			if (max_sectors <= 0) {
				status = EIO;
				break;
			}
			continue;
		}

		if (ucmd.uscsi_resid != 0) {
			status = EIO;
			break;
		}

		blk += nsectors;
		nblk -= nsectors;
		data += nsectors * DEV_BSIZE;	/* SECSIZE */
	}

	return (status);
}
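/*
 * CDB selection in the loop above, by way of example: Group 0 commands
 * carry a 21-bit LBA and an 8-bit count, hence the "blk < (2 << 20) &&
 * nsectors <= 0xff" test; Group 1 extends this to a 32-bit LBA and a
 * 16-bit count; and any LBA above 0xffffffff forces a Group 4 (16-byte)
 * command. So a 1MB read (2048 sectors) starting at block 0 would use a
 * Group 1 command purely because of its sector count.
 */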
"Read" : "Write", 1053 request->nbytes, request->addr); 1054 1055 bioinit(buf); 1056 buf->b_flags = B_BUSY; 1057 buf->b_bcount = request->nbytes; 1058 buf->b_lblkno = request->addr; 1059 buf->b_edev = (slice == VD_SLICE_NONE)? NODEV : vd->dev[slice]; 1060 1061 mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP; 1062 1063 /* Map memory exported by client */ 1064 status = ldc_mem_map(task->mhdl, request->cookie, request->ncookies, 1065 mtype, (request->operation == VD_OP_BREAD) ? LDC_MEM_W : LDC_MEM_R, 1066 &(buf->b_un.b_addr), NULL); 1067 if (status != 0) { 1068 PR0("ldc_mem_map() returned err %d ", status); 1069 biofini(buf); 1070 return (EIO); 1071 } 1072 1073 status = ldc_mem_acquire(task->mhdl, 0, buf->b_bcount); 1074 if (status != 0) { 1075 (void) ldc_mem_unmap(task->mhdl); 1076 PR0("ldc_mem_acquire() returned err %d ", status); 1077 biofini(buf); 1078 return (EIO); 1079 } 1080 1081 buf->b_flags |= (request->operation == VD_OP_BREAD) ? B_READ : B_WRITE; 1082 1083 /* Start the block I/O */ 1084 if (vd->file) { 1085 rv = vd_file_rw(vd, slice, request->operation, buf->b_un.b_addr, 1086 request->addr, request->nbytes); 1087 if (rv < 0) { 1088 request->nbytes = 0; 1089 request->status = EIO; 1090 } else { 1091 request->nbytes = rv; 1092 request->status = 0; 1093 } 1094 } else { 1095 if (slice == VD_SLICE_NONE) { 1096 /* 1097 * This is not a disk image so it is a real disk. We 1098 * assume that the underlying device driver supports 1099 * USCSICMD ioctls. This is the case of all SCSI devices 1100 * (sd, ssd...). 1101 * 1102 * In the future if we have non-SCSI disks we would need 1103 * to invoke the appropriate function to do I/O using an 1104 * absolute disk offset (for example using DKIOCTL_RWCMD 1105 * for IDE disks). 1106 */ 1107 rv = vd_scsi_rdwr(vd, request->operation, 1108 buf->b_un.b_addr, request->addr, request->nbytes); 1109 if (rv != 0) { 1110 request->nbytes = 0; 1111 request->status = EIO; 1112 } else { 1113 request->status = 0; 1114 } 1115 } else { 1116 request->status = 1117 ldi_strategy(vd->ldi_handle[slice], buf); 1118 1119 /* 1120 * This is to indicate to the caller that the request 1121 * needs to be finished by vd_complete_bio() by calling 1122 * biowait() there and waiting for that to return before 1123 * triggering the notification of the vDisk client. 1124 * 1125 * This is necessary when writing to real disks as 1126 * otherwise calls to ldi_strategy() would be serialized 1127 * behind the calls to biowait() and performance would 1128 * suffer. 1129 */ 1130 if (request->status == 0) 1131 return (EINPROGRESS); 1132 } 1133 } 1134 1135 /* Clean up after error */ 1136 rv = ldc_mem_release(task->mhdl, 0, buf->b_bcount); 1137 if (rv) { 1138 PR0("ldc_mem_release() returned err %d ", rv); 1139 status = EIO; 1140 } 1141 rv = ldc_mem_unmap(task->mhdl); 1142 if (rv) { 1143 PR0("ldc_mem_unmap() returned err %d ", rv); 1144 status = EIO; 1145 } 1146 1147 biofini(buf); 1148 1149 return (status); 1150 } 1151 1152 /* 1153 * This function should only be called from vd_notify to ensure that requests 1154 * are responded to in the order that they are received. 

/*
 * This function should only be called from vd_notify() to ensure that
 * requests are responded to in the order that they are received.
 */
static int
send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen)
{
	int	status;
	size_t	nbytes;

	do {
		nbytes = msglen;
		status = ldc_write(ldc_handle, msg, &nbytes);
		if (status != EWOULDBLOCK)
			break;
		drv_usecwait(vds_ldc_delay);
	} while (status == EWOULDBLOCK);

	if (status != 0) {
		if (status != ECONNRESET)
			PR0("ldc_write() returned errno %d", status);
		return (status);
	} else if (nbytes != msglen) {
		PR0("ldc_write() performed only partial write");
		return (EIO);
	}

	PR1("SENT %lu bytes", msglen);
	return (0);
}

static void
vd_need_reset(vd_t *vd, boolean_t reset_ldc)
{
	mutex_enter(&vd->lock);
	vd->reset_state	= B_TRUE;
	vd->reset_ldc	= reset_ldc;
	mutex_exit(&vd->lock);
}

/*
 * Reset the state of the connection with a client, if needed; reset the LDC
 * transport as well, if needed. This function should only be called from
 * vd_recv_msg(), as it waits for tasks; otherwise a deadlock can occur.
 */
static void
vd_reset_if_needed(vd_t *vd)
{
	int status = 0;

	mutex_enter(&vd->lock);
	if (!vd->reset_state) {
		ASSERT(!vd->reset_ldc);
		mutex_exit(&vd->lock);
		return;
	}
	mutex_exit(&vd->lock);

	PR0("Resetting connection state with %s", VD_CLIENT(vd));

	/*
	 * Let any asynchronous I/O complete before possibly pulling the rug
	 * out from under it; defer checking vd->reset_ldc, as one of the
	 * asynchronous tasks might set it
	 */
	ddi_taskq_wait(vd->completionq);

	if (vd->file) {
		status = VOP_FSYNC(vd->file_vnode, FSYNC, kcred);
		if (status) {
			PR0("VOP_FSYNC returned errno %d", status);
		}
	}

	if ((vd->initialized & VD_DRING) &&
	    ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0))
		PR0("ldc_mem_dring_unmap() returned errno %d", status);

	vd_free_dring_task(vd);

	/* Free the staging buffer for msgs */
	if (vd->vio_msgp != NULL) {
		kmem_free(vd->vio_msgp, vd->max_msglen);
		vd->vio_msgp = NULL;
	}

	/* Free the inband message buffer */
	if (vd->inband_task.msg != NULL) {
		kmem_free(vd->inband_task.msg, vd->max_msglen);
		vd->inband_task.msg = NULL;
	}

	mutex_enter(&vd->lock);

	if (vd->reset_ldc)
		PR0("taking down LDC channel");
	if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0))
		PR0("ldc_down() returned errno %d", status);

	vd->initialized	&= ~(VD_SID | VD_SEQ_NUM | VD_DRING);
	vd->state	= VD_STATE_INIT;
	vd->max_msglen	= sizeof (vio_msg_t);	/* baseline vio message size */

	/* Allocate the staging buffer */
	vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP);

	PR0("calling ldc_up\n");
	(void) ldc_up(vd->ldc_handle);

	vd->reset_state	= B_FALSE;
	vd->reset_ldc	= B_FALSE;

	mutex_exit(&vd->lock);
}

static void vd_recv_msg(void *arg);

static void
vd_mark_in_reset(vd_t *vd)
{
	int status;

	PR0("vd_mark_in_reset: marking vd in reset\n");

	vd_need_reset(vd, B_FALSE);
	status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, DDI_SLEEP);
	if (status == DDI_FAILURE) {
		PR0("cannot schedule task to recv msg\n");
		vd_need_reset(vd, B_TRUE);
		return;
	}
}

static int
vd_mark_elem_done(vd_t *vd, int idx, int elem_status, int elem_nbytes)
{
	boolean_t		accepted;
	int			status;
	vd_dring_entry_t	*elem = VD_DRING_ELEM(idx);

	if (vd->reset_state)
		return (0);

	/* Acquire the element */
	if (!vd->reset_state &&
	    (status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) {
		if (status == ECONNRESET) {
			vd_mark_in_reset(vd);
			return (0);
		} else {
			PR0("ldc_mem_dring_acquire() returned errno %d",
			    status);
			return (status);
		}
	}

	/* Set the element's status and mark it done */
	accepted = (elem->hdr.dstate == VIO_DESC_ACCEPTED);
	if (accepted) {
		elem->payload.nbytes	= elem_nbytes;
		elem->payload.status	= elem_status;
		elem->hdr.dstate	= VIO_DESC_DONE;
	} else {
		/* Perhaps client timed out waiting for I/O... */
		PR0("element %u no longer \"accepted\"", idx);
		VD_DUMP_DRING_ELEM(elem);
	}
	/* Release the element */
	if (!vd->reset_state &&
	    (status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) {
		if (status == ECONNRESET) {
			vd_mark_in_reset(vd);
			return (0);
		} else {
			PR0("ldc_mem_dring_release() returned errno %d",
			    status);
			return (status);
		}
	}

	return (accepted ? 0 : EINVAL);
}

/*
 * Return Values
 *	0	- operation completed successfully
 *	EIO	- encountered LDC / task error
 *
 * Side Effect
 *	sets request->status = <disk operation status>
 */
static int
vd_complete_bio(vd_task_t *task)
{
	int			status		= 0;
	int			rv		= 0;
	vd_t			*vd		= task->vd;
	vd_dring_payload_t	*request	= task->request;
	struct buf		*buf		= &task->buf;


	ASSERT(vd != NULL);
	ASSERT(request != NULL);
	ASSERT(task->msg != NULL);
	ASSERT(task->msglen >= sizeof (*task->msg));
	ASSERT(!vd->file);
	ASSERT(request->slice != VD_SLICE_NONE);

	/* Wait for the I/O to complete [ call to ldi_strategy(9f) ] */
	request->status = biowait(buf);

	/* return back the number of bytes read/written */
	request->nbytes = buf->b_bcount - buf->b_resid;

	/* Release the buffer */
	if (!vd->reset_state)
		status = ldc_mem_release(task->mhdl, 0, buf->b_bcount);
	if (status) {
		PR0("ldc_mem_release() returned errno %d copying to "
		    "client", status);
		if (status == ECONNRESET) {
			vd_mark_in_reset(vd);
		}
		rv = EIO;
	}

	/* Unmap the memory, even if in reset */
	status = ldc_mem_unmap(task->mhdl);
	if (status) {
		PR0("ldc_mem_unmap() returned errno %d copying to client",
		    status);
		if (status == ECONNRESET) {
			vd_mark_in_reset(vd);
		}
		rv = EIO;
	}

	biofini(buf);

	return (rv);
}
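/*
 * Descriptor state machine, informally: the client posts a descriptor as
 * VIO_DESC_ACCEPTED; vd_mark_elem_done() moves it to VIO_DESC_DONE once
 * the server has filled in payload.status and payload.nbytes. Finding any
 * other dstate here means the client gave up on (or reused) the element,
 * which is why that function returns EINVAL in that case.
 */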

/*
 * Description:
 *	This function is called by the two functions called by a taskq
 *	[ vd_complete_notify() and vd_serial_notify() ] to send the
 *	message to the client.
 *
 * Parameters:
 *	task	- task whose message is to be sent to the client
 *
 * Return Values
 *	None
 */
static void
vd_notify(vd_task_t *task)
{
	int status;

	ASSERT(task != NULL);
	ASSERT(task->vd != NULL);

	if (task->vd->reset_state)
		return;

	/*
	 * Send the "ack" or "nack" back to the client; if sending the message
	 * via LDC fails, arrange to reset both the connection state and LDC
	 * itself
	 */
	PR2("Sending %s",
	    (task->msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK");

	status = send_msg(task->vd->ldc_handle, task->msg, task->msglen);
	switch (status) {
	case 0:
		break;
	case ECONNRESET:
		vd_mark_in_reset(task->vd);
		break;
	default:
		PR0("initiating full reset");
		vd_need_reset(task->vd, B_TRUE);
		break;
	}

	DTRACE_PROBE1(task__end, vd_task_t *, task);
}

/*
 * Description:
 *	Mark the dring entry as done and (if necessary) send an ACK/NACK to
 *	the vDisk client
 *
 * Parameters:
 *	task		- structure containing the request sent from client
 *
 * Return Values
 *	None
 */
static void
vd_complete_notify(vd_task_t *task)
{
	int			status		= 0;
	vd_t			*vd		= task->vd;
	vd_dring_payload_t	*request	= task->request;

	/* Update the dring element for a dring client */
	if (!vd->reset_state && (vd->xfer_mode == VIO_DRING_MODE)) {
		status = vd_mark_elem_done(vd, task->index,
		    request->status, request->nbytes);
		if (status == ECONNRESET)
			vd_mark_in_reset(vd);
	}

	/*
	 * If a transport error occurred while marking the element done or
	 * previously while executing the task, arrange to "nack" the message
	 * when the final task in the descriptor element range completes
	 */
	if ((status != 0) || (task->status != 0))
		task->msg->tag.vio_subtype = VIO_SUBTYPE_NACK;

	/*
	 * Only the final task for a range of elements will respond to and
	 * free the message
	 */
	if (task->type == VD_NONFINAL_RANGE_TASK) {
		return;
	}

	vd_notify(task);
}

/*
 * Description:
 *	This is the basic completion function called to handle inband data
 *	requests and handshake messages. All it needs to do is trigger a
 *	message to the client that the request is completed.
 *
 * Parameters:
 *	arg	- opaque pointer to structure containing task to be completed
 *
 * Return Values
 *	None
 */
static void
vd_serial_notify(void *arg)
{
	vd_task_t	*task = (vd_task_t *)arg;

	ASSERT(task != NULL);
	vd_notify(task);
}

static void
vd_geom2dk_geom(void *vd_buf, void *ioctl_arg)
{
	VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg);
}

static void
vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg)
{
	VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg);
}

static void
dk_geom2vd_geom(void *ioctl_arg, void *vd_buf)
{
	DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf);
}

static void
vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf)
{
	VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf);
}

static void
vd_get_efi_in(void *vd_buf, void *ioctl_arg)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	dk_efi->dki_lba = vd_efi->lba;
	dk_efi->dki_length = vd_efi->length;
	dk_efi->dki_data = kmem_zalloc(vd_efi->length, KM_SLEEP);
}

static void
vd_get_efi_out(void *ioctl_arg, void *vd_buf)
{
	int len;
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	len = vd_efi->length;
	DK_EFI2VD_EFI(dk_efi, vd_efi);
	kmem_free(dk_efi->dki_data, len);
}

static void
vd_set_efi_in(void *vd_buf, void *ioctl_arg)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP);
	VD_EFI2DK_EFI(vd_efi, dk_efi);
}

static void
vd_set_efi_out(void *ioctl_arg, void *vd_buf)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	kmem_free(dk_efi->dki_data, vd_efi->length);
}

static vd_disk_label_t
vd_read_vtoc(ldi_handle_t handle, struct vtoc *vtoc)
{
	int status, rval;
	struct dk_gpt *efi;
	size_t efi_len;

	status = ldi_ioctl(handle, DKIOCGVTOC, (intptr_t)vtoc,
	    (vd_open_flags | FKIOCTL), kcred, &rval);

	if (status == 0) {
		return (VD_DISK_LABEL_VTOC);
	} else if (status != ENOTSUP) {
		PR0("ldi_ioctl(DKIOCGVTOC) returned error %d", status);
		return (VD_DISK_LABEL_UNK);
	}

	status = vds_efi_alloc_and_read(handle, &efi, &efi_len);

	if (status) {
		PR0("vds_efi_alloc_and_read returned error %d", status);
		return (VD_DISK_LABEL_UNK);
	}

	vd_efi_to_vtoc(efi, vtoc);
	vd_efi_free(efi, efi_len);

	return (VD_DISK_LABEL_EFI);
}

static ushort_t
vd_lbl2cksum(struct dk_label *label)
{
	int	count;
	ushort_t sum, *sp;

	count =	(sizeof (struct dk_label)) / (sizeof (short)) - 1;
	sp = (ushort_t *)label;
	sum = 0;
	while (count--) {
		sum ^= *sp++;
	}

	return (sum);
}
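/*
 * The label checksum above folds every 16-bit word of the dk_label except
 * the final one (dkl_cksum itself) with XOR. A handy consequence: once
 * dkl_cksum is set to this value, XOR-ing all the words of the block,
 * checksum included, yields 0, which is how consumers verify a label.
 */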

/*
 * Handle ioctls to a disk slice.
 *
 * Return Values
 *	0	- Indicates that there are no errors in disk operations
 *	ENOTSUP	- Unknown disk label type or unsupported DKIO ioctl
 *	EINVAL	- Not enough room to copy the EFI label
 *
 */
static int
vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg)
{
	dk_efi_t *dk_ioc;

	switch (vd->vdisk_label) {

	/* ioctls for a slice from a disk with a VTOC label */
	case VD_DISK_LABEL_VTOC:

		switch (cmd) {
		case DKIOCGGEOM:
			ASSERT(ioctl_arg != NULL);
			bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom));
			return (0);
		case DKIOCGVTOC:
			ASSERT(ioctl_arg != NULL);
			bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc));
			return (0);
		default:
			return (ENOTSUP);
		}

	/* ioctls for a slice from a disk with an EFI label */
	case VD_DISK_LABEL_EFI:

		switch (cmd) {
		case DKIOCGETEFI:
			ASSERT(ioctl_arg != NULL);
			dk_ioc = (dk_efi_t *)ioctl_arg;
			if (dk_ioc->dki_length < vd->dk_efi.dki_length)
				return (EINVAL);
			bcopy(vd->dk_efi.dki_data, dk_ioc->dki_data,
			    vd->dk_efi.dki_length);
			return (0);
		default:
			return (ENOTSUP);
		}

	default:
		/* Unknown disk label type */
		return (ENOTSUP);
	}
}

/*
 * Function:
 *	vd_file_validate_geometry
 *
 * Description:
 *	Read the label and validate the geometry of a disk image. The driver
 *	label, vtoc and geometry information are updated according to the
 *	label read from the disk image.
 *
 *	If no valid label is found, the label is set to unknown and the
 *	function returns EINVAL, but a default vtoc and geometry are provided
 *	to the driver.
 *
 * Parameters:
 *	vd	- disk on which the operation is performed.
 *
 * Return Code:
 *	0	- success.
 *	EIO	- error reading the label from the disk image.
 *	EINVAL	- unknown disk label.
 */
static int
vd_file_validate_geometry(vd_t *vd)
{
	struct dk_label label;
	struct dk_geom *geom = &vd->dk_geom;
	struct vtoc *vtoc = &vd->vtoc;
	int i;
	int status = 0;

	ASSERT(vd->file);

	if (VD_FILE_LABEL_READ(vd, &label) < 0)
		return (EIO);

	if (label.dkl_magic != DKL_MAGIC ||
	    label.dkl_cksum != vd_lbl2cksum(&label) ||
	    label.dkl_vtoc.v_sanity != VTOC_SANE ||
	    label.dkl_vtoc.v_nparts != V_NUMPAR) {
		vd->vdisk_label = VD_DISK_LABEL_UNK;
		vd_file_build_default_label(vd, &label);
		status = EINVAL;
	} else {
		vd->vdisk_label = VD_DISK_LABEL_VTOC;
	}

	/* Update the driver geometry */
	bzero(geom, sizeof (struct dk_geom));

	geom->dkg_ncyl = label.dkl_ncyl;
	geom->dkg_acyl = label.dkl_acyl;
	geom->dkg_nhead = label.dkl_nhead;
	geom->dkg_nsect = label.dkl_nsect;
	geom->dkg_intrlv = label.dkl_intrlv;
	geom->dkg_apc = label.dkl_apc;
	geom->dkg_rpm = label.dkl_rpm;
	geom->dkg_pcyl = label.dkl_pcyl;
	geom->dkg_write_reinstruct = label.dkl_write_reinstruct;
	geom->dkg_read_reinstruct = label.dkl_read_reinstruct;

	/* Update the driver vtoc */
	bzero(vtoc, sizeof (struct vtoc));

	vtoc->v_sanity = label.dkl_vtoc.v_sanity;
	vtoc->v_version = label.dkl_vtoc.v_version;
	vtoc->v_sectorsz = DEV_BSIZE;
	vtoc->v_nparts = label.dkl_vtoc.v_nparts;

	for (i = 0; i < vtoc->v_nparts; i++) {
		vtoc->v_part[i].p_tag =
		    label.dkl_vtoc.v_part[i].p_tag;
		vtoc->v_part[i].p_flag =
		    label.dkl_vtoc.v_part[i].p_flag;
		vtoc->v_part[i].p_start =
		    label.dkl_map[i].dkl_cylno *
		    (label.dkl_nhead * label.dkl_nsect);
		vtoc->v_part[i].p_size = label.dkl_map[i].dkl_nblk;
		vtoc->timestamp[i] =
		    label.dkl_vtoc.v_timestamp[i];
	}
	/*
	 * The bootinfo array cannot be copied with bcopy() because
	 * elements are of type long in vtoc (so 64-bit) and of type
	 * int in dk_vtoc (so 32-bit).
	 */
	vtoc->v_bootinfo[0] = label.dkl_vtoc.v_bootinfo[0];
	vtoc->v_bootinfo[1] = label.dkl_vtoc.v_bootinfo[1];
	vtoc->v_bootinfo[2] = label.dkl_vtoc.v_bootinfo[2];
	bcopy(label.dkl_asciilabel, vtoc->v_asciilabel,
	    LEN_DKL_ASCII);
	bcopy(label.dkl_vtoc.v_volume, vtoc->v_volume,
	    LEN_DKL_VVOL);

	return (status);
}
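/*
 * The p_start computation above converts a starting cylinder into an
 * absolute block: using the example geometry described earlier (nhead = 1,
 * nsect = 600), a partition recorded as starting at cylinder 10 begins at
 * block 10 * (1 * 600) = 6000, while p_size is already stored in blocks.
 */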

/*
 * Handle ioctls to a disk image (file-based).
 *
 * Return Values
 *	0	- Indicates that there are no errors
 *	!= 0	- Disk operation returned an error
 */
static int
vd_do_file_ioctl(vd_t *vd, int cmd, void *ioctl_arg)
{
	struct dk_label label;
	struct dk_geom *geom;
	struct vtoc *vtoc;
	int i, rc;

	ASSERT(vd->file);

	switch (cmd) {

	case DKIOCGGEOM:
		ASSERT(ioctl_arg != NULL);
		geom = (struct dk_geom *)ioctl_arg;

		rc = vd_file_validate_geometry(vd);
		if (rc != 0 && rc != EINVAL)
			return (rc);

		bcopy(&vd->dk_geom, geom, sizeof (struct dk_geom));
		return (0);

	case DKIOCGVTOC:
		ASSERT(ioctl_arg != NULL);
		vtoc = (struct vtoc *)ioctl_arg;

		rc = vd_file_validate_geometry(vd);
		if (rc != 0 && rc != EINVAL)
			return (rc);

		bcopy(&vd->vtoc, vtoc, sizeof (struct vtoc));
		return (0);

	case DKIOCSGEOM:
		ASSERT(ioctl_arg != NULL);
		geom = (struct dk_geom *)ioctl_arg;

		if (geom->dkg_nhead == 0 || geom->dkg_nsect == 0)
			return (EINVAL);

		/*
		 * The current device geometry is not updated, just the driver
		 * "notion" of it. The device geometry will be effectively
		 * updated when a label is written to the device during a next
		 * DKIOCSVTOC.
		 */
		bcopy(ioctl_arg, &vd->dk_geom, sizeof (vd->dk_geom));
		return (0);

	case DKIOCSVTOC:
		ASSERT(ioctl_arg != NULL);
		ASSERT(vd->dk_geom.dkg_nhead != 0 &&
		    vd->dk_geom.dkg_nsect != 0);
		vtoc = (struct vtoc *)ioctl_arg;

		if (vtoc->v_sanity != VTOC_SANE ||
		    vtoc->v_sectorsz != DEV_BSIZE ||
		    vtoc->v_nparts != V_NUMPAR)
			return (EINVAL);

		bzero(&label, sizeof (label));
		label.dkl_ncyl = vd->dk_geom.dkg_ncyl;
		label.dkl_acyl = vd->dk_geom.dkg_acyl;
		label.dkl_pcyl = vd->dk_geom.dkg_pcyl;
		label.dkl_nhead = vd->dk_geom.dkg_nhead;
		label.dkl_nsect = vd->dk_geom.dkg_nsect;
		label.dkl_intrlv = vd->dk_geom.dkg_intrlv;
		label.dkl_apc = vd->dk_geom.dkg_apc;
		label.dkl_rpm = vd->dk_geom.dkg_rpm;
		label.dkl_write_reinstruct = vd->dk_geom.dkg_write_reinstruct;
		label.dkl_read_reinstruct = vd->dk_geom.dkg_read_reinstruct;

		label.dkl_vtoc.v_nparts = V_NUMPAR;
		label.dkl_vtoc.v_sanity = VTOC_SANE;
		label.dkl_vtoc.v_version = vtoc->v_version;
		for (i = 0; i < V_NUMPAR; i++) {
			label.dkl_vtoc.v_timestamp[i] =
			    vtoc->timestamp[i];
			label.dkl_vtoc.v_part[i].p_tag =
			    vtoc->v_part[i].p_tag;
			label.dkl_vtoc.v_part[i].p_flag =
			    vtoc->v_part[i].p_flag;
			label.dkl_map[i].dkl_cylno =
			    vtoc->v_part[i].p_start /
			    (label.dkl_nhead * label.dkl_nsect);
			label.dkl_map[i].dkl_nblk =
			    vtoc->v_part[i].p_size;
		}
		/*
		 * The bootinfo array cannot be copied with bcopy() because
		 * elements are of type long in vtoc (so 64-bit) and of type
		 * int in dk_vtoc (so 32-bit).
		 */
		label.dkl_vtoc.v_bootinfo[0] = vtoc->v_bootinfo[0];
		label.dkl_vtoc.v_bootinfo[1] = vtoc->v_bootinfo[1];
		label.dkl_vtoc.v_bootinfo[2] = vtoc->v_bootinfo[2];
		bcopy(vtoc->v_asciilabel, label.dkl_asciilabel,
		    LEN_DKL_ASCII);
		bcopy(vtoc->v_volume, label.dkl_vtoc.v_volume,
		    LEN_DKL_VVOL);

		/* re-compute checksum */
		label.dkl_magic = DKL_MAGIC;
		label.dkl_cksum = vd_lbl2cksum(&label);

		/* write label to the disk image */
		if ((rc = vd_file_set_vtoc(vd, &label)) != 0)
			return (rc);

		/* check the geometry and update the driver info */
		if ((rc = vd_file_validate_geometry(vd)) != 0)
			return (rc);

		/*
		 * The disk geometry may have changed, so we need to write
		 * the devid (if there is one) so that it is stored at the
		 * right location.
		 */
		if (vd->file_devid != NULL &&
		    vd_file_write_devid(vd, vd->file_devid) != 0) {
			PR0("Failed to write devid");
		}

		return (0);

	default:
		return (ENOTSUP);
	}
}

/*
 * Description:
 *	This is the function that processes the ioctl requests (farming it
 *	out to functions that handle slices, files or whole disks)
 *
 * Return Values
 *	0	- ioctl operation completed successfully
 *	!= 0	- The LDC error value encountered
 *		  (propagated back up the call stack as a task error)
 *
 * Side Effect
 *	sets request->status to the return value of the ioctl function.
 */
static int
vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl)
{
	int	rval = 0, status = 0;
	size_t	nbytes = request->nbytes;	/* modifiable copy */


	ASSERT(request->slice < vd->nslices);
	PR0("Performing %s", ioctl->operation_name);

	/* Get data from client and convert, if necessary */
	if (ioctl->copyin != NULL) {
		ASSERT(nbytes != 0 && buf != NULL);
		PR1("Getting \"arg\" data from client");
		if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes,
		    request->cookie, request->ncookies,
		    LDC_COPY_IN)) != 0) {
			PR0("ldc_mem_copy() returned errno %d "
			    "copying from client", status);
			return (status);
		}

		/* Convert client's data, if necessary */
		if (ioctl->copyin == VD_IDENTITY)	/* use client buffer */
			ioctl->arg = buf;
		else	/* convert client vdisk operation data to ioctl data */
			(ioctl->copyin)(buf, (void *)ioctl->arg);
	}

	/*
	 * Handle single-slice block devices internally; otherwise, have the
	 * real driver perform the ioctl()
	 */
	if (vd->file) {
		request->status =
		    vd_do_file_ioctl(vd, ioctl->cmd, (void *)ioctl->arg);

	} else if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) {
		request->status =
		    vd_do_slice_ioctl(vd, ioctl->cmd, (void *)ioctl->arg);

	} else {
		request->status = ldi_ioctl(vd->ldi_handle[request->slice],
		    ioctl->cmd, (intptr_t)ioctl->arg, (vd_open_flags | FKIOCTL),
		    kcred, &rval);

#ifdef DEBUG
		if (rval != 0) {
			PR0("%s set rval = %d, which is not being returned to"
			    " client", ioctl->cmd_name, rval);
		}
#endif /* DEBUG */
	}

	if (request->status != 0) {
		PR0("ioctl(%s) = errno %d", ioctl->cmd_name, request->status);
		return (0);
	}

	/* Convert data and send to client, if necessary */
	if (ioctl->copyout != NULL) {
		ASSERT(nbytes != 0 && buf != NULL);
		PR1("Sending \"arg\" data to client");

		/* Convert ioctl data to vdisk operation data, if necessary */
		if (ioctl->copyout != VD_IDENTITY)
			(ioctl->copyout)((void *)ioctl->arg, buf);

		if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes,
		    request->cookie, request->ncookies,
		    LDC_COPY_OUT)) != 0) {
			PR0("ldc_mem_copy() returned errno %d "
			    "copying to client", status);
			return (status);
		}
	}

	return (status);
}
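/*
 * On the VD_IDENTITY flag used above: a copyin/copyout value of
 * VD_IDENTITY (the -1 function pointer defined earlier) means the client's
 * wire format already matches the ioctl argument, so the staging buffer is
 * used directly and no conversion callback is invoked. For example, the
 * VD_OP_GET_WCE table entry below uses VD_IDENTITY for both directions,
 * whereas VD_OP_GET_VTOC supplies vtoc2vd_vtoc to repack a struct vtoc.
 */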
data to client"); 1983 1984 /* Convert ioctl data to vdisk operation data, if necessary */ 1985 if (ioctl->copyout != VD_IDENTITY) 1986 (ioctl->copyout)((void *)ioctl->arg, buf); 1987 1988 if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 1989 request->cookie, request->ncookies, 1990 LDC_COPY_OUT)) != 0) { 1991 PR0("ldc_mem_copy() returned errno %d " 1992 "copying to client", status); 1993 return (status); 1994 } 1995 } 1996 1997 return (status); 1998 } 1999 2000 #define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t)) 2001 2002 /* 2003 * Description: 2004 * This generic function is called by the task queue to complete 2005 * the processing of the tasks. The specific completion function 2006 * is passed in as a field in the task pointer. 2007 * 2008 * Parameters: 2009 * arg - opaque pointer to structure containing task to be completed 2010 * 2011 * Return Values 2012 * None 2013 */ 2014 static void 2015 vd_complete(void *arg) 2016 { 2017 vd_task_t *task = (vd_task_t *)arg; 2018 2019 ASSERT(task != NULL); 2020 ASSERT(task->status == EINPROGRESS); 2021 ASSERT(task->completef != NULL); 2022 2023 task->status = task->completef(task); 2024 if (task->status) 2025 PR0("%s: Error %d completing task", __func__, task->status); 2026 2027 /* Now notify the vDisk client */ 2028 vd_complete_notify(task); 2029 } 2030 2031 static int 2032 vd_ioctl(vd_task_t *task) 2033 { 2034 int i, status; 2035 void *buf = NULL; 2036 struct dk_geom dk_geom = {0}; 2037 struct vtoc vtoc = {0}; 2038 struct dk_efi dk_efi = {0}; 2039 vd_t *vd = task->vd; 2040 vd_dring_payload_t *request = task->request; 2041 vd_ioctl_t ioctl[] = { 2042 /* Command (no-copy) operations */ 2043 {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0, 2044 DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE), 2045 NULL, NULL, NULL}, 2046 2047 /* "Get" (copy-out) operations */ 2048 {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), 2049 DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), 2050 NULL, VD_IDENTITY, VD_IDENTITY}, 2051 {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), 2052 RNDSIZE(vd_geom_t), 2053 DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), 2054 &dk_geom, NULL, dk_geom2vd_geom}, 2055 {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t), 2056 DKIOCGVTOC, STRINGIZE(DKIOCGVTOC), 2057 &vtoc, NULL, vtoc2vd_vtoc}, 2058 {VD_OP_GET_EFI, STRINGIZE(VD_OP_GET_EFI), RNDSIZE(vd_efi_t), 2059 DKIOCGETEFI, STRINGIZE(DKIOCGETEFI), 2060 &dk_efi, vd_get_efi_in, vd_get_efi_out}, 2061 2062 /* "Set" (copy-in) operations */ 2063 {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), 2064 DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), 2065 NULL, VD_IDENTITY, VD_IDENTITY}, 2066 {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), 2067 RNDSIZE(vd_geom_t), 2068 DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), 2069 &dk_geom, vd_geom2dk_geom, NULL}, 2070 {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t), 2071 DKIOCSVTOC, STRINGIZE(DKIOCSVTOC), 2072 &vtoc, vd_vtoc2vtoc, NULL}, 2073 {VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t), 2074 DKIOCSETEFI, STRINGIZE(DKIOCSETEFI), 2075 &dk_efi, vd_set_efi_in, vd_set_efi_out}, 2076 }; 2077 size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); 2078 2079 2080 ASSERT(vd != NULL); 2081 ASSERT(request != NULL); 2082 ASSERT(request->slice < vd->nslices); 2083 2084 /* 2085 * Determine ioctl corresponding to caller's "operation" and 2086 * validate caller's "nbytes" 2087 */ 2088 for (i = 0; i < nioctls; i++) { 2089 if (request->operation == ioctl[i].operation) { 2090 /* LDC memory operations require 8-byte multiples */ 2091 
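/*
 * Illustrative note (a sketch, not driver logic): the RNDSIZE() values in the table above pad each ioctl payload to an 8-byte multiple because LDC memory copies operate on uint64_t-aligned lengths. For example, assuming a 4-byte int:
 *
 *	RNDSIZE(int) == P2ROUNDUP(sizeof (int), sizeof (uint64_t))
 *	             == P2ROUNDUP(4, 8) == 8
 *
 * so a VD_OP_GET_WCE request must carry exactly 8 bytes even though the underlying DKIOCGETWCE argument is a 4-byte int.
 */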
ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); 2092 2093 if (request->operation == VD_OP_GET_EFI || 2094 request->operation == VD_OP_SET_EFI) { 2095 if (request->nbytes >= ioctl[i].nbytes) 2096 break; 2097 PR0("%s: Expected at least nbytes = %lu, " 2098 "got %lu", ioctl[i].operation_name, 2099 ioctl[i].nbytes, request->nbytes); 2100 return (EINVAL); 2101 } 2102 2103 if (request->nbytes != ioctl[i].nbytes) { 2104 PR0("%s: Expected nbytes = %lu, got %lu", 2105 ioctl[i].operation_name, ioctl[i].nbytes, 2106 request->nbytes); 2107 return (EINVAL); 2108 } 2109 2110 break; 2111 } 2112 } 2113 ASSERT(i < nioctls); /* because "operation" already validated */ 2114 2115 if (request->nbytes) 2116 buf = kmem_zalloc(request->nbytes, KM_SLEEP); 2117 status = vd_do_ioctl(vd, request, buf, &ioctl[i]); 2118 if (request->nbytes) 2119 kmem_free(buf, request->nbytes); 2120 2121 return (status); 2122 } 2123 2124 static int 2125 vd_get_devid(vd_task_t *task) 2126 { 2127 vd_t *vd = task->vd; 2128 vd_dring_payload_t *request = task->request; 2129 vd_devid_t *vd_devid; 2130 impl_devid_t *devid; 2131 int status, bufid_len, devid_len, len, sz; 2132 int bufbytes; 2133 2134 PR1("Get Device ID, nbytes=%ld", request->nbytes); 2135 2136 if (vd->file) { 2137 if (vd->file_devid == NULL) { 2138 PR2("No Device ID"); 2139 request->status = ENOENT; 2140 return (0); 2141 } else { 2142 sz = ddi_devid_sizeof(vd->file_devid); 2143 devid = kmem_alloc(sz, KM_SLEEP); 2144 bcopy(vd->file_devid, devid, sz); 2145 } 2146 } else { 2147 if (ddi_lyr_get_devid(vd->dev[request->slice], 2148 (ddi_devid_t *)&devid) != DDI_SUCCESS) { 2149 PR2("No Device ID"); 2150 request->status = ENOENT; 2151 return (0); 2152 } 2153 } 2154 2155 bufid_len = request->nbytes - sizeof (vd_devid_t) + 1; 2156 devid_len = DEVID_GETLEN(devid); 2157 2158 /* 2159 * Save the buffer size here for use in deallocation. 2160 * The actual number of bytes copied is returned in 2161 * the 'nbytes' field of the request structure. 2162 */ 2163 bufbytes = request->nbytes; 2164 2165 vd_devid = kmem_zalloc(bufbytes, KM_SLEEP); 2166 vd_devid->length = devid_len; 2167 vd_devid->type = DEVID_GETTYPE(devid); 2168 2169 len = (devid_len > bufid_len)? 
bufid_len : devid_len; 2170 2171 bcopy(devid->did_id, vd_devid->id, len); 2172 2173 request->status = 0; 2174 2175 /* LDC memory operations require 8-byte multiples */ 2176 ASSERT(request->nbytes % sizeof (uint64_t) == 0); 2177 2178 if ((status = ldc_mem_copy(vd->ldc_handle, (caddr_t)vd_devid, 0, 2179 &request->nbytes, request->cookie, request->ncookies, 2180 LDC_COPY_OUT)) != 0) { 2181 PR0("ldc_mem_copy() returned errno %d copying to client", 2182 status); 2183 } 2184 PR1("post mem_copy: nbytes=%ld", request->nbytes); 2185 2186 kmem_free(vd_devid, bufbytes); 2187 ddi_devid_free((ddi_devid_t)devid); 2188 2189 return (status); 2190 } 2191 2192 /* 2193 * Define the supported operations once the functions for performing them have 2194 * been defined 2195 */ 2196 static const vds_operation_t vds_operation[] = { 2197 #define X(_s) #_s, _s 2198 {X(VD_OP_BREAD), vd_start_bio, vd_complete_bio}, 2199 {X(VD_OP_BWRITE), vd_start_bio, vd_complete_bio}, 2200 {X(VD_OP_FLUSH), vd_ioctl, NULL}, 2201 {X(VD_OP_GET_WCE), vd_ioctl, NULL}, 2202 {X(VD_OP_SET_WCE), vd_ioctl, NULL}, 2203 {X(VD_OP_GET_VTOC), vd_ioctl, NULL}, 2204 {X(VD_OP_SET_VTOC), vd_ioctl, NULL}, 2205 {X(VD_OP_GET_DISKGEOM), vd_ioctl, NULL}, 2206 {X(VD_OP_SET_DISKGEOM), vd_ioctl, NULL}, 2207 {X(VD_OP_GET_EFI), vd_ioctl, NULL}, 2208 {X(VD_OP_SET_EFI), vd_ioctl, NULL}, 2209 {X(VD_OP_GET_DEVID), vd_get_devid, NULL}, 2210 #undef X 2211 }; 2212 2213 static const size_t vds_noperations = 2214 (sizeof (vds_operation))/(sizeof (vds_operation[0])); 2215 2216 /* 2217 * Process a task specifying a client I/O request 2218 * 2219 * Parameters: 2220 * task - structure containing the request sent from client 2221 * 2222 * Return Value 2223 * 0 - success 2224 * ENOTSUP - Unknown/Unsupported VD_OP_XXX operation 2225 * EINVAL - Invalid disk slice 2226 * != 0 - some other non-zero return value from start function 2227 */ 2228 static int 2229 vd_do_process_task(vd_task_t *task) 2230 { 2231 int i; 2232 vd_t *vd = task->vd; 2233 vd_dring_payload_t *request = task->request; 2234 2235 ASSERT(vd != NULL); 2236 ASSERT(request != NULL); 2237 2238 /* Find the requested operation */ 2239 for (i = 0; i < vds_noperations; i++) { 2240 if (request->operation == vds_operation[i].operation) { 2241 /* all operations should have a start func */ 2242 ASSERT(vds_operation[i].start != NULL); 2243 2244 task->completef = vds_operation[i].complete; 2245 break; 2246 } 2247 } 2248 if (i == vds_noperations) { 2249 PR0("Unsupported operation %u", request->operation); 2250 return (ENOTSUP); 2251 } 2252 2253 /* Range-check slice */ 2254 if (request->slice >= vd->nslices && 2255 (vd->vdisk_type != VD_DISK_TYPE_DISK || 2256 request->slice != VD_SLICE_NONE)) { 2257 PR0("Invalid \"slice\" %u (max %u) for virtual disk", 2258 request->slice, (vd->nslices - 1)); 2259 return (EINVAL); 2260 } 2261 2262 /* 2263 * Call the function pointer that starts the operation. 2264 */ 2265 return (vds_operation[i].start(task)); 2266 } 2267 2268 /* 2269 * Description: 2270 * This function is called by both the in-band and descriptor ring 2271 * message processing paths to actually execute the task 2272 * requested by the vDisk client. It in turn calls its worker 2273 * function, vd_do_process_task(), to carry out the request. 2274 * 2275 * Any transport errors (e.g. LDC errors, vDisk protocol errors) are 2276 * saved in the 'status' field of the task and are propagated back 2277 * up the call stack to trigger a NACK 2278 * 2279 * Any request errors (e.g.
ENOTTY from an ioctl) are saved in 2280 * the 'status' field of the request and result in an ACK being sent 2281 * by the completion handler. 2282 * 2283 * Parameters: 2284 * task - structure containing the request sent from client 2285 * 2286 * Return Value 2287 * 0 - successful synchronous request. 2288 * != 0 - transport error (e.g. LDC errors, vDisk protocol) 2289 * EINPROGRESS - task will be finished in a completion handler 2290 */ 2291 static int 2292 vd_process_task(vd_task_t *task) 2293 { 2294 vd_t *vd = task->vd; 2295 int status; 2296 2297 DTRACE_PROBE1(task__start, vd_task_t *, task); 2298 2299 task->status = vd_do_process_task(task); 2300 2301 /* 2302 * If the task processing function returned EINPROGRESS indicating 2303 * that the task needs completing then schedule a taskq entry to 2304 * finish it now. 2305 * 2306 * Otherwise the task processing function returned either zero 2307 * indicating that the task was finished in the start function (and we 2308 * don't need to wait in a completion function) or the start function 2309 * returned an error - in both cases all that needs to happen is the 2310 * notification to the vDisk client higher up the call stack. 2311 * If the task was using a Descriptor Ring, we need to mark it as done 2312 * at this stage. 2313 */ 2314 if (task->status == EINPROGRESS) { 2315 /* Queue a task to complete the operation */ 2316 (void) ddi_taskq_dispatch(vd->completionq, vd_complete, 2317 task, DDI_SLEEP); 2318 2319 } else if (!vd->reset_state && (vd->xfer_mode == VIO_DRING_MODE)) { 2320 /* Update the dring element if it's a dring client */ 2321 status = vd_mark_elem_done(vd, task->index, 2322 task->request->status, task->request->nbytes); 2323 if (status == ECONNRESET) 2324 vd_mark_in_reset(vd); 2325 } 2326 2327 return (task->status); 2328 } 2329 2330 /* 2331 * Return true if the "type", "subtype", and "env" fields of the "tag" first 2332 * argument match the corresponding remaining arguments; otherwise, return false 2333 */ 2334 boolean_t 2335 vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) 2336 { 2337 return ((tag->vio_msgtype == type) && 2338 (tag->vio_subtype == subtype) && 2339 (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; 2340 } 2341 2342 /* 2343 * Check whether the major/minor version specified in "ver_msg" is supported 2344 * by this server. 
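 *
 * Illustrative example, assuming this server supports only version
 * 1.0 (i.e. vds_version[] holds a single {1, 0} entry):
 *
 *	client sends 1.2 -> minor adjusted down to 0; message ACKed
 *	client sends 2.0 -> message rewritten to 1.0 and NACKed, so
 *			    the client may retry with 1.0 or lower
 *	client sends 0.5 -> no supported major remains; the version
 *			    is grounded to 0.0 and NACKed, ending
 *			    the negotiation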
2345 */ 2346 static boolean_t 2347 vds_supported_version(vio_ver_msg_t *ver_msg) 2348 { 2349 for (int i = 0; i < vds_num_versions; i++) { 2350 ASSERT(vds_version[i].major > 0); 2351 ASSERT((i == 0) || 2352 (vds_version[i].major < vds_version[i-1].major)); 2353 2354 /* 2355 * If the major versions match, adjust the minor version, if 2356 * necessary, down to the highest value supported by this 2357 * server and return true so this message will get "ack"ed; 2358 * the client should also support all minor versions lower 2359 * than the value it sent 2360 */ 2361 if (ver_msg->ver_major == vds_version[i].major) { 2362 if (ver_msg->ver_minor > vds_version[i].minor) { 2363 PR0("Adjusting minor version from %u to %u", 2364 ver_msg->ver_minor, vds_version[i].minor); 2365 ver_msg->ver_minor = vds_version[i].minor; 2366 } 2367 return (B_TRUE); 2368 } 2369 2370 /* 2371 * If the message contains a higher major version number, set 2372 * the message's major/minor versions to the current values 2373 * and return false, so this message will get "nack"ed with 2374 * these values, and the client will potentially try again 2375 * with the same or a lower version 2376 */ 2377 if (ver_msg->ver_major > vds_version[i].major) { 2378 ver_msg->ver_major = vds_version[i].major; 2379 ver_msg->ver_minor = vds_version[i].minor; 2380 return (B_FALSE); 2381 } 2382 2383 /* 2384 * Otherwise, the message's major version is less than the 2385 * current major version, so continue the loop to the next 2386 * (lower) supported version 2387 */ 2388 } 2389 2390 /* 2391 * No common version was found; "ground" the version pair in the 2392 * message to terminate negotiation 2393 */ 2394 ver_msg->ver_major = 0; 2395 ver_msg->ver_minor = 0; 2396 return (B_FALSE); 2397 } 2398 2399 /* 2400 * Process a version message from a client. vds expects to receive version 2401 * messages from clients seeking service, but never issues version messages 2402 * itself; therefore, vds can ACK or NACK client version messages, but does 2403 * not expect to receive version-message ACKs or NACKs (and will treat such 2404 * messages as invalid). 2405 */ 2406 static int 2407 vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 2408 { 2409 vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg; 2410 2411 2412 ASSERT(msglen >= sizeof (msg->tag)); 2413 2414 if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 2415 VIO_VER_INFO)) { 2416 return (ENOMSG); /* not a version message */ 2417 } 2418 2419 if (msglen != sizeof (*ver_msg)) { 2420 PR0("Expected %lu-byte version message; " 2421 "received %lu bytes", sizeof (*ver_msg), msglen); 2422 return (EBADMSG); 2423 } 2424 2425 if (ver_msg->dev_class != VDEV_DISK) { 2426 PR0("Expected device class %u (disk); received %u", 2427 VDEV_DISK, ver_msg->dev_class); 2428 return (EBADMSG); 2429 } 2430 2431 /* 2432 * We're talking to the expected kind of client; set our device class 2433 * for "ack/nack" back to the client 2434 */ 2435 ver_msg->dev_class = VDEV_DISK_SERVER; 2436 2437 /* 2438 * Check whether the (valid) version message specifies a version 2439 * supported by this server. 
If the version is not supported, return 2440 * EBADMSG so the message will get "nack"ed; vds_supported_version() 2441 * will have updated the message with a supported version for the 2442 * client to consider 2443 */ 2444 if (!vds_supported_version(ver_msg)) 2445 return (EBADMSG); 2446 2447 2448 /* 2449 * A version has been agreed upon; use the client's SID for 2450 * communication on this channel now 2451 */ 2452 ASSERT(!(vd->initialized & VD_SID)); 2453 vd->sid = ver_msg->tag.vio_sid; 2454 vd->initialized |= VD_SID; 2455 2456 /* 2457 * When multiple versions are supported, this function should store 2458 * the negotiated major and minor version values in the "vd" data 2459 * structure to govern further communication; in particular, note that 2460 * the client might have specified a lower minor version for the 2461 * agreed major version than specified in the vds_version[] array. The 2462 * following assertions should help remind future maintainers to make 2463 * the appropriate changes to support multiple versions. 2464 */ 2465 ASSERT(vds_num_versions == 1); 2466 ASSERT(ver_msg->ver_major == vds_version[0].major); 2467 ASSERT(ver_msg->ver_minor == vds_version[0].minor); 2468 2469 PR0("Using major version %u, minor version %u", 2470 ver_msg->ver_major, ver_msg->ver_minor); 2471 return (0); 2472 } 2473 2474 static int 2475 vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 2476 { 2477 vd_attr_msg_t *attr_msg = (vd_attr_msg_t *)msg; 2478 int status, retry = 0; 2479 2480 2481 ASSERT(msglen >= sizeof (msg->tag)); 2482 2483 if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 2484 VIO_ATTR_INFO)) { 2485 PR0("Message is not an attribute message"); 2486 return (ENOMSG); 2487 } 2488 2489 if (msglen != sizeof (*attr_msg)) { 2490 PR0("Expected %lu-byte attribute message; " 2491 "received %lu bytes", sizeof (*attr_msg), msglen); 2492 return (EBADMSG); 2493 } 2494 2495 if (attr_msg->max_xfer_sz == 0) { 2496 PR0("Received maximum transfer size of 0 from client"); 2497 return (EBADMSG); 2498 } 2499 2500 if ((attr_msg->xfer_mode != VIO_DESC_MODE) && 2501 (attr_msg->xfer_mode != VIO_DRING_MODE)) { 2502 PR0("Client requested unsupported transfer mode"); 2503 return (EBADMSG); 2504 } 2505 2506 /* 2507 * Check whether the underlying disk is ready; if not, try accessing 2508 * the device again. Open the vdisk device and extract info 2509 * about it, as this is needed to respond to the attr info msg 2510 */ 2511 if ((vd->initialized & VD_DISK_READY) == 0) { 2512 PR0("Retry setting up disk (%s)", vd->device_path); 2513 do { 2514 status = vd_setup_vd(vd); 2515 if (status != EAGAIN || ++retry > vds_dev_retries) 2516 break; 2517 2518 /* incremental delay */ 2519 delay(drv_usectohz(vds_dev_delay)); 2520 2521 /* if vdisk is no longer enabled - return error */ 2522 if (!vd_enabled(vd)) 2523 return (ENXIO); 2524 2525 } while (status == EAGAIN); 2526 2527 if (status) 2528 return (ENXIO); 2529 2530 vd->initialized |= VD_DISK_READY; 2531 ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 2532 PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u", 2533 ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 2534 (vd->pseudo ? "yes" : "no"), 2535 (vd->file ?
"yes" : "no"), 2536 vd->nslices); 2537 } 2538 2539 /* Success: valid message and transfer mode */ 2540 vd->xfer_mode = attr_msg->xfer_mode; 2541 2542 if (vd->xfer_mode == VIO_DESC_MODE) { 2543 2544 /* 2545 * The vd_dring_inband_msg_t contains one cookie; need room 2546 * for up to n-1 more cookies, where "n" is the number of full 2547 * pages plus possibly one partial page required to cover 2548 * "max_xfer_sz". Add room for one more cookie if 2549 * "max_xfer_sz" isn't an integral multiple of the page size. 2550 * Must first get the maximum transfer size in bytes. 2551 */ 2552 size_t max_xfer_bytes = attr_msg->vdisk_block_size ? 2553 attr_msg->vdisk_block_size*attr_msg->max_xfer_sz : 2554 attr_msg->max_xfer_sz; 2555 size_t max_inband_msglen = 2556 sizeof (vd_dring_inband_msg_t) + 2557 ((max_xfer_bytes/PAGESIZE + 2558 ((max_xfer_bytes % PAGESIZE) ? 1 : 0))* 2559 (sizeof (ldc_mem_cookie_t))); 2560 2561 /* 2562 * Set the maximum expected message length to 2563 * accommodate in-band-descriptor messages with all 2564 * their cookies 2565 */ 2566 vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen); 2567 2568 /* 2569 * Initialize the data structure for processing in-band I/O 2570 * request descriptors 2571 */ 2572 vd->inband_task.vd = vd; 2573 vd->inband_task.msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 2574 vd->inband_task.index = 0; 2575 vd->inband_task.type = VD_FINAL_RANGE_TASK; /* range == 1 */ 2576 } 2577 2578 /* Return the device's block size and max transfer size to the client */ 2579 attr_msg->vdisk_block_size = DEV_BSIZE; 2580 attr_msg->max_xfer_sz = vd->max_xfer_sz; 2581 2582 attr_msg->vdisk_size = vd->vdisk_size; 2583 attr_msg->vdisk_type = vd->vdisk_type; 2584 attr_msg->operations = vds_operations; 2585 PR0("%s", VD_CLIENT(vd)); 2586 2587 ASSERT(vd->dring_task == NULL); 2588 2589 return (0); 2590 } 2591 2592 static int 2593 vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 2594 { 2595 int status; 2596 size_t expected; 2597 ldc_mem_info_t dring_minfo; 2598 vio_dring_reg_msg_t *reg_msg = (vio_dring_reg_msg_t *)msg; 2599 2600 2601 ASSERT(msglen >= sizeof (msg->tag)); 2602 2603 if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 2604 VIO_DRING_REG)) { 2605 PR0("Message is not a register-dring message"); 2606 return (ENOMSG); 2607 } 2608 2609 if (msglen < sizeof (*reg_msg)) { 2610 PR0("Expected at least %lu-byte register-dring message; " 2611 "received %lu bytes", sizeof (*reg_msg), msglen); 2612 return (EBADMSG); 2613 } 2614 2615 expected = sizeof (*reg_msg) + 2616 (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0])); 2617 if (msglen != expected) { 2618 PR0("Expected %lu-byte register-dring message; " 2619 "received %lu bytes", expected, msglen); 2620 return (EBADMSG); 2621 } 2622 2623 if (vd->initialized & VD_DRING) { 2624 PR0("A dring was previously registered; only support one"); 2625 return (EBADMSG); 2626 } 2627 2628 if (reg_msg->num_descriptors > INT32_MAX) { 2629 PR0("reg_msg->num_descriptors = %u; must be <= %u (%s)", 2630 reg_msg->ncookies, INT32_MAX, STRINGIZE(INT32_MAX)); 2631 return (EBADMSG); 2632 } 2633 2634 if (reg_msg->ncookies != 1) { 2635 /* 2636 * In addition to fixing the assertion in the success case 2637 * below, supporting drings which require more than one 2638 * "cookie" requires increasing the value of vd->max_msglen 2639 * somewhere in the code path prior to receiving the message 2640 * which results in calling this function. 
Note that without 2641 * making this change, the larger message size required to 2642 * accommodate multiple cookies cannot be successfully 2643 * received, so this function will not even get called. 2644 * Gracefully accommodating more dring cookies might 2645 * reasonably demand exchanging an additional attribute or 2646 * making a minor protocol adjustment 2647 */ 2648 PR0("reg_msg->ncookies = %u != 1", reg_msg->ncookies); 2649 return (EBADMSG); 2650 } 2651 2652 status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie, 2653 reg_msg->ncookies, reg_msg->num_descriptors, 2654 reg_msg->descriptor_size, LDC_DIRECT_MAP, &vd->dring_handle); 2655 if (status != 0) { 2656 PR0("ldc_mem_dring_map() returned errno %d", status); 2657 return (status); 2658 } 2659 2660 /* 2661 * To remove the need for this assertion, must call 2662 * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a 2663 * successful call to ldc_mem_dring_map() 2664 */ 2665 ASSERT(reg_msg->ncookies == 1); 2666 2667 if ((status = 2668 ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { 2669 PR0("ldc_mem_dring_info() returned errno %d", status); 2670 if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0) 2671 PR0("ldc_mem_dring_unmap() returned errno %d", status); 2672 return (status); 2673 } 2674 2675 if (dring_minfo.vaddr == NULL) { 2676 PR0("Descriptor ring virtual address is NULL"); 2677 return (ENXIO); 2678 } 2679 2680 2681 /* Initialize for valid message and mapped dring */ 2682 PR1("descriptor size = %u, dring length = %u", 2683 reg_msg->descriptor_size, reg_msg->num_descriptors); 2684 vd->initialized |= VD_DRING; 2685 vd->dring_ident = 1; /* "There Can Be Only One" */ 2686 vd->dring = dring_minfo.vaddr; 2687 vd->descriptor_size = reg_msg->descriptor_size; 2688 vd->dring_len = reg_msg->num_descriptors; 2689 reg_msg->dring_ident = vd->dring_ident; 2690 2691 /* 2692 * Allocate and initialize a "shadow" array of data structures for 2693 * tasks to process I/O requests in dring elements 2694 */ 2695 vd->dring_task = 2696 kmem_zalloc((sizeof (*vd->dring_task)) * vd->dring_len, KM_SLEEP); 2697 for (int i = 0; i < vd->dring_len; i++) { 2698 vd->dring_task[i].vd = vd; 2699 vd->dring_task[i].index = i; 2700 vd->dring_task[i].request = &VD_DRING_ELEM(i)->payload; 2701 2702 status = ldc_mem_alloc_handle(vd->ldc_handle, 2703 &(vd->dring_task[i].mhdl)); 2704 if (status) { 2705 PR0("ldc_mem_alloc_handle() returned err %d ", status); 2706 return (ENXIO); 2707 } 2708 2709 vd->dring_task[i].msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 2710 } 2711 2712 return (0); 2713 } 2714 2715 static int 2716 vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 2717 { 2718 vio_dring_unreg_msg_t *unreg_msg = (vio_dring_unreg_msg_t *)msg; 2719 2720 2721 ASSERT(msglen >= sizeof (msg->tag)); 2722 2723 if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 2724 VIO_DRING_UNREG)) { 2725 PR0("Message is not an unregister-dring message"); 2726 return (ENOMSG); 2727 } 2728 2729 if (msglen != sizeof (*unreg_msg)) { 2730 PR0("Expected %lu-byte unregister-dring message; " 2731 "received %lu bytes", sizeof (*unreg_msg), msglen); 2732 return (EBADMSG); 2733 } 2734 2735 if (unreg_msg->dring_ident != vd->dring_ident) { 2736 PR0("Expected dring ident %lu; received %lu", 2737 vd->dring_ident, unreg_msg->dring_ident); 2738 return (EBADMSG); 2739 } 2740 2741 return (0); 2742 } 2743 2744 static int 2745 process_rdx_msg(vio_msg_t *msg, size_t msglen) 2746 { 2747 ASSERT(msglen >= sizeof (msg->tag)); 2748 2749 if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL,
VIO_SUBTYPE_INFO, VIO_RDX)) { 2750 PR0("Message is not an RDX message"); 2751 return (ENOMSG); 2752 } 2753 2754 if (msglen != sizeof (vio_rdx_msg_t)) { 2755 PR0("Expected %lu-byte RDX message; received %lu bytes", 2756 sizeof (vio_rdx_msg_t), msglen); 2757 return (EBADMSG); 2758 } 2759 2760 PR0("Valid RDX message"); 2761 return (0); 2762 } 2763 2764 static int 2765 vd_check_seq_num(vd_t *vd, uint64_t seq_num) 2766 { 2767 if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) { 2768 PR0("Received seq_num %lu; expected %lu", 2769 seq_num, (vd->seq_num + 1)); 2770 PR0("initiating soft reset"); 2771 vd_need_reset(vd, B_FALSE); 2772 return (1); 2773 } 2774 2775 vd->seq_num = seq_num; 2776 vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... */ 2777 return (0); 2778 } 2779 2780 /* 2781 * Return the expected size of an inband-descriptor message with all the 2782 * cookies it claims to include 2783 */ 2784 static size_t 2785 expected_inband_size(vd_dring_inband_msg_t *msg) 2786 { 2787 return ((sizeof (*msg)) + 2788 (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0]))); 2789 } 2790 2791 /* 2792 * Process an in-band descriptor message: used with clients like OBP, with 2793 * which vds exchanges descriptors within VIO message payloads, rather than 2794 * operating on them within a descriptor ring 2795 */ 2796 static int 2797 vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 2798 { 2799 size_t expected; 2800 vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg; 2801 2802 2803 ASSERT(msglen >= sizeof (msg->tag)); 2804 2805 if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 2806 VIO_DESC_DATA)) { 2807 PR1("Message is not an in-band-descriptor message"); 2808 return (ENOMSG); 2809 } 2810 2811 if (msglen < sizeof (*desc_msg)) { 2812 PR0("Expected at least %lu-byte descriptor message; " 2813 "received %lu bytes", sizeof (*desc_msg), msglen); 2814 return (EBADMSG); 2815 } 2816 2817 if (msglen != (expected = expected_inband_size(desc_msg))) { 2818 PR0("Expected %lu-byte descriptor message; " 2819 "received %lu bytes", expected, msglen); 2820 return (EBADMSG); 2821 } 2822 2823 if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) 2824 return (EBADMSG); 2825 2826 /* 2827 * Valid message: Set up the in-band descriptor task and process the 2828 * request. Arrange to acknowledge the client's message, unless an 2829 * error processing the descriptor task results in setting 2830 * VIO_SUBTYPE_NACK 2831 */ 2832 PR1("Valid in-band-descriptor message"); 2833 msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 2834 2835 ASSERT(vd->inband_task.msg != NULL); 2836 2837 bcopy(msg, vd->inband_task.msg, msglen); 2838 vd->inband_task.msglen = msglen; 2839 2840 /* 2841 * The task request is now the payload of the message 2842 * that was just copied into the body of the task. 
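 *
 * (Pointing "request" into the task's private copy, rather than at
 * the shared receive buffer, matters because the receive buffer is
 * reused for subsequent messages while an EINPROGRESS task may
 * still be outstanding on the completion queue.)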
2843 */ 2844 desc_msg = (vd_dring_inband_msg_t *)vd->inband_task.msg; 2845 vd->inband_task.request = &desc_msg->payload; 2846 2847 return (vd_process_task(&vd->inband_task)); 2848 } 2849 2850 static int 2851 vd_process_element(vd_t *vd, vd_task_type_t type, uint32_t idx, 2852 vio_msg_t *msg, size_t msglen) 2853 { 2854 int status; 2855 boolean_t ready; 2856 vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 2857 2858 2859 /* Accept the updated dring element */ 2860 if ((status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 2861 PR0("ldc_mem_dring_acquire() returned errno %d", status); 2862 return (status); 2863 } 2864 ready = (elem->hdr.dstate == VIO_DESC_READY); 2865 if (ready) { 2866 elem->hdr.dstate = VIO_DESC_ACCEPTED; 2867 } else { 2868 PR0("descriptor %u not ready", idx); 2869 VD_DUMP_DRING_ELEM(elem); 2870 } 2871 if ((status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 2872 PR0("ldc_mem_dring_release() returned errno %d", status); 2873 return (status); 2874 } 2875 if (!ready) 2876 return (EBUSY); 2877 2878 2879 /* Initialize a task and process the accepted element */ 2880 PR1("Processing dring element %u", idx); 2881 vd->dring_task[idx].type = type; 2882 2883 /* duplicate msg buf for cookies etc. */ 2884 bcopy(msg, vd->dring_task[idx].msg, msglen); 2885 2886 vd->dring_task[idx].msglen = msglen; 2887 return (vd_process_task(&vd->dring_task[idx])); 2888 } 2889 2890 static int 2891 vd_process_element_range(vd_t *vd, int start, int end, 2892 vio_msg_t *msg, size_t msglen) 2893 { 2894 int i, n, nelem, status = 0; 2895 boolean_t inprogress = B_FALSE; 2896 vd_task_type_t type; 2897 2898 2899 ASSERT(start >= 0); 2900 ASSERT(end >= 0); 2901 2902 /* 2903 * Arrange to acknowledge the client's message, unless an error 2904 * processing one of the dring elements results in setting 2905 * VIO_SUBTYPE_NACK 2906 */ 2907 msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 2908 2909 /* 2910 * Process the dring elements in the range 2911 */ 2912 nelem = ((end < start) ? end + vd->dring_len : end) - start + 1; 2913 for (i = start, n = nelem; n > 0; i = (i + 1) % vd->dring_len, n--) { 2914 ((vio_dring_msg_t *)msg)->end_idx = i; 2915 type = (n == 1) ? VD_FINAL_RANGE_TASK : VD_NONFINAL_RANGE_TASK; 2916 status = vd_process_element(vd, type, i, msg, msglen); 2917 if (status == EINPROGRESS) 2918 inprogress = B_TRUE; 2919 else if (status != 0) 2920 break; 2921 } 2922 2923 /* 2924 * If some, but not all, operations of a multi-element range are in 2925 * progress, wait for other operations to complete before returning 2926 * (which will result in "ack" or "nack" of the message). Note that 2927 * all outstanding operations will need to complete, not just the ones 2928 * corresponding to the current range of dring elements; however, as 2929 * this situation is an error case, performance is less critical.
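 *
 * For reference, the range arithmetic above handles wraparound of
 * the ring indices; e.g. with vd->dring_len = 32, start = 30 and
 * end = 1:
 *
 *	nelem = (1 + 32) - 30 + 1 = 4
 *
 * so elements 30, 31, 0 and 1 are processed, in that order.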
2930 */ 2931 if ((nelem > 1) && (status != EINPROGRESS) && inprogress) 2932 ddi_taskq_wait(vd->completionq); 2933 2934 return (status); 2935 } 2936 2937 static int 2938 vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 2939 { 2940 vio_dring_msg_t *dring_msg = (vio_dring_msg_t *)msg; 2941 2942 2943 ASSERT(msglen >= sizeof (msg->tag)); 2944 2945 if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 2946 VIO_DRING_DATA)) { 2947 PR1("Message is not a dring-data message"); 2948 return (ENOMSG); 2949 } 2950 2951 if (msglen != sizeof (*dring_msg)) { 2952 PR0("Expected %lu-byte dring message; received %lu bytes", 2953 sizeof (*dring_msg), msglen); 2954 return (EBADMSG); 2955 } 2956 2957 if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) 2958 return (EBADMSG); 2959 2960 if (dring_msg->dring_ident != vd->dring_ident) { 2961 PR0("Expected dring ident %lu; received ident %lu", 2962 vd->dring_ident, dring_msg->dring_ident); 2963 return (EBADMSG); 2964 } 2965 2966 if (dring_msg->start_idx >= vd->dring_len) { 2967 PR0("\"start_idx\" = %u; must be less than %u", 2968 dring_msg->start_idx, vd->dring_len); 2969 return (EBADMSG); 2970 } 2971 2972 if ((dring_msg->end_idx < 0) || 2973 (dring_msg->end_idx >= vd->dring_len)) { 2974 PR0("\"end_idx\" = %u; must be >= 0 and less than %u", 2975 dring_msg->end_idx, vd->dring_len); 2976 return (EBADMSG); 2977 } 2978 2979 /* Valid message; process range of updated dring elements */ 2980 PR1("Processing descriptor range, start = %u, end = %u", 2981 dring_msg->start_idx, dring_msg->end_idx); 2982 return (vd_process_element_range(vd, dring_msg->start_idx, 2983 dring_msg->end_idx, msg, msglen)); 2984 } 2985 2986 static int 2987 recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes) 2988 { 2989 int retry, status; 2990 size_t size = *nbytes; 2991 2992 2993 for (retry = 0, status = ETIMEDOUT; 2994 retry < vds_ldc_retries && status == ETIMEDOUT; 2995 retry++) { 2996 PR1("ldc_read() attempt %d", (retry + 1)); 2997 *nbytes = size; 2998 status = ldc_read(ldc_handle, msg, nbytes); 2999 } 3000 3001 if (status) { 3002 PR0("ldc_read() returned errno %d", status); 3003 if (status != ECONNRESET) 3004 return (ENOMSG); 3005 return (status); 3006 } else if (*nbytes == 0) { 3007 PR1("ldc_read() returned 0 and no message read"); 3008 return (ENOMSG); 3009 } 3010 3011 PR1("RCVD %lu-byte message", *nbytes); 3012 return (0); 3013 } 3014 3015 static int 3016 vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 3017 { 3018 int status; 3019 3020 3021 PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype, 3022 msg->tag.vio_subtype, msg->tag.vio_subtype_env); 3023 #ifdef DEBUG 3024 vd_decode_tag(msg); 3025 #endif 3026 3027 /* 3028 * Validate session ID up front, since it applies to all messages 3029 * once set 3030 */ 3031 if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) { 3032 PR0("Expected SID %u, received %u", vd->sid, 3033 msg->tag.vio_sid); 3034 return (EBADMSG); 3035 } 3036 3037 PR1("\tWhile in state %d (%s)", vd->state, vd_decode_state(vd->state)); 3038 3039 /* 3040 * Process the received message based on connection state 3041 */ 3042 switch (vd->state) { 3043 case VD_STATE_INIT: /* expect version message */ 3044 if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0) 3045 return (status); 3046 3047 /* Version negotiated, move to that state */ 3048 vd->state = VD_STATE_VER; 3049 return (0); 3050 3051 case VD_STATE_VER: /* expect attribute message */ 3052 if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0) 3053 return (status); 3054 
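/*
 * Sketch of the handshake this switch implements for a well-behaved
 * dring-mode client (states are the server's; each transition is
 * driven by a VIO_SUBTYPE_INFO control message):
 *
 *	VD_STATE_INIT  --VIO_VER_INFO-->  VD_STATE_VER
 *	VD_STATE_VER   --VIO_ATTR_INFO--> VD_STATE_ATTR
 *	VD_STATE_ATTR  --VIO_DRING_REG--> VD_STATE_DRING
 *	VD_STATE_DRING --VIO_RDX-->       VD_STATE_DATA
 *
 * In-band (VIO_DESC_MODE) clients skip dring registration and move
 * directly from VD_STATE_ATTR to VD_STATE_DATA on RDX.
 */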
3055 /* Attributes exchanged, move to that state */ 3056 vd->state = VD_STATE_ATTR; 3057 return (0); 3058 3059 case VD_STATE_ATTR: 3060 switch (vd->xfer_mode) { 3061 case VIO_DESC_MODE: /* expect RDX message */ 3062 if ((status = process_rdx_msg(msg, msglen)) != 0) 3063 return (status); 3064 3065 /* Ready to receive in-band descriptors */ 3066 vd->state = VD_STATE_DATA; 3067 return (0); 3068 3069 case VIO_DRING_MODE: /* expect register-dring message */ 3070 if ((status = 3071 vd_process_dring_reg_msg(vd, msg, msglen)) != 0) 3072 return (status); 3073 3074 /* One dring negotiated, move to that state */ 3075 vd->state = VD_STATE_DRING; 3076 return (0); 3077 3078 default: 3079 ASSERT("Unsupported transfer mode"); 3080 PR0("Unsupported transfer mode"); 3081 return (ENOTSUP); 3082 } 3083 3084 case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */ 3085 if ((status = process_rdx_msg(msg, msglen)) == 0) { 3086 /* Ready to receive data */ 3087 vd->state = VD_STATE_DATA; 3088 return (0); 3089 } else if (status != ENOMSG) { 3090 return (status); 3091 } 3092 3093 3094 /* 3095 * If another register-dring message is received, stay in 3096 * dring state in case the client sends RDX; although the 3097 * protocol allows multiple drings, this server does not 3098 * support using more than one 3099 */ 3100 if ((status = 3101 vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) 3102 return (status); 3103 3104 /* 3105 * Acknowledge an unregister-dring message, but reset the 3106 * connection anyway: Although the protocol allows 3107 * unregistering drings, this server cannot serve a vdisk 3108 * without its only dring 3109 */ 3110 status = vd_process_dring_unreg_msg(vd, msg, msglen); 3111 return ((status == 0) ? ENOTSUP : status); 3112 3113 case VD_STATE_DATA: 3114 switch (vd->xfer_mode) { 3115 case VIO_DESC_MODE: /* expect in-band-descriptor message */ 3116 return (vd_process_desc_msg(vd, msg, msglen)); 3117 3118 case VIO_DRING_MODE: /* expect dring-data or unreg-dring */ 3119 /* 3120 * Typically expect dring-data messages, so handle 3121 * them first 3122 */ 3123 if ((status = vd_process_dring_msg(vd, msg, 3124 msglen)) != ENOMSG) 3125 return (status); 3126 3127 /* 3128 * Acknowledge an unregister-dring message, but reset 3129 * the connection anyway: Although the protocol 3130 * allows unregistering drings, this server cannot 3131 * serve a vdisk without its only dring 3132 */ 3133 status = vd_process_dring_unreg_msg(vd, msg, msglen); 3134 return ((status == 0) ? 
ENOTSUP : status); 3135 3136 default: 3137 ASSERT("Unsupported transfer mode"); 3138 PR0("Unsupported transfer mode"); 3139 return (ENOTSUP); 3140 } 3141 3142 default: 3143 ASSERT("Invalid client connection state"); 3144 PR0("Invalid client connection state"); 3145 return (ENOTSUP); 3146 } 3147 } 3148 3149 static int 3150 vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 3151 { 3152 int status; 3153 boolean_t reset_ldc = B_FALSE; 3154 vd_task_t task; 3155 3156 /* 3157 * Check that the message is at least big enough for a "tag", so that 3158 * message processing can proceed based on tag-specified message type 3159 */ 3160 if (msglen < sizeof (vio_msg_tag_t)) { 3161 PR0("Received short (%lu-byte) message", msglen); 3162 /* Can't "nack" short message, so drop the big hammer */ 3163 PR0("initiating full reset"); 3164 vd_need_reset(vd, B_TRUE); 3165 return (EBADMSG); 3166 } 3167 3168 /* 3169 * Process the message 3170 */ 3171 switch (status = vd_do_process_msg(vd, msg, msglen)) { 3172 case 0: 3173 /* "ack" valid, successfully-processed messages */ 3174 msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 3175 break; 3176 3177 case EINPROGRESS: 3178 /* The completion handler will "ack" or "nack" the message */ 3179 return (EINPROGRESS); 3180 case ENOMSG: 3181 PR0("Received unexpected message"); 3182 _NOTE(FALLTHROUGH); 3183 case EBADMSG: 3184 case ENOTSUP: 3185 /* "transport" error will cause NACK of invalid messages */ 3186 msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 3187 break; 3188 3189 default: 3190 /* "transport" error will cause NACK of invalid messages */ 3191 msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 3192 /* An LDC error probably occurred, so try resetting it */ 3193 reset_ldc = B_TRUE; 3194 break; 3195 } 3196 3197 PR1("\tResulting in state %d (%s)", vd->state, 3198 vd_decode_state(vd->state)); 3199 3200 /* populate the task so we can dispatch it on the taskq */ 3201 task.vd = vd; 3202 task.msg = msg; 3203 task.msglen = msglen; 3204 3205 /* 3206 * Queue a task to send the notification that the operation completed. 3207 * We need to ensure that requests are responded to in the correct 3208 * order and since the taskq is processed serially this ordering 3209 * is maintained. 3210 */ 3211 (void) ddi_taskq_dispatch(vd->completionq, vd_serial_notify, 3212 &task, DDI_SLEEP); 3213 3214 /* 3215 * To ensure handshake negotiations do not happen out of order, such 3216 * requests that come through this path should not be done in parallel 3217 * so we need to wait here until the response is sent to the client. 3218 */ 3219 ddi_taskq_wait(vd->completionq); 3220 3221 /* Arrange to reset the connection for nack'ed or failed messages */ 3222 if ((status != 0) || reset_ldc) { 3223 PR0("initiating %s reset", 3224 (reset_ldc) ? 
"full" : "soft"); 3225 vd_need_reset(vd, reset_ldc); 3226 } 3227 3228 return (status); 3229 } 3230 3231 static boolean_t 3232 vd_enabled(vd_t *vd) 3233 { 3234 boolean_t enabled; 3235 3236 mutex_enter(&vd->lock); 3237 enabled = vd->enabled; 3238 mutex_exit(&vd->lock); 3239 return (enabled); 3240 } 3241 3242 static void 3243 vd_recv_msg(void *arg) 3244 { 3245 vd_t *vd = (vd_t *)arg; 3246 int rv = 0, status = 0; 3247 3248 ASSERT(vd != NULL); 3249 3250 PR2("New task to receive incoming message(s)"); 3251 3252 3253 while (vd_enabled(vd) && status == 0) { 3254 size_t msglen, msgsize; 3255 ldc_status_t lstatus; 3256 3257 /* 3258 * Receive and process a message 3259 */ 3260 vd_reset_if_needed(vd); /* can change vd->max_msglen */ 3261 3262 /* 3263 * check if channel is UP - else break out of loop 3264 */ 3265 status = ldc_status(vd->ldc_handle, &lstatus); 3266 if (lstatus != LDC_UP) { 3267 PR0("channel not up (status=%d), exiting recv loop\n", 3268 lstatus); 3269 break; 3270 } 3271 3272 ASSERT(vd->max_msglen != 0); 3273 3274 msgsize = vd->max_msglen; /* stable copy for alloc/free */ 3275 msglen = msgsize; /* actual len after recv_msg() */ 3276 3277 status = recv_msg(vd->ldc_handle, vd->vio_msgp, &msglen); 3278 switch (status) { 3279 case 0: 3280 rv = vd_process_msg(vd, (vio_msg_t *)vd->vio_msgp, 3281 msglen); 3282 /* check if max_msglen changed */ 3283 if (msgsize != vd->max_msglen) { 3284 PR0("max_msglen changed 0x%lx to 0x%lx bytes\n", 3285 msgsize, vd->max_msglen); 3286 kmem_free(vd->vio_msgp, msgsize); 3287 vd->vio_msgp = 3288 kmem_alloc(vd->max_msglen, KM_SLEEP); 3289 } 3290 if (rv == EINPROGRESS) 3291 continue; 3292 break; 3293 3294 case ENOMSG: 3295 break; 3296 3297 case ECONNRESET: 3298 PR0("initiating soft reset (ECONNRESET)\n"); 3299 vd_need_reset(vd, B_FALSE); 3300 status = 0; 3301 break; 3302 3303 default: 3304 /* Probably an LDC failure; arrange to reset it */ 3305 PR0("initiating full reset (status=0x%x)", status); 3306 vd_need_reset(vd, B_TRUE); 3307 break; 3308 } 3309 } 3310 3311 PR2("Task finished"); 3312 } 3313 3314 static uint_t 3315 vd_handle_ldc_events(uint64_t event, caddr_t arg) 3316 { 3317 vd_t *vd = (vd_t *)(void *)arg; 3318 int status; 3319 3320 ASSERT(vd != NULL); 3321 3322 if (!vd_enabled(vd)) 3323 return (LDC_SUCCESS); 3324 3325 if (event & LDC_EVT_DOWN) { 3326 PR0("LDC_EVT_DOWN: LDC channel went down"); 3327 3328 vd_need_reset(vd, B_TRUE); 3329 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 3330 DDI_SLEEP); 3331 if (status == DDI_FAILURE) { 3332 PR0("cannot schedule task to recv msg\n"); 3333 vd_need_reset(vd, B_TRUE); 3334 } 3335 } 3336 3337 if (event & LDC_EVT_RESET) { 3338 PR0("LDC_EVT_RESET: LDC channel was reset"); 3339 3340 if (vd->state != VD_STATE_INIT) { 3341 PR0("scheduling full reset"); 3342 vd_need_reset(vd, B_FALSE); 3343 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 3344 vd, DDI_SLEEP); 3345 if (status == DDI_FAILURE) { 3346 PR0("cannot schedule task to recv msg\n"); 3347 vd_need_reset(vd, B_TRUE); 3348 } 3349 3350 } else { 3351 PR0("channel already reset, ignoring...\n"); 3352 PR0("doing ldc up...\n"); 3353 (void) ldc_up(vd->ldc_handle); 3354 } 3355 3356 return (LDC_SUCCESS); 3357 } 3358 3359 if (event & LDC_EVT_UP) { 3360 PR0("EVT_UP: LDC is up\nResetting client connection state"); 3361 PR0("initiating soft reset"); 3362 vd_need_reset(vd, B_FALSE); 3363 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 3364 vd, DDI_SLEEP); 3365 if (status == DDI_FAILURE) { 3366 PR0("cannot schedule task to recv msg\n"); 3367 vd_need_reset(vd, 
B_TRUE); 3368 return (LDC_SUCCESS); 3369 } 3370 } 3371 3372 if (event & LDC_EVT_READ) { 3373 int status; 3374 3375 PR1("New data available"); 3376 /* Queue a task to receive the new data */ 3377 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 3378 DDI_SLEEP); 3379 3380 if (status == DDI_FAILURE) { 3381 PR0("cannot schedule task to recv msg\n"); 3382 vd_need_reset(vd, B_TRUE); 3383 } 3384 } 3385 3386 return (LDC_SUCCESS); 3387 } 3388 3389 static uint_t 3390 vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 3391 { 3392 _NOTE(ARGUNUSED(key, val)) 3393 (*((uint_t *)arg))++; 3394 return (MH_WALK_TERMINATE); 3395 } 3396 3397 3398 static int 3399 vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3400 { 3401 uint_t vd_present = 0; 3402 minor_t instance; 3403 vds_t *vds; 3404 3405 3406 switch (cmd) { 3407 case DDI_DETACH: 3408 /* the real work happens below */ 3409 break; 3410 case DDI_SUSPEND: 3411 PR0("No action required for DDI_SUSPEND"); 3412 return (DDI_SUCCESS); 3413 default: 3414 PR0("Unrecognized \"cmd\""); 3415 return (DDI_FAILURE); 3416 } 3417 3418 ASSERT(cmd == DDI_DETACH); 3419 instance = ddi_get_instance(dip); 3420 if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 3421 PR0("Could not get state for instance %u", instance); 3422 ddi_soft_state_free(vds_state, instance); 3423 return (DDI_FAILURE); 3424 } 3425 3426 /* Do not detach when serving any vdisks */ 3427 mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present); 3428 if (vd_present) { 3429 PR0("Not detaching because serving vdisks"); 3430 return (DDI_FAILURE); 3431 } 3432 3433 PR0("Detaching"); 3434 if (vds->initialized & VDS_MDEG) { 3435 (void) mdeg_unregister(vds->mdeg); 3436 kmem_free(vds->ispecp->specp, sizeof (vds_prop_template)); 3437 kmem_free(vds->ispecp, sizeof (mdeg_node_spec_t)); 3438 vds->ispecp = NULL; 3439 vds->mdeg = NULL; 3440 } 3441 3442 if (vds->initialized & VDS_LDI) 3443 (void) ldi_ident_release(vds->ldi_ident); 3444 mod_hash_destroy_hash(vds->vd_table); 3445 ddi_soft_state_free(vds_state, instance); 3446 return (DDI_SUCCESS); 3447 } 3448 3449 static boolean_t 3450 is_pseudo_device(dev_info_t *dip) 3451 { 3452 dev_info_t *parent, *root = ddi_root_node(); 3453 3454 3455 for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root); 3456 parent = ddi_get_parent(parent)) { 3457 if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0) 3458 return (B_TRUE); 3459 } 3460 3461 return (B_FALSE); 3462 } 3463 3464 static int 3465 vd_setup_full_disk(vd_t *vd) 3466 { 3467 int rval, status; 3468 major_t major = getmajor(vd->dev[0]); 3469 minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; 3470 struct dk_minfo dk_minfo; 3471 3472 /* 3473 * At this point, vdisk_size is set to the size of partition 2 but 3474 * this does not represent the size of the disk because partition 2 3475 * may not cover the entire disk and its size does not include reserved 3476 * blocks. So we update vdisk_size to be the size of the entire disk.
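 *
 * As an illustration, assuming a VTOC-labeled disk with
 * dkg_ncyl = 1000 data cylinders and dkg_acyl = 2 alternate
 * cylinders: slice 2 spans only the data cylinders, while
 * DKIOCGMEDIAINFO (used below) reports dki_capacity for the whole
 * media, including the alternate cylinders and other reserved
 * blocks.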
3477 */ 3478 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, 3479 (intptr_t)&dk_minfo, (vd_open_flags | FKIOCTL), 3480 kcred, &rval)) != 0) { 3481 PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", 3482 status); 3483 return (status); 3484 } 3485 vd->vdisk_size = dk_minfo.dki_capacity; 3486 3487 /* Set full-disk parameters */ 3488 vd->vdisk_type = VD_DISK_TYPE_DISK; 3489 vd->nslices = (sizeof (vd->dev))/(sizeof (vd->dev[0])); 3490 3491 /* Move dev number and LDI handle to entire-disk-slice array elements */ 3492 vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; 3493 vd->dev[0] = 0; 3494 vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; 3495 vd->ldi_handle[0] = NULL; 3496 3497 /* Initialize device numbers for remaining slices and open them */ 3498 for (int slice = 0; slice < vd->nslices; slice++) { 3499 /* 3500 * Skip the entire-disk slice, as it's already open and its 3501 * device known 3502 */ 3503 if (slice == VD_ENTIRE_DISK_SLICE) 3504 continue; 3505 ASSERT(vd->dev[slice] == 0); 3506 ASSERT(vd->ldi_handle[slice] == NULL); 3507 3508 /* 3509 * Construct the device number for the current slice 3510 */ 3511 vd->dev[slice] = makedevice(major, (minor + slice)); 3512 3513 /* 3514 * Open all slices of the disk to serve them to the client. 3515 * Slices are opened exclusively to prevent other threads or 3516 * processes in the service domain from performing I/O to 3517 * slices being accessed by a client. Failure to open a slice 3518 * results in vds not serving this disk, as the client could 3519 * attempt (and should be able) to access any slice immediately. 3520 * Any slices successfully opened before a failure will get 3521 * closed by vds_destroy_vd() as a result of the error returned 3522 * by this function. 3523 * 3524 * We need to do the open with FNDELAY so that opening an empty 3525 * slice does not fail. 
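 *
 * (Without FNDELAY, opening a slice whose partition has zero size
 * would typically fail, e.g. with ENXIO, which would wrongly
 * prevent the disk from being exported.)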
3526 */ 3527 PR0("Opening device major %u, minor %u = slice %u", 3528 major, minor, slice); 3529 if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 3530 vd_open_flags | FNDELAY, kcred, &vd->ldi_handle[slice], 3531 vd->vds->ldi_ident)) != 0) { 3532 PRN("ldi_open_by_dev() returned errno %d " 3533 "for slice %u", status, slice); 3534 /* vds_destroy_vd() will close any open slices */ 3535 vd->ldi_handle[slice] = NULL; 3536 return (status); 3537 } 3538 } 3539 3540 return (0); 3541 } 3542 3543 static int 3544 vd_setup_partition_vtoc(vd_t *vd) 3545 { 3546 int rval, status; 3547 char *device_path = vd->device_path; 3548 3549 status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, 3550 (intptr_t)&vd->dk_geom, (vd_open_flags | FKIOCTL), kcred, &rval); 3551 3552 if (status != 0) { 3553 PRN("ldi_ioctl(DKIOCGGEOM) returned errno %d for %s", 3554 status, device_path); 3555 return (status); 3556 } 3557 3558 /* Initialize dk_geom structure for single-slice device */ 3559 if (vd->dk_geom.dkg_nsect == 0) { 3560 PRN("%s geometry claims 0 sectors per track", device_path); 3561 return (EIO); 3562 } 3563 if (vd->dk_geom.dkg_nhead == 0) { 3564 PRN("%s geometry claims 0 heads", device_path); 3565 return (EIO); 3566 } 3567 vd->dk_geom.dkg_ncyl = vd->vdisk_size / vd->dk_geom.dkg_nsect / 3568 vd->dk_geom.dkg_nhead; 3569 vd->dk_geom.dkg_acyl = 0; 3570 vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; 3571 3572 3573 /* Initialize vtoc structure for single-slice device */ 3574 bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, 3575 MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); 3576 bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); 3577 vd->vtoc.v_nparts = 1; 3578 vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; 3579 vd->vtoc.v_part[0].p_flag = 0; 3580 vd->vtoc.v_part[0].p_start = 0; 3581 vd->vtoc.v_part[0].p_size = vd->vdisk_size; 3582 bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, 3583 MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); 3584 3585 return (0); 3586 } 3587 3588 static int 3589 vd_setup_partition_efi(vd_t *vd) 3590 { 3591 efi_gpt_t *gpt; 3592 efi_gpe_t *gpe; 3593 struct uuid uuid = EFI_RESERVED; 3594 uint32_t crc; 3595 int length; 3596 3597 length = sizeof (efi_gpt_t) + sizeof (efi_gpe_t); 3598 3599 gpt = kmem_zalloc(length, KM_SLEEP); 3600 gpe = (efi_gpe_t *)(gpt + 1); 3601 3602 gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 3603 gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 3604 gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); 3605 gpt->efi_gpt_FirstUsableLBA = LE_64(0ULL); 3606 gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1); 3607 gpt->efi_gpt_NumberOfPartitionEntries = LE_32(1); 3608 gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); 3609 3610 UUID_LE_CONVERT(gpe->efi_gpe_PartitionTypeGUID, uuid); 3611 gpe->efi_gpe_StartingLBA = gpt->efi_gpt_FirstUsableLBA; 3612 gpe->efi_gpe_EndingLBA = gpt->efi_gpt_LastUsableLBA; 3613 3614 CRC32(crc, gpe, sizeof (efi_gpe_t), -1U, crc32_table); 3615 gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 3616 3617 CRC32(crc, gpt, sizeof (efi_gpt_t), -1U, crc32_table); 3618 gpt->efi_gpt_HeaderCRC32 = LE_32(~crc); 3619 3620 vd->dk_efi.dki_lba = 0; 3621 vd->dk_efi.dki_length = length; 3622 vd->dk_efi.dki_data = gpt; 3623 3624 return (0); 3625 } 3626 3627 static int 3628 vd_setup_file(vd_t *vd) 3629 { 3630 int rval, status; 3631 vattr_t vattr; 3632 dev_t dev; 3633 char *file_path = vd->device_path; 3634 char dev_path[MAXPATHLEN + 1]; 3635 ldi_handle_t lhandle; 3636 struct dk_cinfo dk_cinfo; 3637 3638 /* make sure the file is
valid */ 3639 if ((status = lookupname(file_path, UIO_SYSSPACE, FOLLOW, 3640 NULLVPP, &vd->file_vnode)) != 0) { 3641 PRN("Cannot lookup file(%s) errno %d", file_path, status); 3642 return (status); 3643 } 3644 3645 if (vd->file_vnode->v_type != VREG) { 3646 PRN("Invalid file type (%s)\n", file_path); 3647 VN_RELE(vd->file_vnode); 3648 return (EBADF); 3649 } 3650 VN_RELE(vd->file_vnode); 3651 3652 if ((status = vn_open(file_path, UIO_SYSSPACE, vd_open_flags | FOFFMAX, 3653 0, &vd->file_vnode, 0, 0)) != 0) { 3654 PRN("vn_open(%s) = errno %d", file_path, status); 3655 return (status); 3656 } 3657 3658 /* 3659 * We set vd->file now so that vds_destroy_vd will take care of 3660 * closing the file and releasing the vnode in case of an error. 3661 */ 3662 vd->file = B_TRUE; 3663 vd->pseudo = B_FALSE; 3664 3665 vattr.va_mask = AT_SIZE; 3666 if ((status = VOP_GETATTR(vd->file_vnode, &vattr, 0, kcred)) != 0) { 3667 PRN("VOP_GETATTR(%s) = errno %d", file_path, status); 3668 return (EIO); 3669 } 3670 3671 vd->file_size = vattr.va_size; 3672 /* size should be at least sizeof(dk_label) */ 3673 if (vd->file_size < sizeof (struct dk_label)) { 3674 PRN("Size of file has to be at least %ld bytes", 3675 sizeof (struct dk_label)); 3676 return (EIO); 3677 } 3678 3679 if (vd->file_vnode->v_flag & VNOMAP) { 3680 PRN("File %s cannot be mapped", file_path); 3681 return (EIO); 3682 } 3683 3684 /* find and validate the geometry of the disk image */ 3685 status = vd_file_validate_geometry(vd); 3686 if (status != 0 && status != EINVAL) { 3687 PRN("Fail to read label from %s", file_path); 3688 return (EIO); 3689 } 3690 3691 vd->nslices = V_NUMPAR; 3692 /* sector size = block size = DEV_BSIZE */ 3693 vd->vdisk_size = vd->file_size / DEV_BSIZE; 3694 vd->vdisk_type = VD_DISK_TYPE_DISK; 3695 vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */ 3696 3697 /* Get max_xfer_sz from the device where the file is */ 3698 dev = vd->file_vnode->v_vfsp->vfs_dev; 3699 dev_path[0] = '\0'; 3700 if (ddi_dev_pathname(dev, S_IFBLK, dev_path) == DDI_SUCCESS) { 3701 PR0("underlying device = %s\n", dev_path); 3702 } 3703 3704 if ((status = ldi_open_by_dev(&dev, OTYP_BLK, FREAD, 3705 kcred, &lhandle, vd->vds->ldi_ident)) != 0) { 3706 PR0("ldi_open_by_dev() returned errno %d for device %s", 3707 status, dev_path); 3708 } else { 3709 if ((status = ldi_ioctl(lhandle, DKIOCINFO, 3710 (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, 3711 &rval)) != 0) { 3712 PR0("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 3713 status, dev_path); 3714 } else { 3715 /* 3716 * Store the device's max transfer size for 3717 * return to the client 3718 */ 3719 vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 3720 } 3721 3722 PR0("close the device %s", dev_path); 3723 (void) ldi_close(lhandle, FREAD, kcred); 3724 } 3725 3726 PR0("using file %s, dev %s, max_xfer = %u blks", 3727 file_path, dev_path, vd->max_xfer_sz); 3728 3729 /* Setup devid for the disk image */ 3730 3731 if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 3732 3733 status = vd_file_read_devid(vd, &vd->file_devid); 3734 3735 if (status == 0) { 3736 /* a valid devid was found */ 3737 return (0); 3738 } 3739 3740 if (status != EINVAL) { 3741 /* 3742 * There was an error while trying to read the devid. 3743 * So this disk image may have a devid but we are 3744 * unable to read it. 3745 */ 3746 PR0("can not read devid for %s", file_path); 3747 vd->file_devid = NULL; 3748 return (0); 3749 } 3750 } 3751 3752 /* 3753 * No valid device id was found so we create one.
Note that a failure 3754 * to create a device id is not fatal and does not prevent the disk 3755 * image from being attached. 3756 */ 3757 PR1("creating devid for %s", file_path); 3758 3759 if (ddi_devid_init(vd->vds->dip, DEVID_FAB, NULL, 0, 3760 &vd->file_devid) != DDI_SUCCESS) { 3761 PR0("fail to create devid for %s", file_path); 3762 vd->file_devid = NULL; 3763 return (0); 3764 } 3765 3766 /* 3767 * Write devid to the disk image. The devid is stored into the disk 3768 * image if we have a valid label; otherwise the devid will be stored 3769 * when the user writes a valid label. 3770 */ 3771 if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 3772 if (vd_file_write_devid(vd, vd->file_devid) != 0) { 3773 PR0("fail to write devid for %s", file_path); 3774 ddi_devid_free(vd->file_devid); 3775 vd->file_devid = NULL; 3776 } 3777 } 3778 3779 return (0); 3780 } 3781 3782 static int 3783 vd_setup_vd(vd_t *vd) 3784 { 3785 int rval, status; 3786 dev_info_t *dip; 3787 struct dk_cinfo dk_cinfo; 3788 char *device_path = vd->device_path; 3789 3790 /* 3791 * We need to open with FNDELAY so that opening an empty partition 3792 * does not fail. 3793 */ 3794 if ((status = ldi_open_by_name(device_path, vd_open_flags | FNDELAY, 3795 kcred, &vd->ldi_handle[0], vd->vds->ldi_ident)) != 0) { 3796 PR0("ldi_open_by_name(%s) = errno %d", device_path, status); 3797 vd->ldi_handle[0] = NULL; 3798 3799 /* this may not be a device; try opening it as a file */ 3800 if (status == ENXIO || status == ENODEV) 3801 status = vd_setup_file(vd); 3802 if (status) { 3803 PRN("Cannot use device/file (%s), errno=%d\n", 3804 device_path, status); 3805 if (status == ENXIO || status == ENODEV || 3806 status == ENOENT || status == EROFS) { 3807 return (EAGAIN); 3808 } 3809 } 3810 return (status); 3811 } 3812 3813 /* 3814 * nslices must be updated now so that vds_destroy_vd() will close 3815 * the slice we have just opened in case of an error.
3816 */ 3817 vd->nslices = 1; 3818 vd->file = B_FALSE; 3819 3820 /* Get device number and size of backing device */ 3821 if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { 3822 PRN("ldi_get_dev() returned errno %d for %s", 3823 status, device_path); 3824 return (status); 3825 } 3826 if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { 3827 PRN("ldi_get_size() failed for %s", device_path); 3828 return (EIO); 3829 } 3830 vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */ 3831 3832 /* Verify backing device supports dk_cinfo */ 3833 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 3834 (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, 3835 &rval)) != 0) { 3836 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 3837 status, device_path); 3838 return (status); 3839 } 3840 if (dk_cinfo.dki_partition >= V_NUMPAR) { 3841 PRN("slice %u >= maximum slice %u for %s", 3842 dk_cinfo.dki_partition, V_NUMPAR, device_path); 3843 return (EIO); 3844 } 3845 3846 vd->vdisk_label = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc); 3847 3848 /* Store the device's max transfer size for return to the client */ 3849 vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 3850 3851 /* Determine if backing device is a pseudo device */ 3852 if ((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]), 3853 dev_to_instance(vd->dev[0]), 0)) == NULL) { 3854 PRN("%s is no longer accessible", device_path); 3855 return (EIO); 3856 } 3857 vd->pseudo = is_pseudo_device(dip); 3858 ddi_release_devi(dip); 3859 if (vd->pseudo) { 3860 /* 3861 * Currently we only support exporting pseudo devices which 3862 * provide a valid disk label. 3863 */ 3864 if (vd->vdisk_label == VD_DISK_LABEL_UNK) { 3865 PRN("%s is a pseudo device with an invalid disk " 3866 "label\n", device_path); 3867 return (EINVAL); 3868 } 3869 vd->vdisk_type = VD_DISK_TYPE_SLICE; 3870 vd->nslices = 1; 3871 return (0); /* ...and we're done */ 3872 } 3873 3874 /* If slice is entire-disk slice, initialize for full disk */ 3875 if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) 3876 return (vd_setup_full_disk(vd)); 3877 3878 /* We can only export a slice if the disk has a valid label */ 3879 if (vd->vdisk_label == VD_DISK_LABEL_UNK) { 3880 PRN("%s is a slice from a disk with an unknown disk label\n", 3881 device_path); 3882 return (EINVAL); 3883 } 3884 3885 /* Otherwise, we have a non-entire slice of a device */ 3886 vd->vdisk_type = VD_DISK_TYPE_SLICE; 3887 vd->nslices = 1; 3888 3889 if (vd->vdisk_label == VD_DISK_LABEL_EFI) { 3890 /* Slice from a disk with an EFI label */ 3891 status = vd_setup_partition_efi(vd); 3892 } else { 3893 /* Slice from a disk with a VTOC label */ 3894 ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); 3895 status = vd_setup_partition_vtoc(vd); 3896 } 3897 3898 return (status); 3899 } 3900 3901 static int 3902 vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id, 3903 vd_t **vdp) 3904 { 3905 char tq_name[TASKQ_NAMELEN]; 3906 int status; 3907 ddi_iblock_cookie_t iblock = NULL; 3908 ldc_attr_t ldc_attr; 3909 vd_t *vd; 3910 3911 3912 ASSERT(vds != NULL); 3913 ASSERT(device_path != NULL); 3914 ASSERT(vdp != NULL); 3915 PR0("Adding vdisk for %s", device_path); 3916 3917 if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) { 3918 PRN("No memory for virtual disk"); 3919 return (EAGAIN); 3920 } 3921 *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */ 3922 vd->vds = vds; 3923 (void) strncpy(vd->device_path, device_path, MAXPATHLEN); 3924 3925 /* Open vdisk and initialize 
static int
vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id,
    vd_t **vdp)
{
	char			tq_name[TASKQ_NAMELEN];
	int			status;
	ddi_iblock_cookie_t	iblock = NULL;
	ldc_attr_t		ldc_attr;
	vd_t			*vd;


	ASSERT(vds != NULL);
	ASSERT(device_path != NULL);
	ASSERT(vdp != NULL);
	PR0("Adding vdisk for %s", device_path);

	if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) {
		PRN("No memory for virtual disk");
		return (EAGAIN);
	}
	*vdp = vd;	/* assign here so vds_destroy_vd() can cleanup later */
	vd->vds = vds;
	(void) strncpy(vd->device_path, device_path, MAXPATHLEN);

	/* Open vdisk and initialize parameters */
	if ((status = vd_setup_vd(vd)) == 0) {
		vd->initialized |= VD_DISK_READY;

		ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR);
		PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u",
		    ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"),
		    (vd->pseudo ? "yes" : "no"), (vd->file ? "yes" : "no"),
		    vd->nslices);
	} else {
		if (status != EAGAIN)
			return (status);
	}

	/* Initialize locking */
	if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED,
	    &iblock) != DDI_SUCCESS) {
		PRN("Could not get iblock cookie.");
		return (EIO);
	}

	mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock);
	vd->initialized |= VD_LOCKING;


	/* Create start and completion task queues for the vdisk */
	(void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id);
	PR1("tq_name = %s", tq_name);
	if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		PRN("Could not create task queue");
		return (EIO);
	}
	(void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id);
	PR1("tq_name = %s", tq_name);
	if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		PRN("Could not create task queue");
		return (EIO);
	}
	vd->enabled = 1;	/* before callback can dispatch to startq */


	/* Bring up LDC */
	ldc_attr.devclass = LDC_DEV_BLK_SVC;
	ldc_attr.instance = ddi_get_instance(vds->dip);
	ldc_attr.mode = LDC_MODE_UNRELIABLE;
	ldc_attr.mtu = VD_LDC_MTU;
	if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) {
		PRN("Could not initialize LDC channel %lu, "
		    "init failed with error %d", ldc_id, status);
		return (status);
	}
	vd->initialized |= VD_LDC;

	if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events,
	    (caddr_t)vd)) != 0) {
		PRN("Could not initialize LDC channel %lu, "
		    "reg_callback failed with error %d", ldc_id, status);
		return (status);
	}

	if ((status = ldc_open(vd->ldc_handle)) != 0) {
		PRN("Could not initialize LDC channel %lu, "
		    "open failed with error %d", ldc_id, status);
		return (status);
	}

	if ((status = ldc_up(vd->ldc_handle)) != 0) {
		PR0("ldc_up() returned errno %d", status);
	}

	/* Allocate the inband task memory handle */
	status = ldc_mem_alloc_handle(vd->ldc_handle, &(vd->inband_task.mhdl));
	if (status) {
		PRN("Could not initialize LDC channel %lu, "
		    "alloc_handle failed with error %d", ldc_id, status);
		return (ENXIO);
	}

	/* Add the successfully-initialized vdisk to the server's table */
	if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) {
		PRN("Error adding vdisk ID %lu to table", id);
		return (EIO);
	}

	/* Allocate the staging buffer */
	vd->max_msglen = sizeof (vio_msg_t);	/* baseline vio message size */
	vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP);

	/* Store initial state */
	vd->state = VD_STATE_INIT;

	return (0);
}
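
/*
 * Sketch of the "initialization flags" idiom used above: every stage
 * that succeeds sets a bit in vd->initialized, so vds_destroy_vd()
 * (below) can be called from any failure point and unwind exactly the
 * stages that completed. Hypothetical condensed form:
 */
static void
example_unwind(vd_t *vd)
{
	if (vd->initialized & VD_LDC)		/* set after ldc_init() */
		(void) ldc_fini(vd->ldc_handle);
	if (vd->initialized & VD_LOCKING)	/* set after mutex_init() */
		mutex_destroy(&vd->lock);
}
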
static void
vd_free_dring_task(vd_t *vdp)
{
	if (vdp->dring_task != NULL) {
		ASSERT(vdp->dring_len != 0);
		/* Free all dring_task memory handles */
		for (int i = 0; i < vdp->dring_len; i++) {
			(void) ldc_mem_free_handle(vdp->dring_task[i].mhdl);
			kmem_free(vdp->dring_task[i].msg, vdp->max_msglen);
			vdp->dring_task[i].msg = NULL;
		}
		kmem_free(vdp->dring_task,
		    (sizeof (*vdp->dring_task)) * vdp->dring_len);
		vdp->dring_task = NULL;
	}
}
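
/*
 * For symmetry with the free path above, a sketch of the matching
 * per-descriptor allocation. The real allocation happens elsewhere in
 * this file when the client registers its descriptor ring; this
 * condensed helper is illustrative only:
 */
static int
example_alloc_dring_task(vd_t *vdp)
{
	vdp->dring_task =
	    kmem_zalloc(sizeof (*vdp->dring_task) * vdp->dring_len, KM_SLEEP);
	for (int i = 0; i < vdp->dring_len; i++) {
		int status = ldc_mem_alloc_handle(vdp->ldc_handle,
		    &(vdp->dring_task[i].mhdl));
		if (status != 0)
			return (status);
		vdp->dring_task[i].msg = kmem_alloc(vdp->max_msglen, KM_SLEEP);
	}
	return (0);
}
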
/*
 * Destroy the state associated with a virtual disk
 */
static void
vds_destroy_vd(void *arg)
{
	vd_t	*vd = (vd_t *)arg;
	int	retry = 0, rv;

	if (vd == NULL)
		return;

	PR0("Destroying vdisk state");

	if (vd->dk_efi.dki_data != NULL)
		kmem_free(vd->dk_efi.dki_data, vd->dk_efi.dki_length);

	/* Disable queuing requests for the vdisk */
	if (vd->initialized & VD_LOCKING) {
		mutex_enter(&vd->lock);
		vd->enabled = 0;
		mutex_exit(&vd->lock);
	}

	/* Drain and destroy start queue (*before* destroying completionq) */
	if (vd->startq != NULL)
		ddi_taskq_destroy(vd->startq);	/* waits for queued tasks */

	/* Drain and destroy completion queue (*before* shutting down LDC) */
	if (vd->completionq != NULL)
		ddi_taskq_destroy(vd->completionq);	/* waits for tasks */

	vd_free_dring_task(vd);

	/* Free the inband task memory handle */
	(void) ldc_mem_free_handle(vd->inband_task.mhdl);

	/* Shut down LDC */
	if (vd->initialized & VD_LDC) {
		/* Unmap the dring */
		if (vd->initialized & VD_DRING)
			(void) ldc_mem_dring_unmap(vd->dring_handle);

		/* Close the LDC channel, retrying on EAGAIN */
		while ((rv = ldc_close(vd->ldc_handle)) == EAGAIN) {
			if (++retry > vds_ldc_retries) {
				PR0("Timed out closing channel");
				break;
			}
			drv_usecwait(vds_ldc_delay);
		}
		if (rv == 0) {
			(void) ldc_unreg_callback(vd->ldc_handle);
			(void) ldc_fini(vd->ldc_handle);
		} else {
			/*
			 * Closing the LDC channel has failed. Ideally we
			 * should fail here, but there is no Zeus-level
			 * infrastructure to handle this. The MD has already
			 * been changed and we have to do the close. So we
			 * try to do as much cleanup as we can.
			 */
			(void) ldc_set_cb_mode(vd->ldc_handle, LDC_CB_DISABLE);
			while (ldc_unreg_callback(vd->ldc_handle) == EAGAIN)
				drv_usecwait(vds_ldc_delay);
		}
	}

	/* Free the staging buffer for msgs */
	if (vd->vio_msgp != NULL) {
		kmem_free(vd->vio_msgp, vd->max_msglen);
		vd->vio_msgp = NULL;
	}

	/* Free the inband message buffer */
	if (vd->inband_task.msg != NULL) {
		kmem_free(vd->inband_task.msg, vd->max_msglen);
		vd->inband_task.msg = NULL;
	}
	if (vd->file) {
		/* Close file */
		(void) VOP_CLOSE(vd->file_vnode, vd_open_flags, 1,
		    0, kcred);
		VN_RELE(vd->file_vnode);
		if (vd->file_devid != NULL)
			ddi_devid_free(vd->file_devid);
	} else {
		/* Close any open backing-device slices */
		for (uint_t slice = 0; slice < vd->nslices; slice++) {
			if (vd->ldi_handle[slice] != NULL) {
				PR0("Closing slice %u", slice);
				(void) ldi_close(vd->ldi_handle[slice],
				    vd_open_flags | FNDELAY, kcred);
			}
		}
	}

	/* Free lock */
	if (vd->initialized & VD_LOCKING)
		mutex_destroy(&vd->lock);

	/* Finally, free the vdisk structure itself */
	kmem_free(vd, sizeof (*vd));
}

static int
vds_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id)
{
	int	status;
	vd_t	*vd = NULL;


	if ((status = vds_do_init_vd(vds, id, device_path, ldc_id, &vd)) != 0)
		vds_destroy_vd(vd);

	return (status);
}

static int
vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel,
    uint64_t *ldc_id)
{
	int num_channels;


	/* Look for channel endpoint child(ren) of the vdisk MD node */
	if ((num_channels = md_scan_dag(md, vd_node,
	    md_find_name(md, VD_CHANNEL_ENDPOINT),
	    md_find_name(md, "fwd"), channel)) <= 0) {
		PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT);
		return (-1);
	}

	/* Get the "id" value for the first channel endpoint node */
	if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) {
		PRN("No \"%s\" property found for \"%s\" of vdisk",
		    VD_ID_PROP, VD_CHANNEL_ENDPOINT);
		return (-1);
	}

	if (num_channels > 1) {
		PRN("Using ID of first of multiple channels for this vdisk");
	}

	return (0);
}

static int
vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id)
{
	int		num_nodes, status;
	size_t		size;
	mde_cookie_t	*channel;


	if ((num_nodes = md_node_count(md)) <= 0) {
		PRN("Invalid node count in Machine Description subtree");
		return (-1);
	}
	size = num_nodes * (sizeof (*channel));
	channel = kmem_zalloc(size, KM_SLEEP);
	status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id);
	kmem_free(channel, size);

	return (status);
}
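
/*
 * The bounded-retry pattern used when closing the LDC channel in
 * vds_destroy_vd(), as a condensed sketch (hypothetical helper): retry
 * while the call returns EAGAIN, up to a fixed limit, pausing between
 * attempts.
 */
static int
example_ldc_close_retry(ldc_handle_t hdl)
{
	int rv, retry = 0;

	while ((rv = ldc_close(hdl)) == EAGAIN) {
		if (++retry > vds_ldc_retries)
			break;			/* give up; caller sees rv */
		drv_usecwait(vds_ldc_delay);	/* back off before retrying */
	}
	return (rv);
}
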
static void
vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node)
{
	char		*device_path = NULL;
	uint64_t	id = 0, ldc_id = 0;


	if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) {
		PRN("Error getting vdisk \"%s\"", VD_ID_PROP);
		return;
	}
	PR0("Adding vdisk ID %lu", id);
	if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP,
	    &device_path) != 0) {
		PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP);
		return;
	}

	if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) {
		PRN("Error getting LDC ID for vdisk %lu", id);
		return;
	}

	if (vds_init_vd(vds, id, device_path, ldc_id) != 0) {
		PRN("Failed to add vdisk ID %lu", id);
		return;
	}
}

static void
vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node)
{
	uint64_t id = 0;


	if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) {
		PRN("Unable to get \"%s\" property from vdisk's MD node",
		    VD_ID_PROP);
		return;
	}
	PR0("Removing vdisk ID %lu", id);
	if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0)
		PRN("No vdisk entry found for vdisk ID %lu", id);
}

static void
vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node,
    md_t *curr_md, mde_cookie_t curr_vd_node)
{
	char		*curr_dev, *prev_dev;
	uint64_t	curr_id = 0, curr_ldc_id = 0;
	uint64_t	prev_id = 0, prev_ldc_id = 0;
	size_t		len;


	/* Validate that the vdisk ID has not changed */
	if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) {
		PRN("Error getting previous vdisk \"%s\" property",
		    VD_ID_PROP);
		return;
	}
	if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) {
		PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP);
		return;
	}
	if (curr_id != prev_id) {
		PRN("Not changing vdisk: ID changed from %lu to %lu",
		    prev_id, curr_id);
		return;
	}

	/* Validate that the LDC ID has not changed */
	if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) {
		PRN("Error getting LDC ID for vdisk %lu", prev_id);
		return;
	}

	if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) {
		PRN("Error getting LDC ID for vdisk %lu", curr_id);
		return;
	}
	if (curr_ldc_id != prev_ldc_id) {
		_NOTE(NOTREACHED);	/* lint is confused */
		PRN("Not changing vdisk: "
		    "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id);
		return;
	}

	/* Determine whether the device path has changed */
	if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP,
	    &prev_dev) != 0) {
		PRN("Error getting previous vdisk \"%s\"",
		    VD_BLOCK_DEVICE_PROP);
		return;
	}
	if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP,
	    &curr_dev) != 0) {
		PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP);
		return;
	}
	if (((len = strlen(curr_dev)) == strlen(prev_dev)) &&
	    (strncmp(curr_dev, prev_dev, len) == 0))
		return;	/* no relevant (supported) change */

	PR0("Changing vdisk ID %lu", prev_id);

	/* Remove old state, which will close the vdisk and reset it */
	if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0)
		PRN("No entry found for vdisk ID %lu", prev_id);

	/* Re-initialize the vdisk with the new state */
	if (vds_init_vd(vds, curr_id, curr_dev, curr_ldc_id) != 0) {
		PRN("Failed to change vdisk ID %lu", curr_id);
		return;
	}
}
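
/*
 * Note: the strlen() + strncmp() pair at the end of vds_change_vd() is
 * equivalent to a single strcmp() on NUL-terminated strings; a minimal
 * sketch (hypothetical helper) of the same "device path unchanged"
 * test:
 */
static boolean_t
example_path_unchanged(const char *prev_dev, const char *curr_dev)
{
	return ((strcmp(prev_dev, curr_dev) == 0) ? B_TRUE : B_FALSE);
}
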
static int
vds_process_md(void *arg, mdeg_result_t *md)
{
	int	i;
	vds_t	*vds = arg;


	if (md == NULL)
		return (MDEG_FAILURE);
	ASSERT(vds != NULL);

	for (i = 0; i < md->removed.nelem; i++)
		vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]);
	for (i = 0; i < md->match_curr.nelem; i++)
		vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i],
		    md->match_curr.mdp, md->match_curr.mdep[i]);
	for (i = 0; i < md->added.nelem; i++)
		vds_add_vd(vds, md->added.mdp, md->added.mdep[i]);

	return (MDEG_SUCCESS);
}


static int
vds_do_attach(dev_info_t *dip)
{
	int			status, sz;
	int			cfg_handle;
	minor_t			instance = ddi_get_instance(dip);
	vds_t			*vds;
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*ispecp;

	/*
	 * The "cfg-handle" property of a vds node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris. Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance when
	 * registering with the MD event-generation framework. If the "reg"
	 * property cannot be found, the device tree state is presumably so
	 * broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    VD_REG_PROP)) {
		PRN("vds \"%s\" property does not exist", VD_REG_PROP);
		return (DDI_FAILURE);
	}

	/* Get the MD instance for later MDEG registration */
	cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    VD_REG_PROP, -1);

	if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) {
		PRN("Could not allocate state for instance %u", instance);
		return (DDI_FAILURE);
	}

	if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) {
		PRN("Could not get state for instance %u", instance);
		ddi_soft_state_free(vds_state, instance);
		return (DDI_FAILURE);
	}

	vds->dip = dip;
	vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS,
	    vds_destroy_vd, sizeof (void *));

	ASSERT(vds->vd_table != NULL);

	if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) {
		PRN("ldi_ident_from_dip() returned errno %d", status);
		return (DDI_FAILURE);
	}
	vds->initialized |= VDS_LDI;

	/* Register for MD updates */
	sz = sizeof (vds_prop_template);
	pspecp = kmem_alloc(sz, KM_SLEEP);
	bcopy(vds_prop_template, pspecp, sz);

	VDS_SET_MDEG_PROP_INST(pspecp, cfg_handle);

	/* Initialize the complete prop spec structure */
	ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
	ispecp->namep = "virtual-device";
	ispecp->specp = pspecp;

	if (mdeg_register(ispecp, &vd_match, vds_process_md, vds,
	    &vds->mdeg) != MDEG_SUCCESS) {
		PRN("Unable to register for MD updates");
		kmem_free(ispecp, sizeof (mdeg_node_spec_t));
		kmem_free(pspecp, sz);
		return (DDI_FAILURE);
	}

	vds->ispecp = ispecp;
	vds->initialized |= VDS_MDEG;

	/* Prevent auto-detaching so driver is available whenever MD changes */
	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) !=
	    DDI_PROP_SUCCESS) {
		PRN("failed to set \"%s\" property for instance %u",
		    DDI_NO_AUTODETACH, instance);
	}

	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}
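
/*
 * Sketch of the hash-with-destructor pattern set up in vds_do_attach():
 * because vds_destroy_vd() is registered as the value destructor of
 * vd_table, removing a key with mod_hash_destroy() (as vds_remove_vd()
 * does) tears down the entire vdisk as a side effect. Illustrative
 * only; "example_table" and this helper are hypothetical:
 */
static void
example_table_lifecycle(uint64_t id, vd_t *vd)
{
	mod_hash_t *table;

	table = mod_hash_create_ptrhash("example_table", VDS_NCHAINS,
	    vds_destroy_vd, sizeof (void *));
	(void) mod_hash_insert(table, (mod_hash_key_t)id, (mod_hash_val_t)vd);
	/* Removing the key invokes vds_destroy_vd() on the stored vd_t */
	(void) mod_hash_destroy(table, (mod_hash_key_t)id);
	mod_hash_destroy_hash(table);
}
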
static int
vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int status;

	switch (cmd) {
	case DDI_ATTACH:
		PR0("Attaching");
		if ((status = vds_do_attach(dip)) != DDI_SUCCESS)
			(void) vds_detach(dip, DDI_DETACH);
		return (status);
	case DDI_RESUME:
		PR0("No action required for DDI_RESUME");
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}

static struct dev_ops vds_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	ddi_no_info,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vds_attach,	/* devo_attach */
	vds_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	NULL,		/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk server",
	&vds_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};


int
_init(void)
{
	int i, status;


	if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0)
		return (status);
	if ((status = mod_install(&modlinkage)) != 0) {
		ddi_soft_state_fini(&vds_state);
		return (status);
	}

	/* Fill in the bit-mask of server-supported operations */
	for (i = 0; i < vds_noperations; i++)
		vds_operations |= 1 << (vds_operation[i].operation - 1);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int status;


	if ((status = mod_remove(&modlinkage)) != 0)
		return (status);
	ddi_soft_state_fini(&vds_state);
	return (0);
}
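
/*
 * Worked example of the bit-mask arithmetic in _init(): operation codes
 * start at 1, so each operation maps to bit (op - 1). For a table that
 * handles operations 1 and 2 (e.g., block read and block write), the
 * resulting mask is (1 << 0) | (1 << 1) == 0x3. Condensed sketch over
 * an illustrative array of op codes:
 */
static uint32_t
example_op_mask(const uint8_t *ops, int nops)
{
	uint32_t	mask = 0;
	int		i;

	for (i = 0; i < nops; i++)
		mask |= 1U << (ops[i] - 1);	/* op codes are 1-based */
	return (mask);
}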