/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * Virtual disk server
 */


#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/file.h>
#include <sys/mdeg.h>
#include <sys/modhash.h>
#include <sys/note.h>
#include <sys/pathname.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
#include <sys/sysmacros.h>
#include <sys/vio_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vtoc.h>
#include <sys/scsi/impl/uscsi.h>


/* Virtual disk server initialization flags */
#define	VDS_LOCKING		0x01
#define	VDS_LDI			0x02
#define	VDS_MDEG		0x04

/* Virtual disk server tunable parameters */
#define	VDS_LDC_RETRIES		3
#define	VDS_NCHAINS		32

/* Identification parameters for MD, synthetic dkio(7i) structures, etc. */
#define	VDS_NAME		"virtual-disk-server"

#define	VD_NAME			"vd"
#define	VD_VOLUME_NAME		"vdisk"
#define	VD_ASCIILABEL		"Virtual Disk"

#define	VD_CHANNEL_ENDPOINT	"channel-endpoint"
#define	VD_ID_PROP		"id"
#define	VD_BLOCK_DEVICE_PROP	"vds-block-device"

/* Virtual disk initialization flags */
#define	VD_LOCKING		0x01
#define	VD_TASKQ		0x02
#define	VD_LDC			0x04
#define	VD_DRING		0x08
#define	VD_SID			0x10
#define	VD_SEQ_NUM		0x20

/* Flags for opening/closing backing devices via LDI */
#define	VD_OPEN_FLAGS		(FEXCL | FREAD | FWRITE)

/*
 * By Solaris convention, slice/partition 2 represents the entire disk;
 * unfortunately, this convention does not appear to be codified.
 */
#define	VD_ENTIRE_DISK_SLICE	2

/* Return a cpp token as a string */
#define	STRINGIZE(token)	#token

/*
 * Print a message prefixed with the current function name to the message log
 * (and optionally to the console for verbose boots); these macros use cpp's
 * concatenation of string literals and C99 variable-length-argument-list
 * macros
 */
#define	PRN(...)	_PRN("?%s(): "__VA_ARGS__, "")
#define	_PRN(format, ...)					\
	cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__)

/* Return a pointer to the "i"th vdisk dring element */
#define	VD_DRING_ELEM(i)	((vd_dring_entry_t *)(void *)	\
	    (vd->dring + (i)*vd->descriptor_size))

/* Return the virtual disk client's type as a string (for use in messages) */
#define	VD_CLIENT(vd)							\
	(((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" :	\
	    (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" :	\
		(((vd)->xfer_mode == 0) ? "null client" :		\
		    "unsupported client")))
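
/*
 * Illustrative expansion of the macros above (a sketch, not part of the
 * build):  PRN("slice %u", slice) becomes
 *
 *	cmn_err(CE_CONT, "?%s(): " "slice %u" "%s", __func__, slice, "");
 *
 * The '?' prefix asks cmn_err(9F) to log to the console only on verbose
 * boots, and the extra ""/"%s" pair keeps the expansion legal even when
 * PRN() is passed a format string with no other arguments.
 */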
"null client" : \ 112 "unsupported client"))) 113 114 /* Debugging macros */ 115 #ifdef DEBUG 116 #define PR0 if (vd_msglevel > 0) PRN 117 #define PR1 if (vd_msglevel > 1) PRN 118 #define PR2 if (vd_msglevel > 2) PRN 119 120 #define VD_DUMP_DRING_ELEM(elem) \ 121 PRN("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \ 122 elem->hdr.dstate, \ 123 elem->payload.operation, \ 124 elem->payload.status, \ 125 elem->payload.nbytes, \ 126 elem->payload.addr, \ 127 elem->payload.ncookies); 128 129 #else /* !DEBUG */ 130 #define PR0(...) 131 #define PR1(...) 132 #define PR2(...) 133 134 #define VD_DUMP_DRING_ELEM(elem) 135 136 #endif /* DEBUG */ 137 138 139 typedef struct vds { 140 uint_t initialized; /* driver inst initialization flags */ 141 dev_info_t *dip; /* driver inst devinfo pointer */ 142 kmutex_t lock; /* lock for this structure */ 143 ldi_ident_t ldi_ident; /* driver's identifier for LDI */ 144 mod_hash_t *vd_table; /* table of virtual disks served */ 145 mdeg_handle_t mdeg; /* handle for MDEG operations */ 146 } vds_t; 147 148 typedef struct vd { 149 uint_t initialized; /* vdisk initialization flags */ 150 kmutex_t lock; /* lock for this structure */ 151 vds_t *vds; /* server for this vdisk */ 152 ddi_taskq_t *taskq; /* taskq for this vdisk */ 153 ldi_handle_t ldi_handle[V_NUMPAR]; /* LDI slice handles */ 154 dev_t dev[V_NUMPAR]; /* dev numbers for slices */ 155 uint_t nslices; /* number for slices */ 156 size_t vdisk_size; /* number of blocks in vdisk */ 157 vd_disk_type_t vdisk_type; /* slice or entire disk */ 158 boolean_t pseudo; /* underlying pseudo dev */ 159 struct dk_geom dk_geom; /* synthetic for slice type */ 160 struct vtoc vtoc; /* synthetic for slice type */ 161 ldc_status_t ldc_state; /* LDC connection state */ 162 ldc_handle_t ldc_handle; /* handle for LDC comm */ 163 size_t max_msglen; /* largest LDC message len */ 164 boolean_t enabled; /* whether vdisk is enabled */ 165 vd_state_t state; /* client handshake state */ 166 uint8_t xfer_mode; /* transfer mode with client */ 167 uint32_t sid; /* client's session ID */ 168 uint64_t seq_num; /* message sequence number */ 169 uint64_t dring_ident; /* identifier of dring */ 170 ldc_dring_handle_t dring_handle; /* handle for dring ops */ 171 uint32_t descriptor_size; /* num bytes in desc */ 172 uint32_t dring_len; /* number of dring elements */ 173 caddr_t dring; /* address of dring */ 174 } vd_t; 175 176 typedef struct vds_operation { 177 uint8_t operation; 178 int (*function)(vd_t *vd, vd_dring_payload_t *request); 179 } vds_operation_t; 180 181 typedef struct ioctl { 182 uint8_t operation; 183 const char *operation_name; 184 int cmd; 185 const char *cmd_name; 186 uint_t copy; 187 size_t nbytes; 188 } ioctl_t; 189 190 191 static int vds_ldc_retries = VDS_LDC_RETRIES; 192 static void *vds_state; 193 static uint64_t vds_operations; /* see vds_operation[] definition below */ 194 195 static int vd_open_flags = VD_OPEN_FLAGS; 196 197 #ifdef DEBUG 198 static int vd_msglevel; 199 #endif /* DEBUG */ 200 201 202 static int 203 vd_bread(vd_t *vd, vd_dring_payload_t *request) 204 { 205 int status; 206 struct buf buf; 207 208 PR1("Read %lu bytes at block %lu", request->nbytes, request->addr); 209 if (request->nbytes == 0) 210 return (EINVAL); /* no service for trivial requests */ 211 ASSERT(mutex_owned(&vd->lock)); 212 ASSERT(request->slice < vd->nslices); 213 214 bioinit(&buf); 215 buf.b_flags = B_BUSY | B_READ; 216 buf.b_bcount = request->nbytes; 217 buf.b_un.b_addr = kmem_alloc(buf.b_bcount, KM_SLEEP); 218 buf.b_lblkno = request->addr; 

static int
vd_do_bwrite(vd_t *vd, uint_t slice, diskaddr_t block, size_t nbytes,
    ldc_mem_cookie_t *cookie, uint64_t ncookies, caddr_t data)
{
	int		status;
	struct buf	buf;

	ASSERT(mutex_owned(&vd->lock));
	ASSERT(slice < vd->nslices);
	ASSERT(nbytes != 0);
	ASSERT(data != NULL);

	/* Get data from client */
	if ((status = ldc_mem_copy(vd->ldc_handle, data, 0, &nbytes,
	    cookie, ncookies, LDC_COPY_IN)) != 0) {
		PRN("ldc_mem_copy() returned errno %d copying from client",
		    status);
		return (status);
	}

	bioinit(&buf);
	buf.b_flags	= B_BUSY | B_WRITE;
	buf.b_bcount	= nbytes;
	buf.b_un.b_addr	= data;
	buf.b_lblkno	= block;
	buf.b_edev	= vd->dev[slice];

	if ((status = ldi_strategy(vd->ldi_handle[slice], &buf)) == 0)
		status = biowait(&buf);
	biofini(&buf);
	return (status);
}

static int
vd_bwrite(vd_t *vd, vd_dring_payload_t *request)
{
	int	status;
	caddr_t	data;


	PR1("Write %lu bytes at block %lu", request->nbytes, request->addr);
	if (request->nbytes == 0)
		return (EINVAL);	/* no service for trivial requests */
	data = kmem_alloc(request->nbytes, KM_SLEEP);
	status = vd_do_bwrite(vd, request->slice, request->addr,
	    request->nbytes, request->cookie, request->ncookies, data);
	kmem_free(data, request->nbytes);
	return (status);
}

static int
vd_do_slice_ioctl(vd_t *vd, int cmd, void *buf)
{
	switch (cmd) {
	case DKIOCGGEOM:
		ASSERT(buf != NULL);
		bcopy(&vd->dk_geom, buf, sizeof (vd->dk_geom));
		return (0);
	case DKIOCGVTOC:
		ASSERT(buf != NULL);
		bcopy(&vd->vtoc, buf, sizeof (vd->vtoc));
		return (0);
	default:
		return (ENOTSUP);
	}
}

static int
vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void *buf, ioctl_t *ioctl)
{
	int	rval = 0, status;
	size_t	nbytes = request->nbytes;	/* modifiable copy */


	ASSERT(mutex_owned(&vd->lock));
	ASSERT(request->slice < vd->nslices);
	PR0("Performing %s", ioctl->operation_name);

	/* Get data from client, if necessary */
	if (ioctl->copy & VD_COPYIN) {
		ASSERT(nbytes != 0 && buf != NULL);
		PR1("Getting \"arg\" data from client");
		if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes,
		    request->cookie, request->ncookies,
		    LDC_COPY_IN)) != 0) {
			PRN("ldc_mem_copy() returned errno %d "
			    "copying from client", status);
			return (status);
		}
	}

	/*
	 * Handle single-slice block devices internally; otherwise, have the
	 * real driver perform the ioctl()
	 */
	if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) {
		if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, buf)) != 0)
			return (status);
	} else if ((status = ldi_ioctl(vd->ldi_handle[request->slice],
	    ioctl->cmd, (intptr_t)buf, FKIOCTL, kcred, &rval)) != 0) {
		PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status);
		return (status);
	}
#ifdef DEBUG
	if (rval != 0) {
		PRN("%s set rval = %d, which is not being returned to client",
		    ioctl->cmd_name, rval);
	}
#endif /* DEBUG */

	/* Send data to client, if necessary */
	if (ioctl->copy & VD_COPYOUT) {
		ASSERT(nbytes != 0 && buf != NULL);
		PR1("Sending \"arg\" data to client");
		if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes,
		    request->cookie, request->ncookies,
		    LDC_COPY_OUT)) != 0) {
			PRN("ldc_mem_copy() returned errno %d "
			    "copying to client", status);
			return (status);
		}
	}

	return (status);
}

#define	RNDSIZE(expr)	P2ROUNDUP(sizeof (expr), sizeof (uint64_t))
static int
vd_ioctl(vd_t *vd, vd_dring_payload_t *request)
{
	static ioctl_t	ioctl[] = {
		/* Command (no-copy) operations */
		{VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), DKIOCFLUSHWRITECACHE,
		    STRINGIZE(DKIOCFLUSHWRITECACHE), 0, 0},

		/* "Get" (copy-out) operations */
		{VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), DKIOCGETWCE,
		    STRINGIZE(DKIOCGETWCE), VD_COPYOUT, RNDSIZE(int)},
		{VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), DKIOCGGEOM,
		    STRINGIZE(DKIOCGGEOM), VD_COPYOUT, RNDSIZE(struct dk_geom)},
		{VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), DKIOCGVTOC,
		    STRINGIZE(DKIOCGVTOC), VD_COPYOUT, RNDSIZE(struct vtoc)},

		/* "Set" (copy-in) operations */
		{VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), DKIOCSETWCE,
		    STRINGIZE(DKIOCSETWCE), VD_COPYIN, RNDSIZE(int)},
		{VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), DKIOCSGEOM,
		    STRINGIZE(DKIOCSGEOM), VD_COPYIN, RNDSIZE(struct dk_geom)},
		{VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), DKIOCSVTOC,
		    STRINGIZE(DKIOCSVTOC), VD_COPYIN, RNDSIZE(struct vtoc)},

		/* "Get/set" (copy-in/copy-out) operations */
		{VD_OP_SCSICMD, STRINGIZE(VD_OP_SCSICMD), USCSICMD,
		    STRINGIZE(USCSICMD), VD_COPYIN|VD_COPYOUT,
		    RNDSIZE(struct uscsi_cmd)}

	};
	int	i, status;
	void	*buf = NULL;
	size_t	nioctls = (sizeof (ioctl))/(sizeof (ioctl[0]));


	ASSERT(mutex_owned(&vd->lock));
	ASSERT(request->slice < vd->nslices);

	/*
	 * Determine ioctl corresponding to caller's "operation" and
	 * validate caller's "nbytes"
	 */
	for (i = 0; i < nioctls; i++) {
		if (request->operation == ioctl[i].operation) {
			if (request->nbytes > ioctl[i].nbytes) {
				PRN("%s: Expected <= %lu \"nbytes\", "
				    "got %lu", ioctl[i].operation_name,
				    ioctl[i].nbytes, request->nbytes);
				return (EINVAL);
			} else if ((request->nbytes % sizeof (uint64_t)) != 0) {
				PRN("%s: nbytes = %lu not a multiple of %lu",
				    ioctl[i].operation_name, request->nbytes,
				    sizeof (uint64_t));
				return (EINVAL);
			}

			break;
		}
	}
	ASSERT(i < nioctls);	/* because "operation" already validated */

	if (request->nbytes)
		buf = kmem_zalloc(request->nbytes, KM_SLEEP);
	status = vd_do_ioctl(vd, request, buf, &ioctl[i]);
	if (request->nbytes)
		kmem_free(buf, request->nbytes);
	return (status);
}

/*
 * Define the supported operations once the functions for performing them have
 * been defined
 */
static const vds_operation_t	vds_operation[] = {
	{VD_OP_BREAD,		vd_bread},
	{VD_OP_BWRITE,		vd_bwrite},
	{VD_OP_FLUSH,		vd_ioctl},
	{VD_OP_GET_WCE,		vd_ioctl},
	{VD_OP_SET_WCE,		vd_ioctl},
	{VD_OP_GET_VTOC,	vd_ioctl},
	{VD_OP_SET_VTOC,	vd_ioctl},
	{VD_OP_GET_DISKGEOM,	vd_ioctl},
	{VD_OP_SET_DISKGEOM,	vd_ioctl},
	{VD_OP_SCSICMD,		vd_ioctl}
};

static const size_t	vds_noperations =
	(sizeof (vds_operation))/(sizeof (vds_operation[0]));
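
/*
 * Descriptive note: _init() (at the bottom of this file) derives the
 * "vds_operations" bit-mask advertised to clients from this table, setting
 * bit (operation - 1) for each entry; e.g., VD_OP_BREAD contributes
 * (1 << (VD_OP_BREAD - 1)).  Adding a new operation therefore requires only
 * a handler function and a new row here.
 */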

/*
 * Process a request using a defined operation
 */
static int
vd_process_request(vd_t *vd, vd_dring_payload_t *request)
{
	int	i;


	PR1("Entered");
	ASSERT(mutex_owned(&vd->lock));

	/* Range-check slice */
	if (request->slice >= vd->nslices) {
		PRN("Invalid \"slice\" %u (max %u) for virtual disk",
		    request->slice, (vd->nslices - 1));
		return (EINVAL);
	}

	/* Perform the requested operation */
	for (i = 0; i < vds_noperations; i++)
		if (request->operation == vds_operation[i].operation)
			return (vds_operation[i].function(vd, request));

	/* No matching operation found */
	PRN("Unsupported operation %u", request->operation);
	return (ENOTSUP);
}

static int
send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen)
{
	int	retry, status;
	size_t	nbytes;


	for (retry = 0, status = EWOULDBLOCK;
	    retry < vds_ldc_retries && status == EWOULDBLOCK;
	    retry++) {
		PR1("ldc_write() attempt %d", (retry + 1));
		nbytes = msglen;
		status = ldc_write(ldc_handle, msg, &nbytes);
	}

	if (status != 0) {
		PRN("ldc_write() returned errno %d", status);
		return (status);
	} else if (nbytes != msglen) {
		PRN("ldc_write() performed only partial write");
		return (EIO);
	}

	PR1("SENT %lu bytes", msglen);
	return (0);
}

/*
 * Return 1 if the "type", "subtype", and "env" fields of the "tag" first
 * argument match the corresponding remaining arguments; otherwise, return 0
 */
static int
vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env)
{
	return ((tag->vio_msgtype == type) &&
	    (tag->vio_subtype == subtype) &&
	    (tag->vio_subtype_env == env)) ? 1 : 0;
}
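
/*
 * Typical use of vd_msgtype() (taken from the message handlers below):
 *
 *	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
 *	    VIO_VER_INFO))
 *		return (ENOMSG);	(not a version message)
 *
 * Each handler first filters on the tag and returns ENOMSG for messages
 * meant for a different handler, which lets callers try handlers in turn.
 */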

static int
process_ver_msg(vio_msg_t *msg, size_t msglen)
{
	vio_ver_msg_t	*ver_msg = (vio_ver_msg_t *)msg;


	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
	    VIO_VER_INFO)) {
		return (ENOMSG);	/* not a version message */
	}

	if (msglen != sizeof (*ver_msg)) {
		PRN("Expected %lu-byte version message; "
		    "received %lu bytes", sizeof (*ver_msg), msglen);
		return (EBADMSG);
	}

	if (ver_msg->dev_class != VDEV_DISK) {
		PRN("Expected device class %u (disk); received %u",
		    VDEV_DISK, ver_msg->dev_class);
		return (EBADMSG);
	}

	if ((ver_msg->ver_major != VD_VER_MAJOR) ||
	    (ver_msg->ver_minor != VD_VER_MINOR)) {
		/* Unsupported version; send back supported version */
		ver_msg->ver_major = VD_VER_MAJOR;
		ver_msg->ver_minor = VD_VER_MINOR;
		return (EBADMSG);
	}

	/* Valid message, version accepted */
	ver_msg->dev_class = VDEV_DISK_SERVER;
	return (0);
}

static int
vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	vd_attr_msg_t	*attr_msg = (vd_attr_msg_t *)msg;


	PR0("Entered");
	ASSERT(mutex_owned(&vd->lock));
	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
	    VIO_ATTR_INFO)) {
		return (ENOMSG);	/* not an attribute message */
	}

	if (msglen != sizeof (*attr_msg)) {
		PRN("Expected %lu-byte attribute message; "
		    "received %lu bytes", sizeof (*attr_msg), msglen);
		return (EBADMSG);
	}

	if (attr_msg->max_xfer_sz == 0) {
		PRN("Received maximum transfer size of 0 from client");
		return (EBADMSG);
	}

	if ((attr_msg->xfer_mode != VIO_DESC_MODE) &&
	    (attr_msg->xfer_mode != VIO_DRING_MODE)) {
		PRN("Client requested unsupported transfer mode");
		return (EBADMSG);
	}


	/* Success:  valid message and transfer mode */
	vd->xfer_mode = attr_msg->xfer_mode;
	if (vd->xfer_mode == VIO_DESC_MODE) {
		/*
		 * The vd_dring_inband_msg_t contains one cookie; need room
		 * for up to n-1 more cookies, where "n" is the number of full
		 * pages plus possibly one partial page required to cover
		 * "max_xfer_sz".  Add room for one more cookie if
		 * "max_xfer_sz" isn't an integral multiple of the page size.
		 * Must first get the maximum transfer size in bytes.
		 */
#if 1	/* NEWOBP */
		size_t max_xfer_bytes = attr_msg->vdisk_block_size ?
		    attr_msg->vdisk_block_size*attr_msg->max_xfer_sz :
		    attr_msg->max_xfer_sz;
		size_t max_inband_msglen =
		    sizeof (vd_dring_inband_msg_t) +
		    ((max_xfer_bytes/PAGESIZE +
		    ((max_xfer_bytes % PAGESIZE) ? 1 : 0))*
		    (sizeof (ldc_mem_cookie_t)));
#else	/* NEWOBP */
		size_t max_inband_msglen =
		    sizeof (vd_dring_inband_msg_t) +
		    ((attr_msg->max_xfer_sz/PAGESIZE +
		    (attr_msg->max_xfer_sz % PAGESIZE ? 1 : 0))*
		    (sizeof (ldc_mem_cookie_t)));
#endif	/* NEWOBP */
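
		/*
		 * Worked example of the sizing above (illustrative figures,
		 * assuming an 8 KB PAGESIZE): a client advertising
		 * vdisk_block_size = 512 and max_xfer_sz = 24 blocks can
		 * transfer 12288 bytes, i.e. one full page plus a partial
		 * page, so the formula reserves room for 12288/8192 + 1 = 2
		 * cookies in addition to the one already present in
		 * vd_dring_inband_msg_t.
		 */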

		/*
		 * Set the maximum expected message length to
		 * accommodate in-band-descriptor messages with all
		 * their cookies
		 */
		vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen);
	}

	attr_msg->vdisk_size = vd->vdisk_size;
	attr_msg->vdisk_type = vd->vdisk_type;
	attr_msg->operations = vds_operations;
	PR0("%s", VD_CLIENT(vd));
	return (0);
}

static int
vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	int		status;
	size_t		expected;
	ldc_mem_info_t	dring_minfo;
	vio_dring_reg_msg_t	*reg_msg = (vio_dring_reg_msg_t *)msg;


	PR0("Entered");
	ASSERT(mutex_owned(&vd->lock));
	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
	    VIO_DRING_REG)) {
		return (ENOMSG);	/* not a register-dring message */
	}

	if (msglen < sizeof (*reg_msg)) {
		PRN("Expected at least %lu-byte register-dring message; "
		    "received %lu bytes", sizeof (*reg_msg), msglen);
		return (EBADMSG);
	}

	expected = sizeof (*reg_msg) +
	    (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0]));
	if (msglen != expected) {
		PRN("Expected %lu-byte register-dring message; "
		    "received %lu bytes", expected, msglen);
		return (EBADMSG);
	}

	if (vd->initialized & VD_DRING) {
		PRN("A dring was previously registered; only support one");
		return (EBADMSG);
	}

	if (reg_msg->ncookies != 1) {
		/*
		 * In addition to fixing the assertion in the success case
		 * below, supporting drings which require more than one
		 * "cookie" requires increasing the value of vd->max_msglen
		 * somewhere in the code path prior to receiving the message
		 * which results in calling this function.  Note that without
		 * making this change, the larger message size required to
		 * accommodate multiple cookies cannot be successfully
		 * received, so this function will not even get called.
		 * Gracefully accommodating more dring cookies might
		 * reasonably demand exchanging an additional attribute or
		 * making a minor protocol adjustment.
		 */
		PRN("reg_msg->ncookies = %u != 1", reg_msg->ncookies);
		return (EBADMSG);
	}

	status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie,
	    reg_msg->ncookies, reg_msg->num_descriptors,
	    reg_msg->descriptor_size, LDC_SHADOW_MAP, &vd->dring_handle);
	if (status != 0) {
		PRN("ldc_mem_dring_map() returned errno %d", status);
		return (status);
	}

	/*
	 * To remove the need for this assertion, must call
	 * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a
	 * successful call to ldc_mem_dring_map()
	 */
	ASSERT(reg_msg->ncookies == 1);

	if ((status =
	    ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) {
		PRN("ldc_mem_dring_info() returned errno %d", status);
		if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)
			PRN("ldc_mem_dring_unmap() returned errno %d", status);
		return (status);
	}

	if (dring_minfo.vaddr == NULL) {
		PRN("Descriptor ring virtual address is NULL");
		return (EBADMSG);	/* FIXME appropriate status? */
	}


	/* Valid message and dring mapped */
	vd->initialized |= VD_DRING;
	vd->dring_ident = 1;	/* "There Can Be Only One" */
	vd->dring = dring_minfo.vaddr;
	vd->descriptor_size = reg_msg->descriptor_size;
	vd->dring_len = reg_msg->num_descriptors;
	reg_msg->dring_ident = vd->dring_ident;
	PR1("descriptor size = %u, dring length = %u",
	    vd->descriptor_size, vd->dring_len);
	return (0);
}
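
/*
 * Illustrative sketch (not compiled): if multi-cookie drings were ever
 * supported, the success path above would walk the remaining cookies
 * roughly as follows, using the ldc_mem_dring_nextcookie() interface
 * named in the comments above; this server does not implement it:
 *
 *	for (i = 1; i < reg_msg->ncookies; i++)
 *		if ((status = ldc_mem_dring_nextcookie(vd->dring_handle,
 *		    &cookie)) != 0)
 *			break;
 *
 * vd->max_msglen would also have to grow before the larger register-dring
 * message could be received at all, as the comment above explains.
 */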

static int
vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	vio_dring_unreg_msg_t	*unreg_msg = (vio_dring_unreg_msg_t *)msg;


	PR0("Entered");
	ASSERT(mutex_owned(&vd->lock));
	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
	    VIO_DRING_UNREG)) {
		return (ENOMSG);	/* not an unregister-dring message */
	}

	if (msglen != sizeof (*unreg_msg)) {
		PRN("Expected %lu-byte unregister-dring message; "
		    "received %lu bytes", sizeof (*unreg_msg), msglen);
		return (EBADMSG);
	}

	if (unreg_msg->dring_ident != vd->dring_ident) {
		PRN("Expected dring ident %lu; received %lu",
		    vd->dring_ident, unreg_msg->dring_ident);
		return (EBADMSG);
	}

	/* FIXME set ack in unreg_msg? */
	return (0);
}

static int
process_rdx_msg(vio_msg_t *msg, size_t msglen)
{
	PR0("Entered");
	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX))
		return (ENOMSG);	/* not an RDX message */

	if (msglen != sizeof (vio_rdx_msg_t)) {
		PRN("Expected %lu-byte RDX message; received %lu bytes",
		    sizeof (vio_rdx_msg_t), msglen);
		return (EBADMSG);
	}

	return (0);
}

static void
vd_reset_connection(vd_t *vd, boolean_t reset_ldc)
{
	int	status = 0;


	ASSERT(mutex_owned(&vd->lock));
	PR0("Resetting connection with %s", VD_CLIENT(vd));
	if ((vd->initialized & VD_DRING) &&
	    ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0))
		PRN("ldc_mem_dring_unmap() returned errno %d", status);
	if ((reset_ldc == B_TRUE) &&
	    ((status = ldc_reset(vd->ldc_handle)) != 0))
		PRN("ldc_reset() returned errno %d", status);
	vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING);
	vd->state = VD_STATE_INIT;
	vd->max_msglen = sizeof (vio_msg_t);	/* baseline vio message size */
}

static int
vd_check_seq_num(vd_t *vd, uint64_t seq_num)
{
	ASSERT(mutex_owned(&vd->lock));
	if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) {
		PRN("Received seq_num %lu; expected %lu",
		    seq_num, (vd->seq_num + 1));
		vd_reset_connection(vd, B_FALSE);
		return (1);
	}

	vd->seq_num = seq_num;
	vd->initialized |= VD_SEQ_NUM;	/* superfluous after first time... */
	return (0);
}
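
/*
 * Descriptive note on vd_check_seq_num(): the first data message after a
 * (re)connection seeds vd->seq_num unconditionally, since VD_SEQ_NUM is
 * clear; every subsequent message must carry exactly the previous value
 * plus one, and any gap tears the connection state down (without an LDC
 * reset) so the handshake starts over.
 */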

/*
 * Return the expected size of an inband-descriptor message with all the
 * cookies it claims to include
 */
static size_t
expected_inband_size(vd_dring_inband_msg_t *msg)
{
	return ((sizeof (*msg)) +
	    (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0])));
}

/*
 * Process an in-band descriptor message:  used with clients like OBP, with
 * which vds exchanges descriptors within VIO message payloads, rather than
 * operating on them within a descriptor ring
 */
static int
vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	size_t	expected;
	vd_dring_inband_msg_t	*desc_msg = (vd_dring_inband_msg_t *)msg;


	PR1("Entered");
	ASSERT(mutex_owned(&vd->lock));
	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO,
	    VIO_DESC_DATA))
		return (ENOMSG);	/* not an in-band-descriptor message */

	if (msglen < sizeof (*desc_msg)) {
		PRN("Expected at least %lu-byte descriptor message; "
		    "received %lu bytes", sizeof (*desc_msg), msglen);
		return (EBADMSG);
	}

	if (msglen != (expected = expected_inband_size(desc_msg))) {
		PRN("Expected %lu-byte descriptor message; "
		    "received %lu bytes", expected, msglen);
		return (EBADMSG);
	}

	if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) {
		return (EBADMSG);
	}

	/* Valid message; process the request */
	desc_msg->payload.status = vd_process_request(vd, &desc_msg->payload);
	return (0);
}

static boolean_t
vd_accept_dring_elems(vd_t *vd, uint32_t start, uint32_t ndesc)
{
	uint32_t	i, n;


	/* Check descriptor states */
	for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len) {
		if (VD_DRING_ELEM(i)->hdr.dstate != VIO_DESC_READY) {
			PRN("descriptor %u not ready", i);
			VD_DUMP_DRING_ELEM(VD_DRING_ELEM(i));
			return (B_FALSE);
		}
	}

	/* Descriptors are valid; accept them */
	for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len)
		VD_DRING_ELEM(i)->hdr.dstate = VIO_DESC_ACCEPTED;

	return (B_TRUE);
}

static int
vd_process_dring(vd_t *vd, uint32_t start, uint32_t end)
{
	int		status;
	boolean_t	accepted;
	uint32_t	i, io_status, n, ndesc;


	ASSERT(mutex_owned(&vd->lock));
	PR1("start = %u, end = %u", start, end);

	/* Validate descriptor range */
	if ((start >= vd->dring_len) || (end >= vd->dring_len)) {
		PRN("\"start\" = %u, \"end\" = %u; both must be less than %u",
		    start, end, vd->dring_len);
		return (EINVAL);
	}

	/* Acquire updated dring elements */
	if ((status = ldc_mem_dring_acquire(vd->dring_handle,
	    start, end)) != 0) {
		PRN("ldc_mem_dring_acquire() returned errno %d", status);
		return (status);
	}
	/* Accept updated dring elements */
	ndesc = ((end < start) ? end + vd->dring_len : end) - start + 1;
	PR1("ndesc = %u", ndesc);
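	/*
	 * Example of the wrap-around arithmetic above: with dring_len = 32,
	 * a range of start = 30, end = 1 spans descriptors 30, 31, 0, 1,
	 * so ndesc = (1 + 32) - 30 + 1 = 4.
	 */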
	accepted = vd_accept_dring_elems(vd, start, ndesc);
	/* Release dring elements */
	if ((status = ldc_mem_dring_release(vd->dring_handle,
	    start, end)) != 0) {
		PRN("ldc_mem_dring_release() returned errno %d", status);
		return (status);
	}
	/* If a descriptor was in the wrong state, return an error */
	if (!accepted)
		return (EINVAL);


	/* Process accepted dring elements */
	for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len) {
		vd_dring_entry_t	*elem = VD_DRING_ELEM(i);

		/* Process descriptor outside acquire/release bracket */
		PR1("Processing dring element %u", i);
		io_status = vd_process_request(vd, &elem->payload);

		/* Re-acquire client's dring element */
		if ((status = ldc_mem_dring_acquire(vd->dring_handle,
		    i, i)) != 0) {
			PRN("ldc_mem_dring_acquire() returned errno %d",
			    status);
			return (status);
		}
		/* Update processed element */
		if (elem->hdr.dstate == VIO_DESC_ACCEPTED) {
			elem->payload.status = io_status;
			elem->hdr.dstate = VIO_DESC_DONE;
		} else {
			/* Perhaps client timed out waiting for I/O... */
			accepted = B_FALSE;
			PRN("element %u no longer \"accepted\"", i);
			VD_DUMP_DRING_ELEM(elem);
		}
		/* Release updated processed element */
		if ((status = ldc_mem_dring_release(vd->dring_handle,
		    i, i)) != 0) {
			PRN("ldc_mem_dring_release() returned errno %d",
			    status);
			return (status);
		}
		/* If the descriptor was in the wrong state, return an error */
		if (!accepted)
			return (EINVAL);
	}

	return (0);
}

static int
vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	vio_dring_msg_t	*dring_msg = (vio_dring_msg_t *)msg;


	PR1("Entered");
	ASSERT(mutex_owned(&vd->lock));
	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO,
	    VIO_DRING_DATA)) {
		return (ENOMSG);	/* not a dring-data message */
	}

	if (msglen != sizeof (*dring_msg)) {
		PRN("Expected %lu-byte dring message; received %lu bytes",
		    sizeof (*dring_msg), msglen);
		return (EBADMSG);
	}

	if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) {
		return (EBADMSG);
	}

	if (dring_msg->dring_ident != vd->dring_ident) {
		PRN("Expected dring ident %lu; received ident %lu",
		    vd->dring_ident, dring_msg->dring_ident);
		return (EBADMSG);
	}


	/* Valid message; process dring */
	dring_msg->tag.vio_subtype = VIO_SUBTYPE_ACK;
	return (vd_process_dring(vd, dring_msg->start_idx, dring_msg->end_idx));
}

static int
recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes)
{
	int		retry, status;
	size_t		size = *nbytes;
	boolean_t	isempty = B_FALSE;


	/* FIXME work around interrupt problem */
	if ((ldc_chkq(ldc_handle, &isempty) != 0) || isempty)
		return (ENOMSG);

	for (retry = 0, status = ETIMEDOUT;
	    retry < vds_ldc_retries && status == ETIMEDOUT;
	    retry++) {
		PR1("ldc_read() attempt %d", (retry + 1));
		*nbytes = size;
		status = ldc_read(ldc_handle, msg, nbytes);
	}

	if (status != 0) {
		PRN("ldc_read() returned errno %d", status);
		return (status);
	} else if (*nbytes == 0) {
		PR1("ldc_read() returned 0 and no message read");
		return (ENOMSG);
	}

	PR1("RCVD %lu-byte message", *nbytes);
	return (0);
}
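
/*
 * Descriptive note on recv_msg(): the ldc_chkq() check above (marked FIXME)
 * bails out with ENOMSG when the receive queue is empty, ldc_read() is
 * retried up to vds_ldc_retries times on ETIMEDOUT, and *nbytes is re-seeded
 * from the saved buffer size before each attempt because ldc_read()
 * overwrites it with the length actually read.
 */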

static int
vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	int	status;


	PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype,
	    msg->tag.vio_subtype, msg->tag.vio_subtype_env);
	ASSERT(mutex_owned(&vd->lock));

	/*
	 * Validate session ID up front, since it applies to all messages
	 * once set
	 */
	if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) {
		PRN("Expected SID %u, received %u", vd->sid,
		    msg->tag.vio_sid);
		return (EBADMSG);
	}


	/*
	 * Process the received message based on connection state
	 */
	switch (vd->state) {
	case VD_STATE_INIT:	/* expect version message */
		if ((status = process_ver_msg(msg, msglen)) != 0)
			return (status);

		/* The first version message sets the SID */
		ASSERT(!(vd->initialized & VD_SID));
		vd->sid = msg->tag.vio_sid;
		vd->initialized |= VD_SID;

		/* Version negotiated, move to that state */
		vd->state = VD_STATE_VER;
		return (0);

	case VD_STATE_VER:	/* expect attribute message */
		if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0)
			return (status);

		/* Attributes exchanged, move to that state */
		vd->state = VD_STATE_ATTR;
		return (0);

	case VD_STATE_ATTR:
		switch (vd->xfer_mode) {
		case VIO_DESC_MODE:	/* expect RDX message */
			if ((status = process_rdx_msg(msg, msglen)) != 0)
				return (status);

			/* Ready to receive in-band descriptors */
			vd->state = VD_STATE_DATA;
			return (0);

		case VIO_DRING_MODE:	/* expect register-dring message */
			if ((status =
			    vd_process_dring_reg_msg(vd, msg, msglen)) != 0)
				return (status);

			/* One dring negotiated, move to that state */
			vd->state = VD_STATE_DRING;
			return (0);

		default:
			ASSERT(B_FALSE);	/* unsupported transfer mode */
			PRN("Unsupported transfer mode");
			return (ENOTSUP);
		}

	case VD_STATE_DRING:	/* expect RDX, register-dring, or unreg-dring */
		if ((status = process_rdx_msg(msg, msglen)) == 0) {
			/* Ready to receive data */
			vd->state = VD_STATE_DATA;
			return (0);
		} else if (status != ENOMSG) {
			return (status);
		}


		/*
		 * If another register-dring message is received, stay in
		 * dring state in case the client sends RDX; although the
		 * protocol allows multiple drings, this server does not
		 * support using more than one
		 */
		if ((status =
		    vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG)
			return (status);

		/*
		 * Acknowledge an unregister-dring message, but reset the
		 * connection anyway:  Although the protocol allows
		 * unregistering drings, this server cannot serve a vdisk
		 * without its only dring
		 */
		status = vd_process_dring_unreg_msg(vd, msg, msglen);
		return ((status == 0) ? ENOTSUP : status);

	case VD_STATE_DATA:
		switch (vd->xfer_mode) {
		case VIO_DESC_MODE:	/* expect in-band-descriptor message */
			return (vd_process_desc_msg(vd, msg, msglen));

		case VIO_DRING_MODE:	/* expect dring-data or unreg-dring */
			/*
			 * Typically expect dring-data messages, so handle
			 * them first
			 */
			if ((status = vd_process_dring_msg(vd, msg,
			    msglen)) != ENOMSG)
				return (status);

			/*
			 * Acknowledge an unregister-dring message, but reset
			 * the connection anyway:  Although the protocol
			 * allows unregistering drings, this server cannot
			 * serve a vdisk without its only dring
			 */
			status = vd_process_dring_unreg_msg(vd, msg, msglen);
			return ((status == 0) ? ENOTSUP : status);

		default:
			ASSERT(B_FALSE);	/* unsupported transfer mode */
			PRN("Unsupported transfer mode");
			return (ENOTSUP);
		}

	default:
		ASSERT(B_FALSE);	/* invalid client connection state */
		PRN("Invalid client connection state");
		return (ENOTSUP);
	}
}

static void
vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	int		status;
	boolean_t	reset_ldc = B_FALSE;


	ASSERT(mutex_owned(&vd->lock));

	/*
	 * Check that the message is at least big enough for a "tag", so that
	 * message processing can proceed based on tag-specified message type
	 */
	if (msglen < sizeof (vio_msg_tag_t)) {
		PRN("Received short (%lu-byte) message", msglen);
		/* Can't "nack" short message, so drop the big hammer */
		vd_reset_connection(vd, B_TRUE);
		return;
	}

	/*
	 * Process the message
	 */
	switch (status = vd_do_process_msg(vd, msg, msglen)) {
	case 0:
		/* "ack" valid, successfully-processed messages */
		msg->tag.vio_subtype = VIO_SUBTYPE_ACK;
		break;

	case ENOMSG:
		PRN("Received unexpected message");
		_NOTE(FALLTHROUGH);
	case EBADMSG:
	case ENOTSUP:
		/* "nack" invalid messages */
		msg->tag.vio_subtype = VIO_SUBTYPE_NACK;
		break;

	default:
		/* "nack" failed messages */
		msg->tag.vio_subtype = VIO_SUBTYPE_NACK;
		/* An LDC error probably occurred, so try resetting it */
		reset_ldc = B_TRUE;
		break;
	}

	/* "ack" or "nack" the message */
	PR1("Sending %s",
	    (msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK");
	if (send_msg(vd->ldc_handle, msg, msglen) != 0)
		reset_ldc = B_TRUE;

	/* Reset the connection for nack'ed or failed messages */
	if ((status != 0) || reset_ldc)
		vd_reset_connection(vd, reset_ldc);
}
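
/*
 * Summary of the handshake state machine driven by vd_do_process_msg()
 * (descriptive only):
 *
 *	VD_STATE_INIT  --version-->        VD_STATE_VER
 *	VD_STATE_VER   --attributes-->     VD_STATE_ATTR
 *	VD_STATE_ATTR  --RDX-->            VD_STATE_DATA   (in-band mode)
 *	VD_STATE_ATTR  --register-dring--> VD_STATE_DRING  (dring mode)
 *	VD_STATE_DRING --RDX-->            VD_STATE_DATA
 *
 * Any sequence or validation failure resets the connection to
 * VD_STATE_INIT via vd_reset_connection().
 */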
"ACK" : "NACK"); 1220 if (send_msg(vd->ldc_handle, msg, msglen) != 0) 1221 reset_ldc = B_TRUE; 1222 1223 /* Reset the connection for nack'ed or failed messages */ 1224 if ((status != 0) || reset_ldc) 1225 vd_reset_connection(vd, reset_ldc); 1226 } 1227 1228 static void 1229 vd_process_queue(void *arg) 1230 { 1231 vd_t *vd = (vd_t *)arg; 1232 size_t max_msglen, nbytes; 1233 vio_msg_t *vio_msg; 1234 1235 1236 PR2("Entered"); 1237 ASSERT(vd != NULL); 1238 mutex_enter(&vd->lock); 1239 max_msglen = vd->max_msglen; /* vd->maxmsglen can change */ 1240 vio_msg = kmem_alloc(max_msglen, KM_SLEEP); 1241 for (nbytes = vd->max_msglen; 1242 vd->enabled && recv_msg(vd->ldc_handle, vio_msg, &nbytes) == 0; 1243 nbytes = vd->max_msglen) 1244 vd_process_msg(vd, vio_msg, nbytes); 1245 kmem_free(vio_msg, max_msglen); 1246 mutex_exit(&vd->lock); 1247 PR2("Returning"); 1248 } 1249 1250 static uint_t 1251 vd_handle_ldc_events(uint64_t event, caddr_t arg) 1252 { 1253 uint_t status; 1254 vd_t *vd = (vd_t *)(void *)arg; 1255 1256 1257 ASSERT(vd != NULL); 1258 mutex_enter(&vd->lock); 1259 if (event & LDC_EVT_READ) { 1260 PR1("New packet(s) available"); 1261 /* Queue a task to process the new data */ 1262 if (ddi_taskq_dispatch(vd->taskq, vd_process_queue, vd, 0) != 1263 DDI_SUCCESS) 1264 PRN("Unable to dispatch vd_process_queue()"); 1265 } else if (event & LDC_EVT_RESET) { 1266 PR0("Attempting to bring up reset channel"); 1267 if (((status = ldc_up(vd->ldc_handle)) != 0) && 1268 (status != ECONNREFUSED)) { 1269 PRN("ldc_up() returned errno %d", status); 1270 } 1271 } else if (event & LDC_EVT_UP) { 1272 /* Reset the connection state when channel comes (back) up */ 1273 vd_reset_connection(vd, B_FALSE); 1274 } 1275 mutex_exit(&vd->lock); 1276 return (LDC_SUCCESS); 1277 } 1278 1279 static uint_t 1280 vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1281 { 1282 _NOTE(ARGUNUSED(key, val)) 1283 (*((uint_t *)arg))++; 1284 return (MH_WALK_TERMINATE); 1285 } 1286 1287 1288 static int 1289 vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1290 { 1291 uint_t vd_present = 0; 1292 minor_t instance; 1293 vds_t *vds; 1294 1295 1296 PR0("Entered"); 1297 switch (cmd) { 1298 case DDI_DETACH: 1299 /* the real work happens below */ 1300 break; 1301 case DDI_SUSPEND: 1302 /* nothing to do for this non-device */ 1303 return (DDI_SUCCESS); 1304 default: 1305 return (DDI_FAILURE); 1306 } 1307 1308 ASSERT(cmd == DDI_DETACH); 1309 instance = ddi_get_instance(dip); 1310 if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 1311 PRN("Could not get state for instance %u", instance); 1312 ddi_soft_state_free(vds_state, instance); 1313 return (DDI_FAILURE); 1314 } 1315 1316 /* Do no detach when serving any vdisks */ 1317 mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present); 1318 if (vd_present) { 1319 PR0("Not detaching because serving vdisks"); 1320 return (DDI_FAILURE); 1321 } 1322 1323 PR0("Detaching"); 1324 if (vds->initialized & VDS_MDEG) 1325 (void) mdeg_unregister(vds->mdeg); 1326 if (vds->initialized & VDS_LDI) 1327 (void) ldi_ident_release(vds->ldi_ident); 1328 mod_hash_destroy_hash(vds->vd_table); 1329 if (vds->initialized & VDS_LOCKING) 1330 mutex_destroy(&vds->lock); 1331 ddi_soft_state_free(vds_state, instance); 1332 return (DDI_SUCCESS); 1333 } 1334 1335 static boolean_t 1336 is_pseudo_device(dev_info_t *dip) 1337 { 1338 dev_info_t *parent, *root = ddi_root_node(); 1339 1340 1341 for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root); 1342 parent = ddi_get_parent(parent)) { 1343 if 

static int
vd_get_params(ldi_handle_t lh, char *block_device, vd_t *vd)
{
	int		otyp, rval, status;
	dev_info_t	*dip;
	struct dk_cinfo	dk_cinfo;


	/* Get block device's device number, otyp, and size */
	if ((status = ldi_get_dev(lh, &vd->dev[0])) != 0) {
		PRN("ldi_get_dev() returned errno %d for %s",
		    status, block_device);
		return (status);
	}
	if ((status = ldi_get_otyp(lh, &otyp)) != 0) {
		PRN("ldi_get_otyp() returned errno %d for %s",
		    status, block_device);
		return (status);
	}
	if (otyp != OTYP_BLK) {
		PRN("Cannot serve non-block device %s", block_device);
		return (ENOTBLK);
	}
	if (ldi_get_size(lh, &vd->vdisk_size) != DDI_SUCCESS) {
		PRN("ldi_get_size() failed for %s", block_device);
		return (EIO);
	}

	/* Determine if backing block device is a pseudo device */
	if ((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]),
	    dev_to_instance(vd->dev[0]), 0)) == NULL) {
		PRN("%s is no longer accessible", block_device);
		return (EIO);
	}
	vd->pseudo = is_pseudo_device(dip);
	ddi_release_devi(dip);
	if (vd->pseudo) {
		vd->vdisk_type	= VD_DISK_TYPE_SLICE;
		vd->nslices	= 1;
		return (0);	/* ...and we're done */
	}

	/* Get dk_cinfo to determine slice of backing block device */
	if ((status = ldi_ioctl(lh, DKIOCINFO, (intptr_t)&dk_cinfo,
	    FKIOCTL, kcred, &rval)) != 0) {
		PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s",
		    status, block_device);
		return (status);
	}

	if (dk_cinfo.dki_partition >= V_NUMPAR) {
		PRN("slice %u >= maximum slice %u for %s",
		    dk_cinfo.dki_partition, V_NUMPAR, block_device);
		return (EIO);
	}

	/* If block device slice is entire disk, fill in all slice devices */
	if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) {
		uint_t	slice;
		major_t	major = getmajor(vd->dev[0]);
		minor_t	minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE;

		vd->vdisk_type	= VD_DISK_TYPE_DISK;
		vd->nslices	= V_NUMPAR;
		for (slice = 0; slice < vd->nslices; slice++)
			vd->dev[slice] = makedevice(major, (minor + slice));
		return (0);	/* ...and we're done */
	}

	/* Otherwise, we have a (partial) slice of a block device */
	vd->vdisk_type	= VD_DISK_TYPE_SLICE;
	vd->nslices	= 1;


	/* Initialize dk_geom structure for single-slice block device */
	if ((status = ldi_ioctl(lh, DKIOCGGEOM, (intptr_t)&vd->dk_geom,
	    FKIOCTL, kcred, &rval)) != 0) {
		PRN("ldi_ioctl(DKIOCGGEOM) returned errno %d for %s",
		    status, block_device);
		return (status);
	}
	if (vd->dk_geom.dkg_nsect == 0) {
		PRN("%s geometry claims 0 sectors per track", block_device);
		return (EIO);
	}
	if (vd->dk_geom.dkg_nhead == 0) {
		PRN("%s geometry claims 0 heads", block_device);
		return (EIO);
	}
	vd->dk_geom.dkg_ncyl =
	    lbtodb(vd->vdisk_size)/vd->dk_geom.dkg_nsect/vd->dk_geom.dkg_nhead;
	vd->dk_geom.dkg_acyl = 0;
	vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl;


	/* Initialize vtoc structure for single-slice block device */
	if ((status = ldi_ioctl(lh, DKIOCGVTOC, (intptr_t)&vd->vtoc,
	    FKIOCTL, kcred, &rval)) != 0) {
		PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d for %s",
		    status, block_device);
		return (status);
	}
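	/*
	 * Summary of the label synthesized below (descriptive only): the
	 * backing slice is presented to the client as a one-partition disk
	 * whose single V_UNASSIGNED partition starts at block 0 and spans
	 * the whole slice, with VD_VOLUME_NAME ("vdisk") as the volume name
	 * and VD_ASCIILABEL ("Virtual Disk") as the ASCII label.
	 */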
	bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume,
	    MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume)));
	bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part));
	vd->vtoc.v_nparts = 1;
	vd->vtoc.v_part[0].p_tag = V_UNASSIGNED;
	vd->vtoc.v_part[0].p_flag = 0;
	vd->vtoc.v_part[0].p_start = 0;
	vd->vtoc.v_part[0].p_size = lbtodb(vd->vdisk_size);
	bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel,
	    MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel)));


	return (0);
}

static int
vds_do_init_vd(vds_t *vds, uint64_t id, char *block_device, uint64_t ldc_id,
    vd_t **vdp)
{
	char			tq_name[TASKQ_NAMELEN];
	int			param_status, status;
	uint_t			slice;
	ddi_iblock_cookie_t	iblock = NULL;
	ldc_attr_t		ldc_attr;
	ldi_handle_t		lh = NULL;
	vd_t			*vd;


	ASSERT(vds != NULL);
	ASSERT(block_device != NULL);
	ASSERT(vdp != NULL);
	PR0("Adding vdisk for %s", block_device);

	if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) {
		PRN("No memory for virtual disk");
		return (EAGAIN);
	}
	*vdp = vd;	/* assign here so vds_destroy_vd() can cleanup later */
	vd->vds = vds;


	/* Get device parameters */
	if ((status = ldi_open_by_name(block_device, FREAD, kcred, &lh,
	    vds->ldi_ident)) != 0) {
		PRN("ldi_open_by_name(%s) = errno %d", block_device, status);
		return (status);
	}
	param_status = vd_get_params(lh, block_device, vd);
	if ((status = ldi_close(lh, FREAD, kcred)) != 0) {
		PRN("ldi_close(%s) = errno %d", block_device, status);
		return (status);
	}
	if (param_status != 0)
		return (param_status);
	ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR);
	PR0("vdisk_type = %s, pseudo = %s, nslices = %u",
	    ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"),
	    (vd->pseudo ? "yes" : "no"), vd->nslices);
"yes" : "no"), vd->nslices); 1510 1511 1512 /* Initialize locking */ 1513 if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, 1514 &iblock) != DDI_SUCCESS) { 1515 PRN("Could not get iblock cookie."); 1516 return (EIO); 1517 } 1518 1519 mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock); 1520 vd->initialized |= VD_LOCKING; 1521 1522 1523 /* Open the backing-device slices */ 1524 for (slice = 0; slice < vd->nslices; slice++) { 1525 ASSERT(vd->ldi_handle[slice] == NULL); 1526 PR0("Opening device %u, minor %u = slice %u", 1527 getmajor(vd->dev[slice]), getminor(vd->dev[slice]), slice); 1528 if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 1529 vd_open_flags, kcred, &vd->ldi_handle[slice], 1530 vds->ldi_ident)) != 0) { 1531 PRN("ldi_open_by_dev() returned errno %d for slice %u", 1532 status, slice); 1533 /* vds_destroy_vd() will close any open slices */ 1534 #if 0 /* FIXME */ 1535 return (status); 1536 #endif 1537 } 1538 } 1539 1540 1541 /* Create the task queue for the vdisk */ 1542 (void) snprintf(tq_name, sizeof (tq_name), "vd%lu", id); 1543 PR1("tq_name = %s", tq_name); 1544 if ((vd->taskq = ddi_taskq_create(vds->dip, tq_name, 1, 1545 TASKQ_DEFAULTPRI, 0)) == NULL) { 1546 PRN("Could not create task queue"); 1547 return (EIO); 1548 } 1549 vd->initialized |= VD_TASKQ; 1550 vd->enabled = 1; /* before callback can dispatch to taskq */ 1551 1552 1553 /* Bring up LDC */ 1554 ldc_attr.devclass = LDC_DEV_BLK_SVC; 1555 ldc_attr.instance = ddi_get_instance(vds->dip); 1556 ldc_attr.mode = LDC_MODE_UNRELIABLE; 1557 ldc_attr.qlen = VD_LDC_QLEN; 1558 if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) { 1559 PRN("ldc_init(%lu) = errno %d", ldc_id, status); 1560 return (status); 1561 } 1562 vd->initialized |= VD_LDC; 1563 1564 if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, 1565 (caddr_t)vd)) != 0) { 1566 PRN("ldc_reg_callback() returned errno %d", status); 1567 return (status); 1568 } 1569 1570 if ((status = ldc_open(vd->ldc_handle)) != 0) { 1571 PRN("ldc_open() returned errno %d", status); 1572 return (status); 1573 } 1574 1575 if (((status = ldc_up(vd->ldc_handle)) != 0) && 1576 (status != ECONNREFUSED)) { 1577 PRN("ldc_up() returned errno %d", status); 1578 return (status); 1579 } 1580 1581 1582 /* Add the successfully-initialized vdisk to the server's table */ 1583 if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { 1584 PRN("Error adding vdisk ID %lu to table", id); 1585 return (EIO); 1586 } 1587 1588 return (0); 1589 } 1590 1591 /* 1592 * Destroy the state associated with a virtual disk 1593 */ 1594 static void 1595 vds_destroy_vd(void *arg) 1596 { 1597 vd_t *vd = (vd_t *)arg; 1598 1599 1600 PR0("Entered"); 1601 if (vd == NULL) 1602 return; 1603 1604 /* Disable queuing requests for the vdisk */ 1605 if (vd->initialized & VD_LOCKING) { 1606 mutex_enter(&vd->lock); 1607 vd->enabled = 0; 1608 mutex_exit(&vd->lock); 1609 } 1610 1611 /* Drain and destroy the task queue (*before* shutting down LDC) */ 1612 if (vd->initialized & VD_TASKQ) 1613 ddi_taskq_destroy(vd->taskq); /* waits for queued tasks */ 1614 1615 /* Shut down LDC */ 1616 if (vd->initialized & VD_LDC) { 1617 if (vd->initialized & VD_DRING) 1618 (void) ldc_mem_dring_unmap(vd->dring_handle); 1619 (void) ldc_unreg_callback(vd->ldc_handle); 1620 (void) ldc_close(vd->ldc_handle); 1621 (void) ldc_fini(vd->ldc_handle); 1622 } 1623 1624 /* Close any open backing-device slices */ 1625 for (uint_t slice = 0; slice < vd->nslices; slice++) { 1626 if (vd->ldi_handle[slice] != NULL) { 1627 
PR0("Closing slice %u", slice); 1628 (void) ldi_close(vd->ldi_handle[slice], 1629 vd_open_flags, kcred); 1630 } 1631 } 1632 1633 /* Free lock */ 1634 if (vd->initialized & VD_LOCKING) 1635 mutex_destroy(&vd->lock); 1636 1637 /* Finally, free the vdisk structure itself */ 1638 kmem_free(vd, sizeof (*vd)); 1639 } 1640 1641 static int 1642 vds_init_vd(vds_t *vds, uint64_t id, char *block_device, uint64_t ldc_id) 1643 { 1644 int status; 1645 vd_t *vd = NULL; 1646 1647 1648 #ifdef lint 1649 (void) vd; 1650 #endif /* lint */ 1651 1652 if ((status = vds_do_init_vd(vds, id, block_device, ldc_id, &vd)) != 0) 1653 vds_destroy_vd(vd); 1654 1655 return (status); 1656 } 1657 1658 static int 1659 vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel, 1660 uint64_t *ldc_id) 1661 { 1662 int num_channels; 1663 1664 1665 /* Look for channel endpoint child(ren) of the vdisk MD node */ 1666 if ((num_channels = md_scan_dag(md, vd_node, 1667 md_find_name(md, VD_CHANNEL_ENDPOINT), 1668 md_find_name(md, "fwd"), channel)) <= 0) { 1669 PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); 1670 return (-1); 1671 } 1672 1673 /* Get the "id" value for the first channel endpoint node */ 1674 if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) { 1675 PRN("No \"%s\" property found for \"%s\" of vdisk", 1676 VD_ID_PROP, VD_CHANNEL_ENDPOINT); 1677 return (-1); 1678 } 1679 1680 if (num_channels > 1) { 1681 PRN("Using ID of first of multiple channels for this vdisk"); 1682 } 1683 1684 return (0); 1685 } 1686 1687 static int 1688 vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id) 1689 { 1690 int num_nodes, status; 1691 size_t size; 1692 mde_cookie_t *channel; 1693 1694 1695 if ((num_nodes = md_node_count(md)) <= 0) { 1696 PRN("Invalid node count in Machine Description subtree"); 1697 return (-1); 1698 } 1699 size = num_nodes*(sizeof (*channel)); 1700 channel = kmem_zalloc(size, KM_SLEEP); 1701 status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id); 1702 kmem_free(channel, size); 1703 1704 return (status); 1705 } 1706 1707 static void 1708 vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 1709 { 1710 char *block_device = NULL; 1711 uint64_t id = 0, ldc_id = 0; 1712 1713 1714 if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 1715 PRN("Error getting vdisk \"%s\"", VD_ID_PROP); 1716 return; 1717 } 1718 PR0("Adding vdisk ID %lu", id); 1719 if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, 1720 &block_device) != 0) { 1721 PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 1722 return; 1723 } 1724 1725 if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) { 1726 PRN("Error getting LDC ID for vdisk %lu", id); 1727 return; 1728 } 1729 1730 if (vds_init_vd(vds, id, block_device, ldc_id) != 0) { 1731 PRN("Failed to add vdisk ID %lu", id); 1732 return; 1733 } 1734 } 1735 1736 static void 1737 vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 1738 { 1739 uint64_t id = 0; 1740 1741 1742 if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 1743 PRN("Unable to get \"%s\" property from vdisk's MD node", 1744 VD_ID_PROP); 1745 return; 1746 } 1747 PR0("Removing vdisk ID %lu", id); 1748 if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 1749 PRN("No vdisk entry found for vdisk ID %lu", id); 1750 } 1751 1752 static void 1753 vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node, 1754 md_t *curr_md, mde_cookie_t curr_vd_node) 1755 { 1756 char *curr_dev, *prev_dev; 1757 uint64_t curr_id = 0, curr_ldc_id = 0; 1758 uint64_t prev_id = 0, 


	/* Validate that vdisk ID has not changed */
	if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) {
		PRN("Error getting previous vdisk \"%s\" property",
		    VD_ID_PROP);
		return;
	}
	if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) {
		PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP);
		return;
	}
	if (curr_id != prev_id) {
		PRN("Not changing vdisk:  ID changed from %lu to %lu",
		    prev_id, curr_id);
		return;
	}

	/* Validate that LDC ID has not changed */
	if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) {
		PRN("Error getting LDC ID for vdisk %lu", prev_id);
		return;
	}

	if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) {
		PRN("Error getting LDC ID for vdisk %lu", curr_id);
		return;
	}
	if (curr_ldc_id != prev_ldc_id) {
		_NOTE(NOTREACHED);	/* FIXME is there a better way? */
		PRN("Not changing vdisk:  "
		    "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id);
		return;
	}

	/* Determine whether device path has changed */
	if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP,
	    &prev_dev) != 0) {
		PRN("Error getting previous vdisk \"%s\"",
		    VD_BLOCK_DEVICE_PROP);
		return;
	}
	if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP,
	    &curr_dev) != 0) {
		PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP);
		return;
	}
	if (((len = strlen(curr_dev)) == strlen(prev_dev)) &&
	    (strncmp(curr_dev, prev_dev, len) == 0))
		return;	/* no relevant (supported) change */

	PR0("Changing vdisk ID %lu", prev_id);
	/* Remove old state, which will close vdisk and reset */
	if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0)
		PRN("No entry found for vdisk ID %lu", prev_id);
	/* Re-initialize vdisk with new state */
	if (vds_init_vd(vds, curr_id, curr_dev, curr_ldc_id) != 0) {
		PRN("Failed to change vdisk ID %lu", curr_id);
		return;
	}
}

static int
vds_process_md(void *arg, mdeg_result_t *md)
{
	int	i;
	vds_t	*vds = arg;


	if (md == NULL)
		return (MDEG_FAILURE);
	ASSERT(vds != NULL);

	for (i = 0; i < md->removed.nelem; i++)
		vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]);
	for (i = 0; i < md->match_curr.nelem; i++)
		vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i],
		    md->match_curr.mdp, md->match_curr.mdep[i]);
	for (i = 0; i < md->added.nelem; i++)
		vds_add_vd(vds, md->added.mdp, md->added.mdep[i]);

	return (MDEG_SUCCESS);
}
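
/*
 * Descriptive note: vds_process_md() is the MDEG callback registered in
 * vds_do_attach() below.  Each Machine Description update is handled as
 * removals first, then changes (which tear a vdisk down and rebuild it when
 * only the backing-device path changed), then additions.
 */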

static int
vds_do_attach(dev_info_t *dip)
{
	static char	reg_prop[] = "reg";	/* devinfo ID prop */

	/* MDEG specification for a (particular) vds node */
	static mdeg_prop_spec_t	vds_prop_spec[] = {
		{MDET_PROP_STR, "name", {VDS_NAME}},
		{MDET_PROP_VAL, "cfg-handle", {0}},
		{MDET_LIST_END, NULL, {0}}};
	static mdeg_node_spec_t	vds_spec = {"virtual-device", vds_prop_spec};

	/* MDEG specification for matching a vd node */
	static md_prop_match_t	vd_prop_spec[] = {
		{MDET_PROP_VAL, VD_ID_PROP},
		{MDET_LIST_END, NULL}};
	static mdeg_node_match_t vd_spec = {"virtual-device-port",
	    vd_prop_spec};

	int		status;
	uint64_t	cfg_handle;
	minor_t		instance = ddi_get_instance(dip);
	vds_t		*vds;


	/*
	 * The "cfg-handle" property of a vds node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris.  Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance when
	 * registering with the MD event-generation framework.  If the "reg"
	 * property cannot be found, the device tree state is presumably so
	 * broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, reg_prop)) {
		PRN("vds \"%s\" property does not exist", reg_prop);
		return (DDI_FAILURE);
	}

	/* Get the MD instance for later MDEG registration */
	cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    reg_prop, -1);

	if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) {
		PRN("Could not allocate state for instance %u", instance);
		return (DDI_FAILURE);
	}

	if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) {
		PRN("Could not get state for instance %u", instance);
		ddi_soft_state_free(vds_state, instance);
		return (DDI_FAILURE);
	}


	vds->dip	= dip;
	vds->vd_table	= mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS,
	    vds_destroy_vd, sizeof (void *));
	ASSERT(vds->vd_table != NULL);

	mutex_init(&vds->lock, NULL, MUTEX_DRIVER, NULL);
	vds->initialized |= VDS_LOCKING;

	if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) {
		PRN("ldi_ident_from_dip() returned errno %d", status);
		return (DDI_FAILURE);
	}
	vds->initialized |= VDS_LDI;

	/* Register for MD updates */
	vds_prop_spec[1].ps_val = cfg_handle;
	if (mdeg_register(&vds_spec, &vd_spec, vds_process_md, vds,
	    &vds->mdeg) != MDEG_SUCCESS) {
		PRN("Unable to register for MD updates");
		return (DDI_FAILURE);
	}
	vds->initialized |= VDS_MDEG;

	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}

static int
vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	status;

	PR0("Entered");
	switch (cmd) {
	case DDI_ATTACH:
		if ((status = vds_do_attach(dip)) != DDI_SUCCESS)
			(void) vds_detach(dip, DDI_DETACH);
		return (status);
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}

static struct dev_ops vds_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	ddi_no_info,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vds_attach,	/* devo_attach */
	vds_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	NULL,		/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk server v%I%",
	&vds_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};


int
_init(void)
{
	int	i, status;


	PR0("Built %s %s", __DATE__, __TIME__);
	if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0)
		return (status);
	if ((status = mod_install(&modlinkage)) != 0) {
		ddi_soft_state_fini(&vds_state);
		return (status);
	}

	/* Fill in the bit-mask of server-supported operations */
	for (i = 0; i < vds_noperations; i++)
		vds_operations |= 1 << (vds_operation[i].operation - 1);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;


	PR0("Entered");
	if ((status = mod_remove(&modlinkage)) != 0)
		return (status);
	ddi_soft_state_fini(&vds_state);
	return (0);
}