1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. 
vdc will copy the data to be written to the descriptor 49 * ring or maps the buffer to store the data read by the vDisk 50 * server into the descriptor ring. It then sends a message to the 51 * vDisk server requesting it to complete the operation. 52 * 53 * 4) Handling responses from vDisk server. 54 * The vDisk server will ACK some or all of the messages vdc sends to it 55 * (this is configured during the handshake). Upon receipt of an ACK 56 * vdc will check the descriptor ring and signal to the upper layer 57 * code waiting on the IO. 58 */ 59 60 #include <sys/conf.h> 61 #include <sys/disp.h> 62 #include <sys/ddi.h> 63 #include <sys/dkio.h> 64 #include <sys/efi_partition.h> 65 #include <sys/fcntl.h> 66 #include <sys/file.h> 67 #include <sys/mach_descrip.h> 68 #include <sys/modctl.h> 69 #include <sys/mdeg.h> 70 #include <sys/note.h> 71 #include <sys/open.h> 72 #include <sys/stat.h> 73 #include <sys/sunddi.h> 74 #include <sys/types.h> 75 #include <sys/promif.h> 76 #include <sys/vtoc.h> 77 #include <sys/archsystm.h> 78 #include <sys/sysmacros.h> 79 80 #include <sys/cdio.h> 81 #include <sys/dktp/cm.h> 82 #include <sys/dktp/fdisk.h> 83 #include <sys/scsi/generic/sense.h> 84 #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ 85 #include <sys/scsi/targets/sddef.h> 86 87 #include <sys/ldoms.h> 88 #include <sys/ldc.h> 89 #include <sys/vio_common.h> 90 #include <sys/vio_mailbox.h> 91 #include <sys/vdsk_common.h> 92 #include <sys/vdsk_mailbox.h> 93 #include <sys/vdc.h> 94 95 /* 96 * function prototypes 97 */ 98 99 /* standard driver functions */ 100 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 101 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 102 static int vdc_strategy(struct buf *buf); 103 static int vdc_print(dev_t dev, char *str); 104 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 105 static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 106 static int vdc_write(dev_t dev, 
struct uio *uio, cred_t *cred); 107 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 108 cred_t *credp, int *rvalp); 109 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 110 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 111 112 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 113 void *arg, void **resultp); 114 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 115 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 116 117 /* setup */ 118 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 119 static int vdc_do_ldc_init(vdc_t *vdc); 120 static int vdc_start_ldc_connection(vdc_t *vdc); 121 static int vdc_create_device_nodes(vdc_t *vdc); 122 static int vdc_create_device_nodes_props(vdc_t *vdc); 123 static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id); 124 static int vdc_do_ldc_up(vdc_t *vdc); 125 static void vdc_terminate_ldc(vdc_t *vdc); 126 static int vdc_init_descriptor_ring(vdc_t *vdc); 127 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 128 129 /* handshake with vds */ 130 static void vdc_init_handshake_negotiation(void *arg); 131 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 132 static int vdc_init_attr_negotiation(vdc_t *vdc); 133 static int vdc_init_dring_negotiate(vdc_t *vdc); 134 static void vdc_reset_connection(vdc_t *vdc, boolean_t resetldc); 135 static boolean_t vdc_is_able_to_tx_data(vdc_t *vdc, int flag); 136 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 137 138 /* processing incoming messages from vDisk server */ 139 static void vdc_process_msg_thread(vdc_t *vdc); 140 static void vdc_process_msg(void *arg); 141 static void vdc_do_process_msg(vdc_t *vdc); 142 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 143 static int vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg); 144 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg); 145 static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t 
msg); 146 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 147 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 148 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 149 static int vdc_get_next_dring_entry_id(vdc_t *vdc, uint_t needed); 150 static int vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, 151 size_t nbytes, int op, uint64_t arg, uint64_t slice); 152 static int vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, 153 vio_dring_msg_t dmsg); 154 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 155 static int vdc_get_response(vdc_t *vdc, int start, int end); 156 static int vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, 157 caddr_t addr, size_t nbytes, int operation); 158 static boolean_t vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int 159 num_msgs); 160 161 /* dkio */ 162 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); 163 static int vdc_create_fake_geometry(vdc_t *vdc); 164 static int vdc_setup_disk_layout(vdc_t *vdc); 165 static int vdc_null_copy_func(void *from, void *to, int mode, int dir); 166 static int vdc_get_vtoc_convert(void *from, void *to, int mode, int dir); 167 static int vdc_set_vtoc_convert(void *from, void *to, int mode, int dir); 168 static int vdc_get_geom_convert(void *from, void *to, int mode, int dir); 169 static int vdc_set_geom_convert(void *from, void *to, int mode, int dir); 170 static int vdc_uscsicmd_convert(void *from, void *to, int mode, int dir); 171 172 /* 173 * Module variables 174 */ 175 uint64_t vdc_hz_timeout; 176 uint64_t vdc_usec_timeout = VDC_USEC_TIMEOUT_MIN; 177 uint64_t vdc_dump_usec_timeout = VDC_USEC_TIMEOUT_MIN / 300; 178 static int vdc_retries = VDC_RETRIES; 179 static int vdc_dump_retries = VDC_RETRIES * 10; 180 181 /* Soft state pointer */ 182 static void *vdc_state; 183 184 /* variable level controlling the verbosity of the error/debug messages */ 185 int vdc_msglevel = 0; 186 187 /* 188 * 
Supported vDisk protocol version pairs. 189 * 190 * The first array entry is the latest and preferred version. 191 */ 192 static const vio_ver_t vdc_version[] = {{1, 0}}; 193 194 static void 195 vdc_msg(const char *format, ...) 196 { 197 va_list args; 198 199 va_start(args, format); 200 vcmn_err(CE_CONT, format, args); 201 va_end(args); 202 } 203 204 static struct cb_ops vdc_cb_ops = { 205 vdc_open, /* cb_open */ 206 vdc_close, /* cb_close */ 207 vdc_strategy, /* cb_strategy */ 208 vdc_print, /* cb_print */ 209 vdc_dump, /* cb_dump */ 210 vdc_read, /* cb_read */ 211 vdc_write, /* cb_write */ 212 vdc_ioctl, /* cb_ioctl */ 213 nodev, /* cb_devmap */ 214 nodev, /* cb_mmap */ 215 nodev, /* cb_segmap */ 216 nochpoll, /* cb_chpoll */ 217 ddi_prop_op, /* cb_prop_op */ 218 NULL, /* cb_str */ 219 D_MP | D_64BIT, /* cb_flag */ 220 CB_REV, /* cb_rev */ 221 vdc_aread, /* cb_aread */ 222 vdc_awrite /* cb_awrite */ 223 }; 224 225 static struct dev_ops vdc_ops = { 226 DEVO_REV, /* devo_rev */ 227 0, /* devo_refcnt */ 228 vdc_getinfo, /* devo_getinfo */ 229 nulldev, /* devo_identify */ 230 nulldev, /* devo_probe */ 231 vdc_attach, /* devo_attach */ 232 vdc_detach, /* devo_detach */ 233 nodev, /* devo_reset */ 234 &vdc_cb_ops, /* devo_cb_ops */ 235 NULL, /* devo_bus_ops */ 236 nulldev /* devo_power */ 237 }; 238 239 static struct modldrv modldrv = { 240 &mod_driverops, 241 "virtual disk client %I%", 242 &vdc_ops, 243 }; 244 245 static struct modlinkage modlinkage = { 246 MODREV_1, 247 &modldrv, 248 NULL 249 }; 250 251 /* -------------------------------------------------------------------------- */ 252 253 /* 254 * Device Driver housekeeping and setup 255 */ 256 257 int 258 _init(void) 259 { 260 int status; 261 262 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 263 return (status); 264 if ((status = mod_install(&modlinkage)) != 0) 265 ddi_soft_state_fini(&vdc_state); 266 return (status); 267 } 268 269 int 270 _info(struct modinfo *modinfop) 271 { 272 return 
(mod_info(&modlinkage, modinfop)); 273 } 274 275 int 276 _fini(void) 277 { 278 int status; 279 280 if ((status = mod_remove(&modlinkage)) != 0) 281 return (status); 282 ddi_soft_state_fini(&vdc_state); 283 return (0); 284 } 285 286 static int 287 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 288 { 289 _NOTE(ARGUNUSED(dip)) 290 291 int instance = SDUNIT(getminor((dev_t)arg)); 292 vdc_t *vdc = NULL; 293 294 switch (cmd) { 295 case DDI_INFO_DEVT2DEVINFO: 296 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 297 *resultp = NULL; 298 return (DDI_FAILURE); 299 } 300 *resultp = vdc->dip; 301 return (DDI_SUCCESS); 302 case DDI_INFO_DEVT2INSTANCE: 303 *resultp = (void *)(uintptr_t)instance; 304 return (DDI_SUCCESS); 305 default: 306 *resultp = NULL; 307 return (DDI_FAILURE); 308 } 309 } 310 311 static int 312 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 313 { 314 int instance; 315 int rv; 316 uint_t retries = 0; 317 vdc_t *vdc = NULL; 318 319 switch (cmd) { 320 case DDI_DETACH: 321 /* the real work happens below */ 322 break; 323 case DDI_SUSPEND: 324 /* nothing to do for this non-device */ 325 return (DDI_SUCCESS); 326 default: 327 return (DDI_FAILURE); 328 } 329 330 ASSERT(cmd == DDI_DETACH); 331 instance = ddi_get_instance(dip); 332 PR1("%s[%d] Entered\n", __func__, instance); 333 334 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 335 vdc_msg("%s[%d]: Could not get state structure.", 336 __func__, instance); 337 return (DDI_FAILURE); 338 } 339 340 if (vdc->open) { 341 PR0("%s[%d]: Cannot detach: device is open", 342 __func__, instance); 343 return (DDI_FAILURE); 344 } 345 346 PR0("%s[%d] proceeding...\n", __func__, instance); 347 348 /* 349 * try and disable callbacks to prevent another handshake 350 */ 351 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 352 PR0("%s[%d] callback disabled (rv=%d)\n", __func__, instance, rv); 353 354 /* 355 * Prevent any more attempts to start a handshake with the vdisk 
356 * server and tear down the existing connection. 357 */ 358 mutex_enter(&vdc->lock); 359 vdc->initialized |= VDC_HANDSHAKE_STOP; 360 vdc_reset_connection(vdc, B_TRUE); 361 mutex_exit(&vdc->lock); 362 363 if (vdc->initialized & VDC_THREAD) { 364 mutex_enter(&vdc->msg_proc_lock); 365 vdc->msg_proc_thr_state = VDC_THR_STOP; 366 vdc->msg_pending = B_TRUE; 367 cv_signal(&vdc->msg_proc_cv); 368 369 while (vdc->msg_proc_thr_state != VDC_THR_DONE) { 370 PR0("%s[%d]: Waiting for thread to exit\n", 371 __func__, instance); 372 rv = cv_timedwait(&vdc->msg_proc_cv, 373 &vdc->msg_proc_lock, VD_GET_TIMEOUT_HZ(1)); 374 if ((rv == -1) && (retries++ > vdc_retries)) 375 break; 376 } 377 mutex_exit(&vdc->msg_proc_lock); 378 } 379 380 mutex_enter(&vdc->lock); 381 382 if (vdc->initialized & VDC_DRING) 383 vdc_destroy_descriptor_ring(vdc); 384 385 if (vdc->initialized & VDC_LDC) 386 vdc_terminate_ldc(vdc); 387 388 mutex_exit(&vdc->lock); 389 390 if (vdc->initialized & VDC_MINOR) { 391 ddi_prop_remove_all(dip); 392 ddi_remove_minor_node(dip, NULL); 393 } 394 395 if (vdc->initialized & VDC_LOCKS) { 396 mutex_destroy(&vdc->lock); 397 mutex_destroy(&vdc->attach_lock); 398 mutex_destroy(&vdc->msg_proc_lock); 399 mutex_destroy(&vdc->dring_lock); 400 cv_destroy(&vdc->cv); 401 cv_destroy(&vdc->attach_cv); 402 cv_destroy(&vdc->msg_proc_cv); 403 } 404 405 if (vdc->minfo) 406 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 407 408 if (vdc->cinfo) 409 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 410 411 if (vdc->vtoc) 412 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 413 414 if (vdc->label) 415 kmem_free(vdc->label, DK_LABEL_SIZE); 416 417 if (vdc->initialized & VDC_SOFT_STATE) 418 ddi_soft_state_free(vdc_state, instance); 419 420 PR0("%s[%d] End %p\n", __func__, instance, vdc); 421 422 return (DDI_SUCCESS); 423 } 424 425 426 static int 427 vdc_do_attach(dev_info_t *dip) 428 { 429 int instance; 430 vdc_t *vdc = NULL; 431 int status; 432 uint_t retries = 0; 433 434 ASSERT(dip != NULL); 435 436 
instance = ddi_get_instance(dip); 437 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 438 vdc_msg("%s:(%d): Couldn't alloc state structure", 439 __func__, instance); 440 return (DDI_FAILURE); 441 } 442 443 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 444 vdc_msg("%s:(%d): Could not get state structure.", 445 __func__, instance); 446 return (DDI_FAILURE); 447 } 448 449 /* 450 * We assign the value to initialized in this case to zero out the 451 * variable and then set bits in it to indicate what has been done 452 */ 453 vdc->initialized = VDC_SOFT_STATE; 454 455 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 456 457 vdc->dip = dip; 458 vdc->instance = instance; 459 vdc->open = 0; 460 vdc->vdisk_type = VD_DISK_TYPE_UNK; 461 vdc->state = VD_STATE_INIT; 462 vdc->ldc_state = 0; 463 vdc->session_id = 0; 464 vdc->block_size = DEV_BSIZE; 465 vdc->max_xfer_sz = VD_MAX_BLOCK_SIZE / DEV_BSIZE; 466 467 vdc->vtoc = NULL; 468 vdc->cinfo = NULL; 469 vdc->minfo = NULL; 470 471 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 472 mutex_init(&vdc->attach_lock, NULL, MUTEX_DRIVER, NULL); 473 mutex_init(&vdc->msg_proc_lock, NULL, MUTEX_DRIVER, NULL); 474 mutex_init(&vdc->dring_lock, NULL, MUTEX_DRIVER, NULL); 475 cv_init(&vdc->cv, NULL, CV_DRIVER, NULL); 476 cv_init(&vdc->attach_cv, NULL, CV_DRIVER, NULL); 477 cv_init(&vdc->msg_proc_cv, NULL, CV_DRIVER, NULL); 478 vdc->initialized |= VDC_LOCKS; 479 480 vdc->msg_pending = B_FALSE; 481 vdc->msg_proc_thr_id = thread_create(NULL, 0, vdc_process_msg_thread, 482 vdc, 0, &p0, TS_RUN, minclsyspri); 483 if (vdc->msg_proc_thr_id == NULL) { 484 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 485 instance); 486 return (DDI_FAILURE); 487 } 488 vdc->initialized |= VDC_THREAD; 489 490 /* initialise LDC channel which will be used to communicate with vds */ 491 if (vdc_do_ldc_init(vdc) != 0) { 492 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 493 return (DDI_FAILURE); 494 } 495 496 /* 
Bring up connection with vds via LDC */ 497 status = vdc_start_ldc_connection(vdc); 498 if (status != 0) { 499 vdc_msg("%s[%d] Could not start LDC", __func__, instance); 500 return (DDI_FAILURE); 501 } 502 503 /* 504 * We need to wait until the handshake has completed before leaving 505 * the attach(). This is to allow the device node(s) to be created 506 * and the first usage of the filesystem to succeed. 507 */ 508 mutex_enter(&vdc->attach_lock); 509 while ((vdc->ldc_state != LDC_UP) || 510 (vdc->state != VD_STATE_DATA)) { 511 512 PR0("%s[%d] handshake in progress [VD %d (LDC %d)]\n", 513 __func__, instance, vdc->state, vdc->ldc_state); 514 515 status = cv_timedwait(&vdc->attach_cv, &vdc->attach_lock, 516 VD_GET_TIMEOUT_HZ(1)); 517 if (status == -1) { 518 if (retries >= vdc_retries) { 519 PR0("%s[%d] Give up handshake wait.\n", 520 __func__, instance); 521 mutex_exit(&vdc->attach_lock); 522 return (DDI_FAILURE); 523 } else { 524 PR0("%s[%d] Retry #%d for handshake.\n", 525 __func__, instance, retries); 526 vdc_init_handshake_negotiation(vdc); 527 retries++; 528 } 529 } 530 } 531 mutex_exit(&vdc->attach_lock); 532 533 /* 534 * Once the handshake is complete, we can use the DRing to send 535 * requests to the vDisk server to calculate the geometry and 536 * VTOC of the "disk" 537 */ 538 status = vdc_setup_disk_layout(vdc); 539 if (status != 0) { 540 cmn_err(CE_NOTE, "[%d] Failed to discover disk layout (err%d)", 541 vdc->instance, status); 542 } 543 544 /* 545 * Now that we have the device info we can create the 546 * device nodes and properties 547 */ 548 status = vdc_create_device_nodes(vdc); 549 if (status) { 550 cmn_err(CE_NOTE, "[%d] Failed to create device nodes", 551 instance); 552 return (status); 553 } 554 status = vdc_create_device_nodes_props(vdc); 555 if (status) { 556 cmn_err(CE_NOTE, "[%d] Failed to create device nodes" 557 " properties (%d)", instance, status); 558 return (status); 559 } 560 561 ddi_report_dev(dip); 562 563 PR0("%s[%d] Attach 
completed\n", __func__, instance); 564 return (status); 565 } 566 567 static int 568 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 569 { 570 int status; 571 572 PR0("%s[%d] Entered. Built %s %s\n", __func__, ddi_get_instance(dip), 573 __DATE__, __TIME__); 574 575 switch (cmd) { 576 case DDI_ATTACH: 577 if ((status = vdc_do_attach(dip)) != 0) 578 (void) vdc_detach(dip, DDI_DETACH); 579 return (status); 580 case DDI_RESUME: 581 /* nothing to do for this non-device */ 582 return (DDI_SUCCESS); 583 default: 584 return (DDI_FAILURE); 585 } 586 } 587 588 static int 589 vdc_do_ldc_init(vdc_t *vdc) 590 { 591 int status = 0; 592 ldc_status_t ldc_state; 593 ldc_attr_t ldc_attr; 594 uint64_t ldc_id = 0; 595 dev_info_t *dip = NULL; 596 597 ASSERT(vdc != NULL); 598 599 dip = vdc->dip; 600 vdc->initialized |= VDC_LDC; 601 602 if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) { 603 vdc_msg("%s: Failed to get <ldc_id> property\n", __func__); 604 return (EIO); 605 } 606 vdc->ldc_id = ldc_id; 607 608 ldc_attr.devclass = LDC_DEV_BLK; 609 ldc_attr.instance = vdc->instance; 610 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 611 ldc_attr.qlen = VD_LDC_QLEN; 612 613 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 614 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 615 if (status != 0) { 616 cmn_err(CE_NOTE, "[%d] ldc_init(chan %ld) returned %d", 617 vdc->instance, ldc_id, status); 618 return (status); 619 } 620 vdc->initialized |= VDC_LDC_INIT; 621 } 622 status = ldc_status(vdc->ldc_handle, &ldc_state); 623 if (status != 0) { 624 vdc_msg("Cannot discover LDC status [err=%d].", status); 625 return (status); 626 } 627 vdc->ldc_state = ldc_state; 628 629 if ((vdc->initialized & VDC_LDC_CB) == 0) { 630 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 631 (caddr_t)vdc); 632 if (status != 0) { 633 vdc_msg("%s: ldc_reg_callback()=%d", __func__, status); 634 return (status); 635 } 636 vdc->initialized |= VDC_LDC_CB; 637 } 638 639 vdc->initialized |= VDC_LDC; 
640 641 /* 642 * At this stage we have initialised LDC, we will now try and open 643 * the connection. 644 */ 645 if (vdc->ldc_state == LDC_INIT) { 646 status = ldc_open(vdc->ldc_handle); 647 if (status != 0) { 648 cmn_err(CE_NOTE, "[%d] ldc_open(chan %ld) returned %d", 649 vdc->instance, vdc->ldc_id, status); 650 return (status); 651 } 652 vdc->initialized |= VDC_LDC_OPEN; 653 } 654 655 return (status); 656 } 657 658 static int 659 vdc_start_ldc_connection(vdc_t *vdc) 660 { 661 int status = 0; 662 663 ASSERT(vdc != NULL); 664 665 mutex_enter(&vdc->lock); 666 667 if (vdc->ldc_state == LDC_UP) { 668 PR0("%s: LDC is already UP ..\n", __func__); 669 mutex_exit(&vdc->lock); 670 return (0); 671 } 672 673 status = vdc_do_ldc_up(vdc); 674 675 PR0("%s[%d] Finished bringing up LDC\n", __func__, vdc->instance); 676 677 mutex_exit(&vdc->lock); 678 679 return (status); 680 } 681 682 683 /* 684 * Function: 685 * vdc_create_device_nodes 686 * 687 * Description: 688 * This function creates the block and character device nodes under 689 * /devices along with the node properties. It is called as part of 690 * the attach(9E) of the instance during the handshake with vds after 691 * vds has sent the attributes to vdc. 692 * 693 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 694 * of 2 is used in keeping with the Solaris convention that slice 2 695 * refers to a whole disk. 
Slices start at 'a' 696 * 697 * Parameters: 698 * vdc - soft state pointer 699 * 700 * Return Values 701 * 0 - Success 702 * EIO - Failed to create node 703 * EINVAL - Unknown type of disk exported 704 */ 705 static int 706 vdc_create_device_nodes(vdc_t *vdc) 707 { 708 /* uses NNNN which is OK as long as # of disks <= 10000 */ 709 char name[sizeof ("disk@NNNN:s,raw")]; 710 dev_info_t *dip = NULL; 711 int instance; 712 int num_slices = 1; 713 int i; 714 715 ASSERT(vdc != NULL); 716 717 instance = vdc->instance; 718 dip = vdc->dip; 719 720 switch (vdc->vdisk_type) { 721 case VD_DISK_TYPE_DISK: 722 num_slices = V_NUMPAR; 723 break; 724 case VD_DISK_TYPE_SLICE: 725 num_slices = 1; 726 break; 727 case VD_DISK_TYPE_UNK: 728 default: 729 return (EINVAL); 730 } 731 732 for (i = 0; i < num_slices; i++) { 733 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 734 if (ddi_create_minor_node(dip, name, S_IFBLK, 735 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 736 vdc_msg("%s[%d]: Couldn't add block node %s.", 737 __func__, instance, name); 738 return (EIO); 739 } 740 741 /* if any device node is created we set this flag */ 742 vdc->initialized |= VDC_MINOR; 743 744 (void) snprintf(name, sizeof (name), "%c%s", 745 'a' + i, ",raw"); 746 if (ddi_create_minor_node(dip, name, S_IFCHR, 747 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 748 vdc_msg("%s[%d]: Could not add raw node %s.", 749 __func__, instance, name); 750 return (EIO); 751 } 752 } 753 754 return (0); 755 } 756 757 /* 758 * Function: 759 * vdc_create_device_nodes_props 760 * 761 * Description: 762 * This function creates the block and character device nodes under 763 * /devices along with the node properties. It is called as part of 764 * the attach(9E) of the instance during the handshake with vds after 765 * vds has sent the attributes to vdc. 
766 * 767 * Parameters: 768 * vdc - soft state pointer 769 * 770 * Return Values 771 * 0 - Success 772 * EIO - Failed to create device node property 773 * EINVAL - Unknown type of disk exported 774 */ 775 static int 776 vdc_create_device_nodes_props(vdc_t *vdc) 777 { 778 dev_info_t *dip = NULL; 779 int instance; 780 int num_slices = 1; 781 int64_t size = 0; 782 dev_t dev; 783 int rv; 784 int i; 785 786 ASSERT(vdc != NULL); 787 788 instance = vdc->instance; 789 dip = vdc->dip; 790 791 if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { 792 cmn_err(CE_NOTE, "![%d] Could not create device node property." 793 " No VTOC available", instance); 794 return (ENXIO); 795 } 796 797 switch (vdc->vdisk_type) { 798 case VD_DISK_TYPE_DISK: 799 num_slices = V_NUMPAR; 800 break; 801 case VD_DISK_TYPE_SLICE: 802 num_slices = 1; 803 break; 804 case VD_DISK_TYPE_UNK: 805 default: 806 return (EINVAL); 807 } 808 809 for (i = 0; i < num_slices; i++) { 810 dev = makedevice(ddi_driver_major(dip), 811 VD_MAKE_DEV(instance, i)); 812 813 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 814 PR0("%s[%d] sz %ld (%ld Mb) p_size %lx\n", 815 __func__, instance, size, size / (1024 * 1024), 816 vdc->vtoc->v_part[i].p_size); 817 818 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 819 if (rv != DDI_PROP_SUCCESS) { 820 vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", 821 __func__, instance, VDC_SIZE_PROP_NAME, size); 822 return (EIO); 823 } 824 825 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 826 lbtodb(size)); 827 if (rv != DDI_PROP_SUCCESS) { 828 vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", __func__, 829 instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 830 return (EIO); 831 } 832 } 833 834 return (0); 835 } 836 837 static int 838 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 839 { 840 _NOTE(ARGUNUSED(cred)) 841 842 int instance; 843 vdc_t *vdc; 844 845 ASSERT(dev != NULL); 846 instance = SDUNIT(getminor(*dev)); 847 848 PR0("%s[%d] minor = 
%d flag = %x, otyp = %x\n", __func__, instance, 849 getminor(*dev), flag, otyp); 850 851 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 852 return (EINVAL); 853 854 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 855 vdc_msg("%s[%d] Could not get state.", __func__, instance); 856 return (ENXIO); 857 } 858 859 /* 860 * Check to see if we can communicate with vds 861 */ 862 if (!vdc_is_able_to_tx_data(vdc, flag)) { 863 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 864 return (ENOLINK); 865 } 866 867 mutex_enter(&vdc->lock); 868 vdc->open++; 869 mutex_exit(&vdc->lock); 870 871 return (0); 872 } 873 874 static int 875 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 876 { 877 _NOTE(ARGUNUSED(cred)) 878 879 int instance; 880 vdc_t *vdc; 881 882 instance = SDUNIT(getminor(dev)); 883 884 PR0("%s[%d] flag = %x, otyp = %x\n", __func__, instance, flag, otyp); 885 886 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 887 return (EINVAL); 888 889 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 890 vdc_msg("%s[%d] Could not get state.", __func__, instance); 891 return (ENXIO); 892 } 893 894 /* 895 * Check to see if we can communicate with vds 896 */ 897 if (!vdc_is_able_to_tx_data(vdc, 0)) { 898 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 899 return (ETIMEDOUT); 900 } 901 902 if (vdc->dkio_flush_pending) { 903 PR0("%s[%d]: Cannot detach: %d outstanding DKIO flushes", 904 __func__, instance, vdc->dkio_flush_pending); 905 return (EBUSY); 906 } 907 908 /* 909 * Should not need the mutex here, since the framework should protect 910 * against more opens on this device, but just in case. 
911 */ 912 mutex_enter(&vdc->lock); 913 vdc->open--; 914 mutex_exit(&vdc->lock); 915 916 return (0); 917 } 918 919 static int 920 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 921 { 922 _NOTE(ARGUNUSED(credp)) 923 _NOTE(ARGUNUSED(rvalp)) 924 925 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode)); 926 } 927 928 static int 929 vdc_print(dev_t dev, char *str) 930 { 931 cmn_err(CE_NOTE, "vdc%d: %s", SDUNIT(getminor(dev)), str); 932 return (0); 933 } 934 935 static int 936 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 937 { 938 int rv = 0; 939 size_t nbytes = (nblk * DEV_BSIZE); 940 int instance = SDUNIT(getminor(dev)); 941 vdc_t *vdc; 942 943 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 944 vdc_msg("%s (%d): Could not get state.", __func__, instance); 945 return (ENXIO); 946 } 947 948 rv = vdc_populate_descriptor(vdc, addr, nbytes, VD_OP_BWRITE, 949 blkno, SDPART(getminor(dev))); 950 951 PR1("%s: status=%d\n", __func__, rv); 952 953 return (rv); 954 } 955 956 /* -------------------------------------------------------------------------- */ 957 958 /* 959 * Disk access routines 960 * 961 */ 962 963 /* 964 * vdc_strategy() 965 * 966 * Return Value: 967 * 0: As per strategy(9E), the strategy() function must return 0 968 * [ bioerror(9f) sets b_flags to the proper error code ] 969 */ 970 static int 971 vdc_strategy(struct buf *buf) 972 { 973 int rv = -1; 974 vdc_t *vdc = NULL; 975 int instance = SDUNIT(getminor(buf->b_edev)); 976 int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; 977 978 PR1("%s: %s %ld bytes at block %ld : b_addr=0x%p", 979 __func__, (buf->b_flags & B_READ) ? 
"Read" : "Write", 980 buf->b_bcount, buf->b_lblkno, buf->b_un.b_addr); 981 982 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 983 vdc_msg("%s[%d]: Could not get state.", __func__, instance); 984 bioerror(buf, ENXIO); 985 biodone(buf); 986 return (0); 987 } 988 989 ASSERT(buf->b_bcount <= (vdc->max_xfer_sz * vdc->block_size)); 990 991 if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) { 992 vdc_msg("%s: Not ready to transmit data", __func__); 993 bioerror(buf, ENXIO); 994 biodone(buf); 995 return (0); 996 } 997 bp_mapin(buf); 998 999 rv = vdc_populate_descriptor(vdc, buf->b_un.b_addr, buf->b_bcount, op, 1000 buf->b_lblkno, SDPART(getminor(buf->b_edev))); 1001 1002 PR1("%s: status=%d", __func__, rv); 1003 bioerror(buf, rv); 1004 biodone(buf); 1005 return (0); 1006 } 1007 1008 1009 static int 1010 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1011 { 1012 _NOTE(ARGUNUSED(cred)) 1013 1014 PR1("vdc_read(): Entered"); 1015 return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio)); 1016 } 1017 1018 static int 1019 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1020 { 1021 _NOTE(ARGUNUSED(cred)) 1022 1023 PR1("vdc_write(): Entered"); 1024 return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio)); 1025 } 1026 1027 static int 1028 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1029 { 1030 _NOTE(ARGUNUSED(cred)) 1031 1032 PR1("vdc_aread(): Entered"); 1033 return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio)); 1034 } 1035 1036 static int 1037 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1038 { 1039 _NOTE(ARGUNUSED(cred)) 1040 1041 PR1("vdc_awrite(): Entered"); 1042 return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio)); 1043 } 1044 1045 1046 /* -------------------------------------------------------------------------- */ 1047 1048 /* 1049 * Handshake support 1050 */ 1051 1052 /* 1053 * vdc_init_handshake_negotiation 1054 * 1055 * Description: 1056 * This function is called to trigger the 
handshake negotiations between 1057 * the client (vdc) and the server (vds). It may be called multiple times. 1058 * 1059 * Parameters: 1060 * vdc - soft state pointer 1061 */ 1062 static void 1063 vdc_init_handshake_negotiation(void *arg) 1064 { 1065 vdc_t *vdc = (vdc_t *)(void *)arg; 1066 ldc_status_t ldc_state; 1067 vd_state_t state; 1068 int status; 1069 1070 ASSERT(vdc != NULL); 1071 1072 PR0("[%d] Initializing vdc<->vds handshake\n", vdc->instance); 1073 1074 /* get LDC state */ 1075 status = ldc_status(vdc->ldc_handle, &ldc_state); 1076 if (status != 0) { 1077 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status: err=%d", 1078 vdc->instance, status); 1079 return; 1080 } 1081 1082 /* 1083 * If the LDC connection is not UP we bring it up now and return. 1084 * The handshake will be started again when the callback is 1085 * triggered due to the UP event. 1086 */ 1087 if (ldc_state != LDC_UP) { 1088 PR0("[%d] Triggering an LDC_UP and returning\n", vdc->instance); 1089 (void) vdc_do_ldc_up(vdc); 1090 return; 1091 } 1092 1093 mutex_enter(&vdc->lock); 1094 /* 1095 * Do not continue if another thread has triggered a handshake which 1096 * has not been reset or detach() has stopped further handshakes. 1097 */ 1098 if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) { 1099 PR0("%s[%d] Negotiation not triggered. [init=%x]\n", 1100 __func__, vdc->instance, vdc->initialized); 1101 mutex_exit(&vdc->lock); 1102 return; 1103 } 1104 1105 if (vdc->hshake_cnt++ > vdc_retries) { 1106 cmn_err(CE_NOTE, "[%d] Failed repeatedly to complete handshake" 1107 "with vDisk server", vdc->instance); 1108 mutex_exit(&vdc->lock); 1109 return; 1110 } 1111 1112 vdc->initialized |= VDC_HANDSHAKE; 1113 vdc->ldc_state = ldc_state; 1114 1115 state = vdc->state; 1116 1117 if (state == VD_STATE_INIT) { 1118 /* 1119 * Set the desired version parameter to the first entry in the 1120 * version array. 
If this specific version is not supported, 1121 * the response handling code will step down the version number 1122 * to the next array entry and deal with it accordingly. 1123 */ 1124 (void) vdc_init_ver_negotiation(vdc, vdc_version[0]); 1125 } else if (state == VD_STATE_VER) { 1126 (void) vdc_init_attr_negotiation(vdc); 1127 } else if (state == VD_STATE_ATTR) { 1128 (void) vdc_init_dring_negotiate(vdc); 1129 } else if (state == VD_STATE_DATA) { 1130 /* 1131 * nothing to do - we have already completed the negotiation 1132 * and we can transmit data when ready. 1133 */ 1134 PR0("%s[%d] Negotiation triggered after handshake completed", 1135 __func__, vdc->instance); 1136 } 1137 1138 mutex_exit(&vdc->lock); 1139 } 1140 1141 /* 1142 * Function: 1143 * vdc_init_ver_negotiation() 1144 * 1145 * Description: 1146 * 1147 * Arguments: 1148 * vdc - soft state pointer for this instance of the device driver. 1149 * 1150 * Return Code: 1151 * 0 - Success 1152 */ 1153 static int 1154 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 1155 { 1156 vio_ver_msg_t pkt; 1157 size_t msglen = sizeof (pkt); 1158 int status = -1; 1159 1160 PR0("%s: Entered.\n", __func__); 1161 1162 ASSERT(vdc != NULL); 1163 ASSERT(mutex_owned(&vdc->lock)); 1164 1165 /* 1166 * set the Session ID to a unique value 1167 * (the lower 32 bits of the clock tick) 1168 */ 1169 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1170 1171 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1172 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1173 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1174 pkt.tag.vio_sid = vdc->session_id; 1175 pkt.dev_class = VDEV_DISK; 1176 pkt.ver_major = ver.major; 1177 pkt.ver_minor = ver.minor; 1178 1179 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1180 PR0("%s: vdc_send(status = %d)\n", __func__, status); 1181 1182 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1183 PR0("%s[%d] vdc_send failed: id(%lx) rv(%d) size(%d)\n", 1184 __func__, vdc->instance, vdc->ldc_handle, 1185 status, msglen); 
1186 if (msglen != sizeof (vio_ver_msg_t)) 1187 status = ENOMSG; 1188 } 1189 1190 return (status); 1191 } 1192 1193 /* 1194 * Function: 1195 * vdc_init_attr_negotiation() 1196 * 1197 * Description: 1198 * 1199 * Arguments: 1200 * vdc - soft state pointer for this instance of the device driver. 1201 * 1202 * Return Code: 1203 * 0 - Success 1204 */ 1205 static int 1206 vdc_init_attr_negotiation(vdc_t *vdc) 1207 { 1208 vd_attr_msg_t pkt; 1209 size_t msglen = sizeof (pkt); 1210 int status; 1211 1212 ASSERT(vdc != NULL); 1213 ASSERT(mutex_owned(&vdc->lock)); 1214 1215 PR0("%s[%d] entered\n", __func__, vdc->instance); 1216 1217 /* fill in tag */ 1218 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1219 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1220 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1221 pkt.tag.vio_sid = vdc->session_id; 1222 /* fill in payload */ 1223 pkt.max_xfer_sz = vdc->max_xfer_sz; 1224 pkt.vdisk_block_size = vdc->block_size; 1225 pkt.xfer_mode = VIO_DRING_MODE; 1226 pkt.operations = 0; /* server will set bits of valid operations */ 1227 pkt.vdisk_type = 0; /* server will set to valid device type */ 1228 pkt.vdisk_size = 0; /* server will set to valid size */ 1229 1230 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1231 PR0("%s: vdc_send(status = %d)\n", __func__, status); 1232 1233 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1234 PR0("%s[%d] ldc_write failed: id(%lx) rv(%d) size (%d)\n", 1235 __func__, vdc->instance, vdc->ldc_handle, 1236 status, msglen); 1237 if (msglen != sizeof (vio_ver_msg_t)) 1238 status = ENOMSG; 1239 } 1240 1241 return (status); 1242 } 1243 1244 /* 1245 * Function: 1246 * vdc_init_dring_negotiate() 1247 * 1248 * Description: 1249 * 1250 * Arguments: 1251 * vdc - soft state pointer for this instance of the device driver. 
1252 * 1253 * Return Code: 1254 * 0 - Success 1255 */ 1256 static int 1257 vdc_init_dring_negotiate(vdc_t *vdc) 1258 { 1259 vio_dring_reg_msg_t pkt; 1260 size_t msglen = sizeof (pkt); 1261 int status = -1; 1262 1263 ASSERT(vdc != NULL); 1264 ASSERT(mutex_owned(&vdc->lock)); 1265 1266 status = vdc_init_descriptor_ring(vdc); 1267 if (status != 0) { 1268 cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n", 1269 vdc->instance, status); 1270 vdc_destroy_descriptor_ring(vdc); 1271 vdc_reset_connection(vdc, B_FALSE); 1272 return (status); 1273 } 1274 PR0("%s[%d] Init of descriptor ring completed (status = %d)\n", 1275 __func__, vdc->instance, status); 1276 1277 /* fill in tag */ 1278 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1279 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1280 pkt.tag.vio_subtype_env = VIO_DRING_REG; 1281 pkt.tag.vio_sid = vdc->session_id; 1282 /* fill in payload */ 1283 pkt.dring_ident = 0; 1284 pkt.num_descriptors = VD_DRING_LEN; 1285 pkt.descriptor_size = VD_DRING_ENTRY_SZ; 1286 pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 1287 pkt.ncookies = vdc->dring_cookie_count; 1288 pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 1289 1290 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1291 if (status != 0) { 1292 PR0("%s[%d] Failed to register DRing (status = %d)\n", 1293 __func__, vdc->instance, status); 1294 vdc_reset_connection(vdc, B_FALSE); 1295 } 1296 1297 return (status); 1298 } 1299 1300 1301 /* -------------------------------------------------------------------------- */ 1302 1303 /* 1304 * LDC helper routines 1305 */ 1306 1307 /* 1308 * Function: 1309 * vdc_send() 1310 * 1311 * Description: 1312 * The function encapsulates the call to write a message using LDC. 1313 * If LDC indicates that the call failed due to the queue being full, 1314 * we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise 1315 * we return the error returned by LDC. 
1316 * 1317 * Arguments: 1318 * ldc_handle - LDC handle for the channel this instance of vdc uses 1319 * pkt - address of LDC message to be sent 1320 * msglen - the size of the message being sent. When the function 1321 * returns, this contains the number of bytes written. 1322 * 1323 * Return Code: 1324 * 0 - Success. 1325 * EINVAL - pkt or msglen were NULL 1326 * ECONNRESET - The connection was not up. 1327 * EWOULDBLOCK - LDC queue is full 1328 * xxx - other error codes returned by ldc_write 1329 */ 1330 static int 1331 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 1332 { 1333 size_t size = 0; 1334 int retries = 0; 1335 int status = 0; 1336 1337 ASSERT(vdc != NULL); 1338 ASSERT(mutex_owned(&vdc->lock)); 1339 ASSERT(msglen != NULL); 1340 ASSERT(*msglen != 0); 1341 1342 do { 1343 size = *msglen; 1344 status = ldc_write(vdc->ldc_handle, pkt, &size); 1345 } while (status == EWOULDBLOCK && retries++ < vdc_retries); 1346 1347 /* if LDC had serious issues --- reset vdc state */ 1348 if (status == EIO || status == ECONNRESET) { 1349 vdc_reset_connection(vdc, B_TRUE); 1350 } 1351 1352 /* return the last size written */ 1353 *msglen = size; 1354 1355 return (status); 1356 } 1357 1358 /* 1359 * Function: 1360 * vdc_get_ldc_id() 1361 * 1362 * Description: 1363 * This function gets the 'ldc-id' for this particular instance of vdc. 1364 * The id returned is the guest domain channel endpoint LDC uses for 1365 * communication with vds. 1366 * 1367 * Arguments: 1368 * dip - dev info pointer for this instance of the device driver. 1369 * ldc_id - pointer to variable used to return the 'ldc-id' found. 1370 * 1371 * Return Code: 1372 * 0 - Success. 1373 * ENOENT - Expected node or property did not exist. 
1374 * ENXIO - Unexpected error communicating with MD framework 1375 */ 1376 static int 1377 vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id) 1378 { 1379 int status = ENOENT; 1380 char *node_name = NULL; 1381 md_t *mdp = NULL; 1382 int num_nodes; 1383 int num_vdevs; 1384 int num_chans; 1385 mde_cookie_t rootnode; 1386 mde_cookie_t *listp = NULL; 1387 mde_cookie_t *chanp = NULL; 1388 boolean_t found_inst = B_FALSE; 1389 int listsz; 1390 int idx; 1391 uint64_t md_inst; 1392 int obp_inst; 1393 int instance = ddi_get_instance(dip); 1394 1395 ASSERT(ldc_id != NULL); 1396 *ldc_id = 0; 1397 1398 /* 1399 * Get the OBP instance number for comparison with the MD instance 1400 * 1401 * The "cfg-handle" property of a vdc node in an MD contains the MD's 1402 * notion of "instance", or unique identifier, for that node; OBP 1403 * stores the value of the "cfg-handle" MD property as the value of 1404 * the "reg" property on the node in the device tree it builds from 1405 * the MD and passes to Solaris. Thus, we look up the devinfo node's 1406 * "reg" property value to uniquely identify this device instance. 1407 * If the "reg" property cannot be found, the device tree state is 1408 * presumably so broken that there is no point in continuing. 1409 */ 1410 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 1411 cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 1412 return (ENOENT); 1413 } 1414 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1415 OBP_REG, -1); 1416 PR1("%s[%d]: OBP inst=%d\n", __func__, instance, obp_inst); 1417 1418 /* 1419 * We now walk the MD nodes and if an instance of a vdc node matches 1420 * the instance got from OBP we get the ldc-id property. 
1421 */ 1422 if ((mdp = md_get_handle()) == NULL) { 1423 cmn_err(CE_WARN, "unable to init machine description"); 1424 return (ENXIO); 1425 } 1426 1427 num_nodes = md_node_count(mdp); 1428 ASSERT(num_nodes > 0); 1429 1430 listsz = num_nodes * sizeof (mde_cookie_t); 1431 1432 /* allocate memory for nodes */ 1433 listp = kmem_zalloc(listsz, KM_SLEEP); 1434 chanp = kmem_zalloc(listsz, KM_SLEEP); 1435 1436 rootnode = md_root_node(mdp); 1437 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 1438 1439 /* 1440 * Search for all the virtual devices, we will then check to see which 1441 * ones are disk nodes. 1442 */ 1443 num_vdevs = md_scan_dag(mdp, rootnode, 1444 md_find_name(mdp, VDC_MD_VDEV_NAME), 1445 md_find_name(mdp, "fwd"), listp); 1446 1447 if (num_vdevs <= 0) { 1448 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 1449 status = ENOENT; 1450 goto done; 1451 } 1452 1453 PR1("%s[%d] num_vdevs=%d\n", __func__, instance, num_vdevs); 1454 for (idx = 0; idx < num_vdevs; idx++) { 1455 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 1456 if ((status != 0) || (node_name == NULL)) { 1457 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 1458 ": err %d", VDC_MD_VDEV_NAME, status); 1459 continue; 1460 } 1461 1462 PR1("%s[%d] Found node %s\n", __func__, instance, node_name); 1463 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 1464 status = md_get_prop_val(mdp, listp[idx], 1465 VDC_MD_CFG_HDL, &md_inst); 1466 PR1("%s[%d] vdc inst# in MD=%d\n", 1467 __func__, instance, md_inst); 1468 if ((status == 0) && (md_inst == obp_inst)) { 1469 found_inst = B_TRUE; 1470 break; 1471 } 1472 } 1473 } 1474 1475 if (!found_inst) { 1476 cmn_err(CE_NOTE, "Unable to find correct '%s' node", 1477 VDC_MD_DISK_NAME); 1478 status = ENOENT; 1479 goto done; 1480 } 1481 PR0("%s[%d] MD inst=%d\n", __func__, instance, md_inst); 1482 1483 /* get the channels for this node */ 1484 num_chans = md_scan_dag(mdp, listp[idx], 1485 md_find_name(mdp, VDC_MD_CHAN_NAME), 1486 md_find_name(mdp, 
"fwd"), chanp); 1487 1488 /* expecting at least one channel */ 1489 if (num_chans <= 0) { 1490 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 1491 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 1492 status = ENOENT; 1493 goto done; 1494 1495 } else if (num_chans != 1) { 1496 PR0("%s[%d] Expected 1 '%s' node for '%s' port, found %d\n", 1497 __func__, instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 1498 num_chans); 1499 } 1500 1501 /* 1502 * We use the first channel found (index 0), irrespective of how 1503 * many are there in total. 1504 */ 1505 if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) { 1506 cmn_err(CE_NOTE, "Channel '%s' property not found", 1507 VDC_ID_PROP); 1508 status = ENOENT; 1509 } 1510 1511 PR0("%s[%d] LDC id is 0x%lx\n", __func__, instance, *ldc_id); 1512 1513 done: 1514 if (chanp) 1515 kmem_free(chanp, listsz); 1516 if (listp) 1517 kmem_free(listp, listsz); 1518 1519 (void) md_fini_handle(mdp); 1520 1521 return (status); 1522 } 1523 1524 static int 1525 vdc_do_ldc_up(vdc_t *vdc) 1526 { 1527 int status; 1528 1529 PR0("[%d] Bringing up channel %x\n", vdc->instance, vdc->ldc_id); 1530 1531 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 1532 switch (status) { 1533 case ECONNREFUSED: /* listener not ready at other end */ 1534 PR0("%s: ldc_up(%d,...) return %d\n", 1535 __func__, vdc->ldc_id, status); 1536 status = 0; 1537 break; 1538 default: 1539 cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: " 1540 "channel=%ld, err=%d", 1541 vdc->instance, vdc->ldc_id, status); 1542 } 1543 } 1544 1545 return (status); 1546 } 1547 1548 1549 /* 1550 * vdc_is_able_to_tx_data() 1551 * 1552 * Description: 1553 * This function checks if we are able to send data to the 1554 * vDisk server (vds). The LDC connection needs to be up and 1555 * vdc & vds need to have completed the handshake negotiation. 
1556 * 1557 * Parameters: 1558 * vdc - soft state pointer 1559 * flag - flag to indicate if we can block or not 1560 * [ If O_NONBLOCK or O_NDELAY (which are defined in 1561 * open(2)) are set then do not block) 1562 * 1563 * Return Values 1564 * B_TRUE - can talk to vds 1565 * B_FALSE - unable to talk to vds 1566 */ 1567 static boolean_t 1568 vdc_is_able_to_tx_data(vdc_t *vdc, int flag) 1569 { 1570 vd_state_t state; 1571 uint32_t ldc_state; 1572 uint_t retries = 0; 1573 int rv = -1; 1574 1575 ASSERT(vdc != NULL); 1576 1577 mutex_enter(&vdc->lock); 1578 state = vdc->state; 1579 ldc_state = vdc->ldc_state; 1580 mutex_exit(&vdc->lock); 1581 1582 if ((state == VD_STATE_DATA) && (ldc_state == LDC_UP)) 1583 return (B_TRUE); 1584 1585 if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) { 1586 PR0("%s[%d] Not ready to tx - state %d LDC state %d\n", 1587 __func__, vdc->instance, state, ldc_state); 1588 return (B_FALSE); 1589 } 1590 1591 /* 1592 * We want to check and see if any negotiations triggered earlier 1593 * have succeeded. We are prepared to wait a little while in case 1594 * they are still in progress. 1595 */ 1596 mutex_enter(&vdc->lock); 1597 while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) { 1598 PR0("%s: Waiting for connection at state %d (LDC state %d)\n", 1599 __func__, vdc->state, vdc->ldc_state); 1600 1601 rv = cv_timedwait(&vdc->cv, &vdc->lock, 1602 VD_GET_TIMEOUT_HZ(retries)); 1603 1604 /* 1605 * An rv of -1 indicates that we timed out without the LDC 1606 * state changing so it looks like the other side (vdc) is 1607 * not yet ready/responding. 1608 * 1609 * Any other value of rv indicates that the LDC triggered an 1610 * interrupt so we just loop again, check the handshake state 1611 * and keep waiting if necessary. 
1612 */ 1613 if (rv == -1) { 1614 if (retries >= vdc_retries) { 1615 PR0("%s[%d] handshake wait timed out.\n", 1616 __func__, vdc->instance); 1617 mutex_exit(&vdc->lock); 1618 return (B_FALSE); 1619 } else { 1620 PR1("%s[%d] Retry #%d for handshake timedout\n", 1621 __func__, vdc->instance, retries); 1622 retries++; 1623 } 1624 } 1625 } 1626 1627 ASSERT(vdc->ldc_state == LDC_UP); 1628 ASSERT(vdc->state == VD_STATE_DATA); 1629 1630 mutex_exit(&vdc->lock); 1631 1632 return (B_TRUE); 1633 } 1634 1635 1636 /* 1637 * Function: 1638 * vdc_terminate_ldc() 1639 * 1640 * Description: 1641 * 1642 * Arguments: 1643 * vdc - soft state pointer for this instance of the device driver. 1644 * 1645 * Return Code: 1646 * None 1647 */ 1648 static void 1649 vdc_terminate_ldc(vdc_t *vdc) 1650 { 1651 int instance = ddi_get_instance(vdc->dip); 1652 1653 ASSERT(vdc != NULL); 1654 ASSERT(mutex_owned(&vdc->lock)); 1655 1656 PR0("%s[%d] initialized=%x\n", __func__, instance, vdc->initialized); 1657 1658 if (vdc->initialized & VDC_LDC_OPEN) { 1659 PR0("%s[%d]: ldc_close()\n", __func__, instance); 1660 (void) ldc_close(vdc->ldc_handle); 1661 } 1662 if (vdc->initialized & VDC_LDC_CB) { 1663 PR0("%s[%d]: ldc_unreg_callback()\n", __func__, instance); 1664 (void) ldc_unreg_callback(vdc->ldc_handle); 1665 } 1666 if (vdc->initialized & VDC_LDC) { 1667 PR0("%s[%d]: ldc_fini()\n", __func__, instance); 1668 (void) ldc_fini(vdc->ldc_handle); 1669 vdc->ldc_handle = NULL; 1670 } 1671 1672 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 1673 } 1674 1675 /* 1676 * Function: 1677 * vdc_reset_connection() 1678 * 1679 * Description: 1680 * 1681 * Arguments: 1682 * vdc - soft state pointer for this instance of the device driver. 1683 * reset_ldc - Flag whether or not to reset the LDC connection also. 
1684 * 1685 * Return Code: 1686 * None 1687 */ 1688 static void 1689 vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc) 1690 { 1691 int status; 1692 1693 ASSERT(vdc != NULL); 1694 ASSERT(mutex_owned(&vdc->lock)); 1695 1696 PR0("%s[%d] Entered\n", __func__, vdc->instance); 1697 1698 vdc->state = VD_STATE_INIT; 1699 1700 if (reset_ldc) { 1701 status = ldc_reset(vdc->ldc_handle); 1702 PR0("%s[%d] ldc_reset() = %d\n", 1703 __func__, vdc->instance, status); 1704 } 1705 1706 vdc->initialized &= ~VDC_HANDSHAKE; 1707 PR0("%s[%d] init=%x\n", __func__, vdc->instance, vdc->initialized); 1708 } 1709 1710 /* -------------------------------------------------------------------------- */ 1711 1712 /* 1713 * Descriptor Ring helper routines 1714 */ 1715 1716 /* 1717 * Function: 1718 * vdc_init_descriptor_ring() 1719 * 1720 * Description: 1721 * 1722 * Arguments: 1723 * vdc - soft state pointer for this instance of the device driver. 1724 * 1725 * Return Code: 1726 * 0 - Success 1727 */ 1728 static int 1729 vdc_init_descriptor_ring(vdc_t *vdc) 1730 { 1731 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 1732 int status = 0; 1733 int i; 1734 1735 PR0("%s[%d] initialized=%x\n", 1736 __func__, vdc->instance, vdc->initialized); 1737 1738 ASSERT(vdc != NULL); 1739 ASSERT(mutex_owned(&vdc->lock)); 1740 ASSERT(vdc->ldc_handle != NULL); 1741 1742 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 1743 PR0("%s[%d] ldc_mem_dring_create\n", __func__, vdc->instance); 1744 status = ldc_mem_dring_create(VD_DRING_LEN, VD_DRING_ENTRY_SZ, 1745 &vdc->ldc_dring_hdl); 1746 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 1747 PR0("%s: Failed to create a descriptor ring", __func__); 1748 return (status); 1749 } 1750 vdc->dring_entry_size = VD_DRING_ENTRY_SZ; 1751 vdc->dring_len = VD_DRING_LEN; 1752 vdc->initialized |= VDC_DRING_INIT; 1753 } 1754 1755 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 1756 PR0("%s[%d] ldc_mem_dring_bind\n", __func__, vdc->instance); 1757 vdc->dring_cookie = 1758 
kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 1759 1760 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 1761 LDC_SHADOW_MAP, LDC_MEM_RW, 1762 &vdc->dring_cookie[0], 1763 &vdc->dring_cookie_count); 1764 if (status != 0) { 1765 PR0("%s: Failed to bind descriptor ring (%p) " 1766 "to channel (%p)\n", 1767 __func__, vdc->ldc_dring_hdl, vdc->ldc_handle); 1768 return (status); 1769 } 1770 ASSERT(vdc->dring_cookie_count == 1); 1771 vdc->initialized |= VDC_DRING_BOUND; 1772 } 1773 1774 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 1775 if (status != 0) { 1776 PR0("%s: Failed to get info for descriptor ring (%p)\n", 1777 __func__, vdc->ldc_dring_hdl); 1778 return (status); 1779 } 1780 1781 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 1782 PR0("%s[%d] local dring\n", __func__, vdc->instance); 1783 1784 /* Allocate the local copy of this dring */ 1785 vdc->local_dring = 1786 kmem_zalloc(VD_DRING_LEN * sizeof (vdc_local_desc_t), 1787 KM_SLEEP); 1788 vdc->initialized |= VDC_DRING_LOCAL; 1789 } 1790 1791 /* 1792 * Mark all DRing entries as free and initialize the private 1793 * descriptor's memory handles. If any entry is initialized, 1794 * we need to free it later so we set the bit in 'initialized' 1795 * at the start. 1796 */ 1797 vdc->initialized |= VDC_DRING_ENTRY; 1798 for (i = 0; i < VD_DRING_LEN; i++) { 1799 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 1800 dep->hdr.dstate = VIO_DESC_FREE; 1801 1802 status = ldc_mem_alloc_handle(vdc->ldc_handle, 1803 &vdc->local_dring[i].desc_mhdl); 1804 if (status != 0) { 1805 cmn_err(CE_NOTE, "![%d] Failed to alloc mem handle for" 1806 " descriptor %d", vdc->instance, i); 1807 return (status); 1808 } 1809 vdc->local_dring[i].flags = VIO_DESC_FREE; 1810 vdc->local_dring[i].dep = dep; 1811 1812 mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL); 1813 cv_init(&vdc->local_dring[i].cv, NULL, CV_DRIVER, NULL); 1814 } 1815 1816 /* 1817 * We init the index of the last DRing entry used. 
Since the code to 1818 * get the next available entry increments it before selecting one, 1819 * we set it to the last DRing entry so that it wraps around to zero 1820 * for the 1st entry to be used. 1821 */ 1822 vdc->dring_curr_idx = VD_DRING_LEN - 1; 1823 1824 return (status); 1825 } 1826 1827 /* 1828 * Function: 1829 * vdc_destroy_descriptor_ring() 1830 * 1831 * Description: 1832 * 1833 * Arguments: 1834 * vdc - soft state pointer for this instance of the device driver. 1835 * 1836 * Return Code: 1837 * None 1838 */ 1839 static void 1840 vdc_destroy_descriptor_ring(vdc_t *vdc) 1841 { 1842 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 1843 ldc_mem_handle_t mhdl = NULL; 1844 int status = -1; 1845 int i; /* loop */ 1846 1847 ASSERT(vdc != NULL); 1848 ASSERT(mutex_owned(&vdc->lock)); 1849 ASSERT(vdc->state == VD_STATE_INIT); 1850 1851 PR0("%s: Entered\n", __func__); 1852 1853 if (vdc->initialized & VDC_DRING_ENTRY) { 1854 PR0("[%d] Removing Local DRing entries\n", vdc->instance); 1855 for (i = 0; i < VD_DRING_LEN; i++) { 1856 ldep = &vdc->local_dring[i]; 1857 mhdl = ldep->desc_mhdl; 1858 1859 if (mhdl == NULL) 1860 continue; 1861 1862 (void) ldc_mem_free_handle(mhdl); 1863 mutex_destroy(&ldep->lock); 1864 cv_destroy(&ldep->cv); 1865 } 1866 vdc->initialized &= ~VDC_DRING_ENTRY; 1867 } 1868 1869 if (vdc->initialized & VDC_DRING_LOCAL) { 1870 PR0("[%d] Freeing Local DRing\n", vdc->instance); 1871 kmem_free(vdc->local_dring, 1872 VD_DRING_LEN * sizeof (vdc_local_desc_t)); 1873 vdc->initialized &= ~VDC_DRING_LOCAL; 1874 } 1875 1876 if (vdc->initialized & VDC_DRING_BOUND) { 1877 PR0("[%d] Unbinding DRing\n", vdc->instance); 1878 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 1879 if (status == 0) { 1880 vdc->initialized &= ~VDC_DRING_BOUND; 1881 } else { 1882 vdc_msg("%s: Failed to unbind Descriptor Ring (%lx)\n", 1883 vdc->ldc_dring_hdl); 1884 } 1885 } 1886 1887 if (vdc->initialized & VDC_DRING_INIT) { 1888 PR0("[%d] Destroying DRing\n", 
vdc->instance); 1889 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 1890 if (status == 0) { 1891 vdc->ldc_dring_hdl = NULL; 1892 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 1893 vdc->initialized &= ~VDC_DRING_INIT; 1894 } else { 1895 vdc_msg("%s: Failed to destroy Descriptor Ring (%lx)\n", 1896 vdc->ldc_dring_hdl); 1897 } 1898 } 1899 } 1900 1901 /* 1902 * vdc_get_next_dring_entry_idx() 1903 * 1904 * Description: 1905 * This function gets the index of the next Descriptor Ring entry available 1906 * 1907 * Return Value: 1908 * 0 <= rv < VD_DRING_LEN Next available slot 1909 * -1 DRing is full 1910 */ 1911 static int 1912 vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t num_slots_needed) 1913 { 1914 _NOTE(ARGUNUSED(num_slots_needed)) 1915 1916 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 1917 int idx = -1; 1918 int start_idx = 0; 1919 1920 ASSERT(vdc != NULL); 1921 ASSERT(vdc->dring_len == VD_DRING_LEN); 1922 ASSERT(vdc->dring_curr_idx >= 0); 1923 ASSERT(vdc->dring_curr_idx < VD_DRING_LEN); 1924 ASSERT(mutex_owned(&vdc->dring_lock)); 1925 1926 /* Start at the last entry used */ 1927 idx = start_idx = vdc->dring_curr_idx; 1928 1929 /* 1930 * Loop through Descriptor Ring checking for a free entry until we reach 1931 * the entry we started at. We should never come close to filling the 1932 * Ring at any stage, instead this is just to prevent an entry which 1933 * gets into an inconsistent state (e.g. due to a request timing out) 1934 * from blocking progress. 
1935 */ 1936 do { 1937 /* Get the next entry after the last known index tried */ 1938 idx = (idx + 1) % VD_DRING_LEN; 1939 1940 dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx); 1941 ASSERT(dep != NULL); 1942 1943 if (dep->hdr.dstate == VIO_DESC_FREE) { 1944 ASSERT(idx >= 0); 1945 ASSERT(idx < VD_DRING_LEN); 1946 vdc->dring_curr_idx = idx; 1947 return (idx); 1948 1949 } else if (dep->hdr.dstate == VIO_DESC_READY) { 1950 PR0("%s: Entry %d waiting to be accepted\n", 1951 __func__, idx); 1952 continue; 1953 1954 } else if (dep->hdr.dstate == VIO_DESC_ACCEPTED) { 1955 PR0("%s: Entry %d waiting to be processed\n", 1956 __func__, idx); 1957 continue; 1958 1959 } else if (dep->hdr.dstate == VIO_DESC_DONE) { 1960 PR0("%s: Entry %d done but not marked free\n", 1961 __func__, idx); 1962 1963 /* 1964 * If we are currently panicking, interrupts are 1965 * disabled and we will not be getting ACKs from the 1966 * vDisk server so we mark the descriptor ring entries 1967 * as FREE here instead of in the ACK handler. 1968 */ 1969 if (panicstr) { 1970 (void) vdc_depopulate_descriptor(vdc, idx); 1971 dep->hdr.dstate = VIO_DESC_FREE; 1972 vdc->local_dring[idx].flags = VIO_DESC_FREE; 1973 } 1974 continue; 1975 1976 } else { 1977 vdc_msg("Public Descriptor Ring entry corrupted"); 1978 mutex_enter(&vdc->lock); 1979 vdc_reset_connection(vdc, B_FALSE); 1980 mutex_exit(&vdc->lock); 1981 return (-1); 1982 } 1983 1984 } while (idx != start_idx); 1985 1986 return (-1); 1987 } 1988 1989 /* 1990 * Function: 1991 * vdc_populate_descriptor 1992 * 1993 * Description: 1994 * This routine writes the data to be transmitted to vds into the 1995 * descriptor, notifies vds that the ring has been updated and 1996 * then waits for the request to be processed. 1997 * 1998 * Arguments: 1999 * vdc - the soft state pointer 2000 * addr - start address of memory region. 
2001 * nbytes - number of bytes to read/write 2002 * operation - operation we want vds to perform (VD_OP_XXX) 2003 * arg - parameter to be sent to server (depends on VD_OP_XXX type) 2004 * . mode for ioctl(9e) 2005 * . LP64 diskaddr_t (block I/O) 2006 * slice - the disk slice this request is for 2007 * 2008 * Return Codes: 2009 * 0 2010 * EAGAIN 2011 * EFAULT 2012 * ENXIO 2013 * EIO 2014 */ 2015 static int 2016 vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int operation, 2017 uint64_t arg, uint64_t slice) 2018 { 2019 vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ 2020 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2021 int idx = 0; /* Index of DRing entry used */ 2022 vio_dring_msg_t dmsg; 2023 size_t msglen = sizeof (dmsg); 2024 int status = 0; 2025 int rv; 2026 int retries = 0; 2027 2028 ASSERT(vdc != NULL); 2029 ASSERT(slice < V_NUMPAR); 2030 2031 /* 2032 * Get next available DRing entry. 2033 */ 2034 mutex_enter(&vdc->dring_lock); 2035 idx = vdc_get_next_dring_entry_idx(vdc, 1); 2036 if (idx == -1) { 2037 mutex_exit(&vdc->dring_lock); 2038 vdc_msg("%s[%d]: no descriptor ring entry avail, seq=%d\n", 2039 __func__, vdc->instance, vdc->seq_num); 2040 2041 /* 2042 * Since strategy should not block we don't wait for the DRing 2043 * to empty and instead return 2044 */ 2045 return (EAGAIN); 2046 } 2047 2048 ASSERT(idx < VD_DRING_LEN); 2049 local_dep = &vdc->local_dring[idx]; 2050 dep = local_dep->dep; 2051 ASSERT(dep != NULL); 2052 2053 /* 2054 * Wait for anybody still using the DRing entry to finish. 2055 * (e.g. 
still waiting for vds to respond to a request) 2056 */ 2057 mutex_enter(&local_dep->lock); 2058 2059 switch (operation) { 2060 case VD_OP_BREAD: 2061 case VD_OP_BWRITE: 2062 PR1("buf=%p, block=%lx, nbytes=%lx\n", addr, arg, nbytes); 2063 dep->payload.addr = (diskaddr_t)arg; 2064 rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, operation); 2065 break; 2066 2067 case VD_OP_FLUSH: 2068 case VD_OP_GET_VTOC: 2069 case VD_OP_SET_VTOC: 2070 case VD_OP_GET_DISKGEOM: 2071 case VD_OP_SET_DISKGEOM: 2072 case VD_OP_SCSICMD: 2073 if (nbytes > 0) { 2074 rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, 2075 operation); 2076 } 2077 break; 2078 default: 2079 cmn_err(CE_NOTE, "[%d] Unsupported vDisk operation [%d]\n", 2080 vdc->instance, operation); 2081 rv = EINVAL; 2082 } 2083 2084 if (rv != 0) { 2085 mutex_exit(&local_dep->lock); 2086 mutex_exit(&vdc->dring_lock); 2087 return (rv); 2088 } 2089 2090 /* 2091 * fill in the data details into the DRing 2092 */ 2093 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdc); 2094 dep->payload.operation = operation; 2095 dep->payload.nbytes = nbytes; 2096 dep->payload.status = EINPROGRESS; /* vds will set valid value */ 2097 dep->payload.slice = slice; 2098 dep->hdr.dstate = VIO_DESC_READY; 2099 dep->hdr.ack = 1; /* request an ACK for every message */ 2100 2101 local_dep->flags = VIO_DESC_READY; 2102 local_dep->addr = addr; 2103 2104 /* 2105 * Send a msg with the DRing details to vds 2106 */ 2107 VIO_INIT_DRING_DATA_TAG(dmsg); 2108 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2109 dmsg.dring_ident = vdc->dring_ident; 2110 dmsg.start_idx = idx; 2111 dmsg.end_idx = idx; 2112 2113 PR1("ident=0x%llx, st=%d, end=%d, seq=%d req=%d dep=%p\n", 2114 vdc->dring_ident, dmsg.start_idx, dmsg.end_idx, 2115 dmsg.seq_num, dep->payload.req_id, dep); 2116 2117 mutex_enter(&vdc->lock); 2118 status = vdc_send(vdc, (caddr_t)&dmsg, &msglen); 2119 mutex_exit(&vdc->lock); 2120 PR1("%s[%d]: ldc_write() status=%d\n", __func__, vdc->instance, status); 2121 if (status != 0) { 
2122 mutex_exit(&local_dep->lock); 2123 mutex_exit(&vdc->dring_lock); 2124 vdc_msg("%s: ldc_write(%d)\n", __func__, status); 2125 return (EAGAIN); 2126 } 2127 2128 /* 2129 * If the message was successfully sent, we increment the sequence 2130 * number to be used by the next message 2131 */ 2132 vdc->seq_num++; 2133 2134 /* 2135 * XXX - potential performance enhancement (Investigate at a later date) 2136 * 2137 * for calls from strategy(9E), instead of waiting for a response from 2138 * vds, we could return at this stage and let the ACK handling code 2139 * trigger the biodone(9F) 2140 */ 2141 2142 /* 2143 * When a guest is panicking, the completion of requests needs to be 2144 * handled differently because interrupts are disabled and vdc 2145 * will not get messages. We have to poll for the messages instead. 2146 */ 2147 if (ddi_in_panic()) { 2148 int start = 0; 2149 retries = 0; 2150 for (;;) { 2151 msglen = sizeof (dmsg); 2152 status = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, 2153 &msglen); 2154 if (status) { 2155 status = EINVAL; 2156 break; 2157 } 2158 2159 /* 2160 * if there are no packets wait and check again 2161 */ 2162 if ((status == 0) && (msglen == 0)) { 2163 if (retries++ > vdc_dump_retries) { 2164 PR0("[%d] Giving up waiting, idx %d\n", 2165 vdc->instance, idx); 2166 status = EAGAIN; 2167 break; 2168 } 2169 2170 PR1("Waiting for next packet @ %d\n", idx); 2171 delay(drv_usectohz(vdc_dump_usec_timeout)); 2172 continue; 2173 } 2174 2175 /* 2176 * Ignore all messages that are not ACKs/NACKs to 2177 * DRing requests. 2178 */ 2179 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2180 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2181 PR0("discarding pkt: type=%d sub=%d env=%d\n", 2182 dmsg.tag.vio_msgtype, 2183 dmsg.tag.vio_subtype, 2184 dmsg.tag.vio_subtype_env); 2185 continue; 2186 } 2187 2188 /* 2189 * set the appropriate return value for the 2190 * current request. 
2191 */ 2192 switch (dmsg.tag.vio_subtype) { 2193 case VIO_SUBTYPE_ACK: 2194 status = 0; 2195 break; 2196 case VIO_SUBTYPE_NACK: 2197 status = EAGAIN; 2198 break; 2199 default: 2200 continue; 2201 } 2202 2203 start = dmsg.start_idx; 2204 if (start >= VD_DRING_LEN) { 2205 PR0("[%d] Bogus ack data : start %d\n", 2206 vdc->instance, start); 2207 continue; 2208 } 2209 2210 dep = VDC_GET_DRING_ENTRY_PTR(vdc, start); 2211 2212 PR1("[%d] Dumping start=%d idx=%d state=%d\n", 2213 vdc->instance, start, idx, dep->hdr.dstate); 2214 2215 if (dep->hdr.dstate != VIO_DESC_DONE) { 2216 PR0("[%d] Entry @ %d - state !DONE %d\n", 2217 vdc->instance, start, dep->hdr.dstate); 2218 continue; 2219 } 2220 2221 (void) vdc_depopulate_descriptor(vdc, start); 2222 2223 /* 2224 * We want to process all Dring entries up to 2225 * the current one so that we can return an 2226 * error with the correct request. 2227 */ 2228 if (idx > start) { 2229 PR0("[%d] Looping: start %d, idx %d\n", 2230 vdc->instance, idx, start); 2231 continue; 2232 } 2233 2234 /* exit - all outstanding requests are completed */ 2235 break; 2236 } 2237 2238 mutex_exit(&local_dep->lock); 2239 mutex_exit(&vdc->dring_lock); 2240 2241 return (status); 2242 } 2243 2244 /* 2245 * Now watch the DRing entries we modified to get the response 2246 * from vds. 2247 */ 2248 status = vdc_wait_for_descriptor_update(vdc, idx, dmsg); 2249 if (status == ETIMEDOUT) { 2250 /* debug info when dumping state on vds side */ 2251 dep->payload.status = ECANCELED; 2252 } 2253 2254 status = vdc_depopulate_descriptor(vdc, idx); 2255 PR1("%s[%d] Status=%d\n", __func__, vdc->instance, status); 2256 2257 mutex_exit(&local_dep->lock); 2258 mutex_exit(&vdc->dring_lock); 2259 2260 return (status); 2261 } 2262 2263 /* 2264 * Function: 2265 * vdc_wait_for_descriptor_update() 2266 * 2267 * Description: 2268 * 2269 * Arguments: 2270 * vdc - soft state pointer for this instance of the device driver. 
2271 * idx - Index of the Descriptor Ring entry being modified 2272 * dmsg - LDC message sent by vDisk server 2273 * 2274 * Return Code: 2275 * 0 - Success 2276 */ 2277 static int 2278 vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg) 2279 { 2280 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2281 vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ 2282 size_t msglen = sizeof (dmsg); 2283 int retries = 0; 2284 int status = 0; 2285 int rv = 0; 2286 2287 ASSERT(vdc != NULL); 2288 ASSERT(mutex_owned(&vdc->dring_lock)); 2289 ASSERT(idx < VD_DRING_LEN); 2290 local_dep = &vdc->local_dring[idx]; 2291 ASSERT(local_dep != NULL); 2292 dep = local_dep->dep; 2293 ASSERT(dep != NULL); 2294 2295 while (dep->hdr.dstate != VIO_DESC_DONE) { 2296 rv = cv_timedwait(&local_dep->cv, &local_dep->lock, 2297 VD_GET_TIMEOUT_HZ(retries)); 2298 if (rv == -1) { 2299 /* 2300 * If they persist in ignoring us we'll storm off in a 2301 * huff and return ETIMEDOUT to the upper layers. 2302 */ 2303 if (retries >= vdc_retries) { 2304 PR0("%s: Finished waiting on entry %d\n", 2305 __func__, idx); 2306 status = ETIMEDOUT; 2307 break; 2308 } else { 2309 retries++; 2310 PR0("%s[%d]: Timeout #%d on entry %d " 2311 "[seq %d][req %d]\n", __func__, 2312 vdc->instance, 2313 retries, idx, dmsg.seq_num, 2314 dep->payload.req_id); 2315 } 2316 2317 if (dep->hdr.dstate & VIO_DESC_ACCEPTED) { 2318 PR0("%s[%d]: vds has accessed entry %d [seq %d]" 2319 "[req %d] but not ack'ed it yet\n", 2320 __func__, vdc->instance, idx, dmsg.seq_num, 2321 dep->payload.req_id); 2322 continue; 2323 } 2324 2325 /* 2326 * we resend the message as it may have been dropped 2327 * and have never made it to the other side (vds). 
2328 * (We reuse the original message but update seq ID) 2329 */ 2330 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2331 retries = 0; 2332 mutex_enter(&vdc->lock); 2333 status = vdc_send(vdc, (caddr_t)&dmsg, &msglen); 2334 mutex_exit(&vdc->lock); 2335 if (status != 0) { 2336 vdc_msg("%s: Error (%d) while resending after " 2337 "timeout\n", __func__, status); 2338 status = ETIMEDOUT; 2339 break; 2340 } 2341 /* 2342 * If the message was successfully sent, we increment 2343 * the sequence number to be used by the next message. 2344 */ 2345 vdc->seq_num++; 2346 } 2347 } 2348 2349 return (status); 2350 } 2351 2352 static int 2353 vdc_get_response(vdc_t *vdc, int start, int end) 2354 { 2355 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2356 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2357 int status = ENXIO; 2358 int idx = -1; 2359 2360 ASSERT(vdc != NULL); 2361 ASSERT(start >= 0); 2362 ASSERT(start <= VD_DRING_LEN); 2363 ASSERT(start >= -1); 2364 ASSERT(start <= VD_DRING_LEN); 2365 2366 idx = start; 2367 ldep = &vdc->local_dring[idx]; 2368 ASSERT(ldep != NULL); 2369 dep = ldep->dep; 2370 ASSERT(dep != NULL); 2371 2372 PR0("%s[%d] DRING entry=%d status=%d\n", __func__, vdc->instance, 2373 idx, VIO_GET_DESC_STATE(dep->hdr.dstate)); 2374 while (VIO_GET_DESC_STATE(dep->hdr.dstate) == VIO_DESC_DONE) { 2375 if ((end != -1) && (idx > end)) 2376 return (0); 2377 2378 switch (ldep->operation) { 2379 case VD_OP_BREAD: 2380 case VD_OP_BWRITE: 2381 /* call bioxxx */ 2382 break; 2383 default: 2384 /* signal waiter */ 2385 break; 2386 } 2387 2388 /* Clear the DRing entry */ 2389 status = vdc_depopulate_descriptor(vdc, idx); 2390 PR0("%s[%d] Status=%d\n", __func__, vdc->instance, status); 2391 2392 /* loop accounting to get next DRing entry */ 2393 idx++; 2394 ldep = &vdc->local_dring[idx]; 2395 dep = ldep->dep; 2396 } 2397 2398 return (status); 2399 } 2400 2401 /* 2402 * Function: 2403 * vdc_depopulate_descriptor() 2404 * 2405 * Description: 2406 * 2407 * 
Arguments: 2408 * vdc - soft state pointer for this instance of the device driver. 2409 * idx - Index of the Descriptor Ring entry being modified 2410 * 2411 * Return Code: 2412 * 0 - Success 2413 */ 2414 static int 2415 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2416 { 2417 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2418 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2419 int status = ENXIO; 2420 2421 ASSERT(vdc != NULL); 2422 ASSERT(idx < VD_DRING_LEN); 2423 ldep = &vdc->local_dring[idx]; 2424 ASSERT(ldep != NULL); 2425 dep = ldep->dep; 2426 ASSERT(dep != NULL); 2427 2428 status = dep->payload.status; 2429 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2430 ldep = &vdc->local_dring[idx]; 2431 VIO_SET_DESC_STATE(ldep->flags, VIO_DESC_FREE); 2432 2433 /* 2434 * If the upper layer passed in a misaligned address we copied the 2435 * data into an aligned buffer before sending it to LDC - we now 2436 * copy it back to the original buffer. 2437 */ 2438 if (ldep->align_addr) { 2439 ASSERT(ldep->addr != NULL); 2440 ASSERT(dep->payload.nbytes > 0); 2441 2442 bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes); 2443 kmem_free(ldep->align_addr, 2444 sizeof (caddr_t) * dep->payload.nbytes); 2445 ldep->align_addr = NULL; 2446 } 2447 2448 status = ldc_mem_unbind_handle(ldep->desc_mhdl); 2449 if (status != 0) { 2450 cmn_err(CE_NOTE, "[%d] unbind mem hdl 0x%lx @ idx %d failed:%d", 2451 vdc->instance, ldep->desc_mhdl, idx, status); 2452 } 2453 2454 return (status); 2455 } 2456 2457 /* 2458 * Function: 2459 * vdc_populate_mem_hdl() 2460 * 2461 * Description: 2462 * 2463 * Arguments: 2464 * vdc - soft state pointer for this instance of the device driver. 
2465 * idx - Index of the Descriptor Ring entry being modified 2466 * addr - virtual address being mapped in 2467 * nybtes - number of bytes in 'addr' 2468 * operation - the vDisk operation being performed (VD_OP_xxx) 2469 * 2470 * Return Code: 2471 * 0 - Success 2472 */ 2473 static int 2474 vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes, 2475 int operation) 2476 { 2477 vd_dring_entry_t *dep = NULL; 2478 vdc_local_desc_t *ldep = NULL; 2479 ldc_mem_handle_t mhdl; 2480 caddr_t vaddr; 2481 int perm = LDC_MEM_RW; 2482 int rv = 0; 2483 int i; 2484 2485 ASSERT(vdc != NULL); 2486 ASSERT(idx < VD_DRING_LEN); 2487 2488 dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx); 2489 ldep = &vdc->local_dring[idx]; 2490 mhdl = ldep->desc_mhdl; 2491 2492 switch (operation) { 2493 case VD_OP_BREAD: 2494 perm = LDC_MEM_W; 2495 break; 2496 2497 case VD_OP_BWRITE: 2498 perm = LDC_MEM_R; 2499 break; 2500 2501 case VD_OP_FLUSH: 2502 case VD_OP_GET_VTOC: 2503 case VD_OP_SET_VTOC: 2504 case VD_OP_GET_DISKGEOM: 2505 case VD_OP_SET_DISKGEOM: 2506 case VD_OP_SCSICMD: 2507 perm = LDC_MEM_RW; 2508 break; 2509 2510 default: 2511 ASSERT(0); /* catch bad programming in vdc */ 2512 } 2513 2514 /* 2515 * LDC expects any addresses passed in to be 8-byte aligned. 
We need 2516 * to copy the contents of any misaligned buffers to a newly allocated 2517 * buffer and bind it instead (and copy the the contents back to the 2518 * original buffer passed in when depopulating the descriptor) 2519 */ 2520 vaddr = addr; 2521 if (((uint64_t)addr & 0x7) != 0) { 2522 ldep->align_addr = 2523 kmem_zalloc(sizeof (caddr_t) * nbytes, KM_SLEEP); 2524 PR0("%s[%d] Misaligned address %lx reallocating " 2525 "(buf=%lx entry=%d)\n", 2526 __func__, vdc->instance, addr, ldep->align_addr, idx); 2527 bcopy(addr, ldep->align_addr, nbytes); 2528 vaddr = ldep->align_addr; 2529 } 2530 2531 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2532 vdc->dring_mem_info.mtype, perm, &dep->payload.cookie[0], 2533 &dep->payload.ncookies); 2534 PR1("%s[%d] bound mem handle; ncookies=%d\n", 2535 __func__, vdc->instance, dep->payload.ncookies); 2536 if (rv != 0) { 2537 vdc_msg("%s[%d] failed to ldc_mem_bind_handle " 2538 "(mhdl=%lx, buf=%lx entry=%d err=%d)\n", 2539 __func__, vdc->instance, mhdl, addr, idx, rv); 2540 if (ldep->align_addr) { 2541 kmem_free(ldep->align_addr, 2542 sizeof (caddr_t) * dep->payload.nbytes); 2543 ldep->align_addr = NULL; 2544 } 2545 return (EAGAIN); 2546 } 2547 2548 /* 2549 * Get the other cookies (if any). 
2550 */ 2551 for (i = 1; i < dep->payload.ncookies; i++) { 2552 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2553 if (rv != 0) { 2554 (void) ldc_mem_unbind_handle(mhdl); 2555 vdc_msg("%s: failed to get next cookie(mhdl=%lx " 2556 "cnum=%d), err=%d", __func__, mhdl, i, rv); 2557 if (ldep->align_addr) { 2558 kmem_free(ldep->align_addr, 2559 sizeof (caddr_t) * dep->payload.nbytes); 2560 ldep->align_addr = NULL; 2561 } 2562 return (EAGAIN); 2563 } 2564 } 2565 2566 return (rv); 2567 } 2568 2569 /* 2570 * Interrupt handlers for messages from LDC 2571 */ 2572 2573 /* 2574 * Function: 2575 * vdc_handle_cb() 2576 * 2577 * Description: 2578 * 2579 * Arguments: 2580 * event - Type of event (LDC_EVT_xxx) that triggered the callback 2581 * arg - soft state pointer for this instance of the device driver. 2582 * 2583 * Return Code: 2584 * 0 - Success 2585 */ 2586 static uint_t 2587 vdc_handle_cb(uint64_t event, caddr_t arg) 2588 { 2589 ldc_status_t ldc_state; 2590 int rv = 0; 2591 2592 vdc_t *vdc = (vdc_t *)(void *)arg; 2593 2594 ASSERT(vdc != NULL); 2595 2596 PR1("%s[%d] event=%x seqID=%d\n", 2597 __func__, vdc->instance, event, vdc->seq_num); 2598 2599 /* 2600 * Depending on the type of event that triggered this callback, 2601 * we modify the handhske state or read the data. 2602 * 2603 * NOTE: not done as a switch() as event could be triggered by 2604 * a state change and a read request. Also the ordering of the 2605 * check for the event types is deliberate. 2606 */ 2607 if (event & LDC_EVT_UP) { 2608 PR0("%s[%d] Received LDC_EVT_UP\n", __func__, vdc->instance); 2609 2610 /* get LDC state */ 2611 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2612 if (rv != 0) { 2613 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2614 vdc->instance, rv); 2615 mutex_enter(&vdc->lock); 2616 vdc_reset_connection(vdc, B_TRUE); 2617 mutex_exit(&vdc->lock); 2618 return (LDC_SUCCESS); 2619 } 2620 2621 /* 2622 * Reset the transaction sequence numbers when LDC comes up. 
2623 * We then kick off the handshake negotiation with the vDisk 2624 * server. 2625 */ 2626 mutex_enter(&vdc->lock); 2627 vdc->seq_num = 1; 2628 vdc->seq_num_reply = 0; 2629 vdc->ldc_state = ldc_state; 2630 ASSERT(ldc_state == LDC_UP); 2631 mutex_exit(&vdc->lock); 2632 2633 vdc_init_handshake_negotiation(vdc); 2634 2635 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2636 } 2637 2638 if (event & LDC_EVT_READ) { 2639 /* 2640 * Wake up the worker thread to process the message 2641 */ 2642 mutex_enter(&vdc->msg_proc_lock); 2643 vdc->msg_pending = B_TRUE; 2644 cv_signal(&vdc->msg_proc_cv); 2645 mutex_exit(&vdc->msg_proc_lock); 2646 2647 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2648 2649 /* that's all we have to do - no need to handle DOWN/RESET */ 2650 return (LDC_SUCCESS); 2651 } 2652 2653 if (event & LDC_EVT_RESET) { 2654 PR0("%s[%d] Recvd LDC RESET event\n", __func__, vdc->instance); 2655 2656 /* get LDC state */ 2657 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2658 if (rv != 0) { 2659 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2660 vdc->instance, rv); 2661 ldc_state = LDC_OPEN; 2662 } 2663 mutex_enter(&vdc->lock); 2664 vdc->ldc_state = ldc_state; 2665 vdc_reset_connection(vdc, B_FALSE); 2666 mutex_exit(&vdc->lock); 2667 2668 vdc_init_handshake_negotiation(vdc); 2669 } 2670 2671 if (event & LDC_EVT_DOWN) { 2672 PR0("%s[%d] Recvd LDC DOWN event\n", __func__, vdc->instance); 2673 2674 /* get LDC state */ 2675 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2676 if (rv != 0) { 2677 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2678 vdc->instance, rv); 2679 ldc_state = LDC_OPEN; 2680 } 2681 mutex_enter(&vdc->lock); 2682 vdc->ldc_state = ldc_state; 2683 vdc_reset_connection(vdc, B_TRUE); 2684 mutex_exit(&vdc->lock); 2685 } 2686 2687 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 2688 cmn_err(CE_NOTE, "![%d] Unexpected LDC event (%lx) received", 2689 vdc->instance, event); 2690 2691 return (LDC_SUCCESS); 2692 
} 2693 2694 /* -------------------------------------------------------------------------- */ 2695 2696 /* 2697 * The following functions process the incoming messages from vds 2698 */ 2699 2700 2701 /* 2702 * Function: 2703 * vdc_process_msg_thread() 2704 * 2705 * Description: 2706 * 2707 * Arguments: 2708 * vdc - soft state pointer for this instance of the device driver. 2709 * 2710 * Return Code: 2711 * None 2712 */ 2713 static void 2714 vdc_process_msg_thread(vdc_t *vdc) 2715 { 2716 int status = 0; 2717 boolean_t q_is_empty = B_TRUE; 2718 2719 ASSERT(vdc != NULL); 2720 2721 mutex_enter(&vdc->msg_proc_lock); 2722 PR0("%s[%d]: Starting\n", __func__, vdc->instance); 2723 2724 vdc->msg_proc_thr_state = VDC_THR_RUNNING; 2725 2726 while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) { 2727 2728 PR1("%s[%d] Waiting\n", __func__, vdc->instance); 2729 while (!vdc->msg_pending) 2730 cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock); 2731 2732 PR1("%s[%d] Message Received\n", __func__, vdc->instance); 2733 2734 /* check if there is data */ 2735 status = ldc_chkq(vdc->ldc_handle, &q_is_empty); 2736 if ((status != 0) && 2737 (vdc->msg_proc_thr_state == VDC_THR_RUNNING)) { 2738 cmn_err(CE_NOTE, "[%d] Unable to communicate with vDisk" 2739 " server. 
Cannot check LDC queue: %d", 2740 vdc->instance, status); 2741 mutex_enter(&vdc->lock); 2742 vdc_reset_connection(vdc, B_FALSE); 2743 mutex_exit(&vdc->lock); 2744 vdc->msg_proc_thr_state = VDC_THR_STOP; 2745 continue; 2746 } 2747 2748 if (!q_is_empty) { 2749 PR1("%s: new pkt(s) available\n", __func__); 2750 vdc_process_msg(vdc); 2751 } 2752 2753 vdc->msg_pending = B_FALSE; 2754 } 2755 2756 PR0("Message processing thread stopped\n"); 2757 vdc->msg_pending = B_FALSE; 2758 vdc->msg_proc_thr_state = VDC_THR_DONE; 2759 cv_signal(&vdc->msg_proc_cv); 2760 mutex_exit(&vdc->msg_proc_lock); 2761 thread_exit(); 2762 } 2763 2764 2765 /* 2766 * Function: 2767 * vdc_process_msg() 2768 * 2769 * Description: 2770 * This function is called by the message processing thread each time it 2771 * is triggered when LDC sends an interrupt to indicate that there are 2772 * more packets on the queue. When it is called it will continue to loop 2773 * and read the messages until there are no more left of the queue. If it 2774 * encounters an invalid sized message it will drop it and check the next 2775 * message. 2776 * 2777 * Arguments: 2778 * arg - soft state pointer for this instance of the device driver. 2779 * 2780 * Return Code: 2781 * None. 
2782 */ 2783 static void 2784 vdc_process_msg(void *arg) 2785 { 2786 vdc_t *vdc = (vdc_t *)(void *)arg; 2787 vio_msg_t vio_msg; 2788 size_t nbytes = sizeof (vio_msg); 2789 int status; 2790 2791 ASSERT(vdc != NULL); 2792 2793 mutex_enter(&vdc->lock); 2794 2795 PR1("%s\n", __func__); 2796 2797 for (;;) { 2798 2799 /* read all messages - until no more left */ 2800 status = ldc_read(vdc->ldc_handle, (caddr_t)&vio_msg, &nbytes); 2801 2802 if (status) { 2803 vdc_msg("%s: ldc_read() failed = %d", __func__, status); 2804 2805 /* if status is ECONNRESET --- reset vdc state */ 2806 if (status == EIO || status == ECONNRESET) { 2807 vdc_reset_connection(vdc, B_TRUE); 2808 } 2809 2810 mutex_exit(&vdc->lock); 2811 return; 2812 } 2813 2814 if ((nbytes > 0) && (nbytes < sizeof (vio_msg_tag_t))) { 2815 cmn_err(CE_CONT, "![%d] Expect %lu bytes; recv'd %lu\n", 2816 vdc->instance, sizeof (vio_msg_tag_t), nbytes); 2817 mutex_exit(&vdc->lock); 2818 return; 2819 } 2820 2821 if (nbytes == 0) { 2822 PR2("%s[%d]: ldc_read() done..\n", 2823 __func__, vdc->instance); 2824 mutex_exit(&vdc->lock); 2825 return; 2826 } 2827 2828 PR1("%s[%d] (%x/%x/%x)\n", __func__, vdc->instance, 2829 vio_msg.tag.vio_msgtype, 2830 vio_msg.tag.vio_subtype, 2831 vio_msg.tag.vio_subtype_env); 2832 2833 /* 2834 * Verify the Session ID of the message 2835 * 2836 * Every message after the Version has been negotiated should 2837 * have the correct session ID set. 
2838 */ 2839 if ((vio_msg.tag.vio_sid != vdc->session_id) && 2840 (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) { 2841 cmn_err(CE_NOTE, "[%d] Invalid SID 0x%x, expect 0x%lx", 2842 vdc->instance, vio_msg.tag.vio_sid, 2843 vdc->session_id); 2844 vdc_reset_connection(vdc, B_FALSE); 2845 mutex_exit(&vdc->lock); 2846 return; 2847 } 2848 2849 switch (vio_msg.tag.vio_msgtype) { 2850 case VIO_TYPE_CTRL: 2851 status = vdc_process_ctrl_msg(vdc, vio_msg); 2852 break; 2853 case VIO_TYPE_DATA: 2854 status = vdc_process_data_msg(vdc, vio_msg); 2855 break; 2856 case VIO_TYPE_ERR: 2857 status = vdc_process_err_msg(vdc, vio_msg); 2858 break; 2859 default: 2860 PR1("%s", __func__); 2861 status = EINVAL; 2862 break; 2863 } 2864 2865 if (status != 0) { 2866 PR0("%s[%d] Error (%d) occcurred processing msg\n", 2867 __func__, vdc->instance, status); 2868 vdc_reset_connection(vdc, B_FALSE); 2869 } 2870 } 2871 _NOTE(NOTREACHED) 2872 } 2873 2874 /* 2875 * Function: 2876 * vdc_process_ctrl_msg() 2877 * 2878 * Description: 2879 * This function is called by the message processing thread each time 2880 * an LDC message with a msgtype of VIO_TYPE_CTRL is received. 2881 * 2882 * Arguments: 2883 * vdc - soft state pointer for this instance of the device driver. 2884 * msg - the LDC message sent by vds 2885 * 2886 * Return Codes: 2887 * 0 - Success. 2888 * EPROTO - A message was received which shouldn't have happened according 2889 * to the protocol 2890 * ENOTSUP - An action which is allowed according to the protocol but which 2891 * isn't (or doesn't need to be) implemented yet. 2892 * EINVAL - An invalid value was returned as part of a message. 
2893 */ 2894 static int 2895 vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg) 2896 { 2897 int status = -1; 2898 2899 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL); 2900 ASSERT(vdc != NULL); 2901 ASSERT(mutex_owned(&vdc->lock)); 2902 2903 /* Depending on which state we are in; process the message */ 2904 switch (vdc->state) { 2905 case VD_STATE_INIT: 2906 status = vdc_handle_ver_msg(vdc, (vio_ver_msg_t *)&msg); 2907 break; 2908 2909 case VD_STATE_VER: 2910 status = vdc_handle_attr_msg(vdc, (vd_attr_msg_t *)&msg); 2911 break; 2912 2913 case VD_STATE_ATTR: 2914 status = vdc_handle_dring_reg_msg(vdc, 2915 (vio_dring_reg_msg_t *)&msg); 2916 break; 2917 2918 case VD_STATE_RDX: 2919 if (msg.tag.vio_subtype_env != VIO_RDX) { 2920 status = EPROTO; 2921 break; 2922 } 2923 2924 PR0("%s: Received RDX - handshake successful\n", __func__); 2925 2926 vdc->hshake_cnt = 0; /* reset failed handshake count */ 2927 status = 0; 2928 vdc->state = VD_STATE_DATA; 2929 2930 cv_broadcast(&vdc->attach_cv); 2931 break; 2932 2933 case VD_STATE_DATA: 2934 default: 2935 cmn_err(CE_NOTE, "[%d] Unexpected handshake state %d", 2936 vdc->instance, vdc->state); 2937 status = EPROTO; 2938 break; 2939 } 2940 2941 return (status); 2942 } 2943 2944 2945 /* 2946 * Function: 2947 * vdc_process_data_msg() 2948 * 2949 * Description: 2950 * This function is called by the message processing thread each time 2951 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 2952 * be an ACK or NACK from vds[1] which vdc handles as follows. 2953 * ACK - wake up the waiting thread 2954 * NACK - resend any messages necessary 2955 * 2956 * [1] Although the message format allows it, vds should not send a 2957 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 2958 * some bizarre reason it does, vdc will reset the connection. 2959 * 2960 * Arguments: 2961 * vdc - soft state pointer for this instance of the device driver. 
2962 * msg - the LDC message sent by vds 2963 * 2964 * Return Code: 2965 * 0 - Success. 2966 * > 0 - error value returned by LDC 2967 */ 2968 static int 2969 vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg) 2970 { 2971 int status = 0; 2972 vdc_local_desc_t *local_dep = NULL; 2973 vio_dring_msg_t *dring_msg = NULL; 2974 uint_t num_msgs; 2975 uint_t start; 2976 uint_t end; 2977 uint_t i; 2978 2979 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_DATA); 2980 ASSERT(vdc != NULL); 2981 ASSERT(mutex_owned(&vdc->lock)); 2982 2983 dring_msg = (vio_dring_msg_t *)&msg; 2984 2985 /* 2986 * Check to see if the message has bogus data 2987 */ 2988 start = dring_msg->start_idx; 2989 end = dring_msg->end_idx; 2990 if ((start >= VD_DRING_LEN) || (end >= VD_DRING_LEN)) { 2991 vdc_msg("%s: Bogus ACK data : start %d, end %d\n", 2992 __func__, start, end); 2993 return (EPROTO); 2994 } 2995 2996 /* 2997 * calculate the number of messages that vds ACK'ed 2998 * 2999 * Assumes, (like the rest of vdc) that there is a 1:1 mapping 3000 * between requests and Dring entries. 3001 */ 3002 num_msgs = (end >= start) ? 3003 (end - start + 1) : 3004 (VD_DRING_LEN - start + end + 1); 3005 3006 /* 3007 * Verify that the sequence number is what vdc expects. 
3008 */ 3009 if (!vdc_verify_seq_num(vdc, dring_msg, num_msgs)) { 3010 return (ENXIO); 3011 } 3012 3013 /* 3014 * Wake the thread waiting for each DRing entry ACK'ed 3015 */ 3016 for (i = 0; i < num_msgs; i++) { 3017 int idx = (start + i) % VD_DRING_LEN; 3018 3019 local_dep = &vdc->local_dring[idx]; 3020 mutex_enter(&local_dep->lock); 3021 cv_signal(&local_dep->cv); 3022 mutex_exit(&local_dep->lock); 3023 } 3024 3025 if (msg.tag.vio_subtype == VIO_SUBTYPE_NACK) { 3026 PR0("%s: DATA NACK\n", __func__); 3027 VDC_DUMP_DRING_MSG(dring_msg); 3028 vdc_reset_connection(vdc, B_FALSE); 3029 3030 /* we need to drop the lock to trigger the handshake */ 3031 mutex_exit(&vdc->lock); 3032 vdc_init_handshake_negotiation(vdc); 3033 mutex_enter(&vdc->lock); 3034 } else if (msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 3035 status = EPROTO; 3036 } 3037 3038 return (status); 3039 } 3040 3041 /* 3042 * Function: 3043 * vdc_process_err_msg() 3044 * 3045 * NOTE: No error messages are used as part of the vDisk protocol 3046 */ 3047 static int 3048 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 3049 { 3050 _NOTE(ARGUNUSED(vdc)) 3051 _NOTE(ARGUNUSED(msg)) 3052 3053 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 3054 cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance); 3055 3056 return (ENOTSUP); 3057 } 3058 3059 /* 3060 * Function: 3061 * vdc_handle_ver_msg() 3062 * 3063 * Description: 3064 * 3065 * Arguments: 3066 * vdc - soft state pointer for this instance of the device driver. 
3067 * ver_msg - LDC message sent by vDisk server 3068 * 3069 * Return Code: 3070 * 0 - Success 3071 */ 3072 static int 3073 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 3074 { 3075 int status = 0; 3076 3077 ASSERT(vdc != NULL); 3078 ASSERT(mutex_owned(&vdc->lock)); 3079 3080 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 3081 return (EPROTO); 3082 } 3083 3084 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 3085 return (EINVAL); 3086 } 3087 3088 switch (ver_msg->tag.vio_subtype) { 3089 case VIO_SUBTYPE_ACK: 3090 /* 3091 * We check to see if the version returned is indeed supported 3092 * (The server may have also adjusted the minor number downwards 3093 * and if so 'ver_msg' will contain the actual version agreed) 3094 */ 3095 if (vdc_is_supported_version(ver_msg)) { 3096 vdc->ver.major = ver_msg->ver_major; 3097 vdc->ver.minor = ver_msg->ver_minor; 3098 ASSERT(vdc->ver.major > 0); 3099 3100 vdc->state = VD_STATE_VER; 3101 status = vdc_init_attr_negotiation(vdc); 3102 } else { 3103 status = EPROTO; 3104 } 3105 break; 3106 3107 case VIO_SUBTYPE_NACK: 3108 /* 3109 * call vdc_is_supported_version() which will return the next 3110 * supported version (if any) in 'ver_msg' 3111 */ 3112 (void) vdc_is_supported_version(ver_msg); 3113 if (ver_msg->ver_major > 0) { 3114 size_t len = sizeof (*ver_msg); 3115 3116 ASSERT(vdc->ver.major > 0); 3117 3118 /* reset the necessary fields and resend */ 3119 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 3120 ver_msg->dev_class = VDEV_DISK; 3121 3122 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 3123 PR0("[%d] Resend VER info (LDC status = %d)\n", 3124 vdc->instance, status); 3125 if (len != sizeof (*ver_msg)) 3126 status = EBADMSG; 3127 } else { 3128 cmn_err(CE_NOTE, "[%d] No common version with " 3129 "vDisk server", vdc->instance); 3130 status = ENOTSUP; 3131 } 3132 3133 break; 3134 case VIO_SUBTYPE_INFO: 3135 /* 3136 * Handle the case where vds starts handshake 3137 * (for now only vdc is the instigatior) 3138 */ 3139 
status = ENOTSUP; 3140 break; 3141 3142 default: 3143 status = EINVAL; 3144 break; 3145 } 3146 3147 return (status); 3148 } 3149 3150 /* 3151 * Function: 3152 * vdc_handle_attr_msg() 3153 * 3154 * Description: 3155 * 3156 * Arguments: 3157 * vdc - soft state pointer for this instance of the device driver. 3158 * attr_msg - LDC message sent by vDisk server 3159 * 3160 * Return Code: 3161 * 0 - Success 3162 */ 3163 static int 3164 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 3165 { 3166 int status = 0; 3167 3168 ASSERT(vdc != NULL); 3169 ASSERT(mutex_owned(&vdc->lock)); 3170 3171 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 3172 return (EPROTO); 3173 } 3174 3175 switch (attr_msg->tag.vio_subtype) { 3176 case VIO_SUBTYPE_ACK: 3177 /* 3178 * We now verify the attributes sent by vds. 3179 */ 3180 vdc->vdisk_size = attr_msg->vdisk_size; 3181 vdc->vdisk_type = attr_msg->vdisk_type; 3182 3183 if ((attr_msg->max_xfer_sz != vdc->max_xfer_sz) || 3184 (attr_msg->vdisk_block_size != vdc->block_size)) { 3185 /* 3186 * Future support: step down to the block size 3187 * and max transfer size suggested by the 3188 * server. (If this value is less than 128K 3189 * then multiple Dring entries per request 3190 * would need to be implemented) 3191 */ 3192 cmn_err(CE_NOTE, "[%d] Couldn't process block " 3193 "attributes from vds", vdc->instance); 3194 status = EINVAL; 3195 break; 3196 } 3197 3198 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 3199 (attr_msg->vdisk_size > INT64_MAX) || 3200 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 3201 vdc_msg("%s[%d] Couldn't process attrs " 3202 "from vds", __func__, vdc->instance); 3203 status = EINVAL; 3204 break; 3205 } 3206 3207 vdc->state = VD_STATE_ATTR; 3208 status = vdc_init_dring_negotiate(vdc); 3209 break; 3210 3211 case VIO_SUBTYPE_NACK: 3212 /* 3213 * vds could not handle the attributes we sent so we 3214 * stop negotiating. 
3215 */ 3216 status = EPROTO; 3217 break; 3218 3219 case VIO_SUBTYPE_INFO: 3220 /* 3221 * Handle the case where vds starts the handshake 3222 * (for now; vdc is the only supported instigatior) 3223 */ 3224 status = ENOTSUP; 3225 break; 3226 3227 default: 3228 status = ENOTSUP; 3229 break; 3230 } 3231 3232 return (status); 3233 } 3234 3235 /* 3236 * Function: 3237 * vdc_handle_dring_reg_msg() 3238 * 3239 * Description: 3240 * 3241 * Arguments: 3242 * vdc - soft state pointer for this instance of the driver. 3243 * dring_msg - LDC message sent by vDisk server 3244 * 3245 * Return Code: 3246 * 0 - Success 3247 */ 3248 static int 3249 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 3250 { 3251 int status = 0; 3252 vio_rdx_msg_t msg = {0}; 3253 size_t msglen = sizeof (msg); 3254 3255 ASSERT(vdc != NULL); 3256 ASSERT(mutex_owned(&vdc->lock)); 3257 3258 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 3259 return (EPROTO); 3260 } 3261 3262 switch (dring_msg->tag.vio_subtype) { 3263 case VIO_SUBTYPE_ACK: 3264 /* save the received dring_ident */ 3265 vdc->dring_ident = dring_msg->dring_ident; 3266 PR0("%s[%d] Received dring ident=0x%lx\n", 3267 __func__, vdc->instance, vdc->dring_ident); 3268 3269 /* 3270 * Send an RDX message to vds to indicate we are ready 3271 * to send data 3272 */ 3273 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 3274 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 3275 msg.tag.vio_subtype_env = VIO_RDX; 3276 msg.tag.vio_sid = vdc->session_id; 3277 status = vdc_send(vdc, (caddr_t)&msg, &msglen); 3278 if (status != 0) { 3279 cmn_err(CE_NOTE, "[%d] Failed to send RDX" 3280 " message (%d)", vdc->instance, status); 3281 break; 3282 } 3283 3284 vdc->state = VD_STATE_RDX; 3285 break; 3286 3287 case VIO_SUBTYPE_NACK: 3288 /* 3289 * vds could not handle the DRing info we sent so we 3290 * stop negotiating. 
3291 */ 3292 cmn_err(CE_CONT, "server could not register DRing\n"); 3293 vdc_reset_connection(vdc, B_FALSE); 3294 vdc_destroy_descriptor_ring(vdc); 3295 status = EPROTO; 3296 break; 3297 3298 case VIO_SUBTYPE_INFO: 3299 /* 3300 * Handle the case where vds starts handshake 3301 * (for now only vdc is the instigatior) 3302 */ 3303 status = ENOTSUP; 3304 break; 3305 default: 3306 status = ENOTSUP; 3307 } 3308 3309 return (status); 3310 } 3311 3312 /* 3313 * Function: 3314 * vdc_verify_seq_num() 3315 * 3316 * Description: 3317 * This functions verifies that the sequence number sent back by vds with 3318 * the latest message correctly follows the last request processed. 3319 * 3320 * Arguments: 3321 * vdc - soft state pointer for this instance of the driver. 3322 * dring_msg - pointer to the LDC message sent by vds 3323 * num_msgs - the number of requests being acknowledged 3324 * 3325 * Return Code: 3326 * B_TRUE - Success. 3327 * B_FALSE - The seq numbers are so out of sync, vdc cannot deal with them 3328 */ 3329 static boolean_t 3330 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int num_msgs) 3331 { 3332 ASSERT(vdc != NULL); 3333 ASSERT(dring_msg != NULL); 3334 3335 /* 3336 * Check to see if the messages were responded to in the correct 3337 * order by vds. There are 3 possible scenarios: 3338 * - the seq_num we expected is returned (everything is OK) 3339 * - a seq_num earlier than the last one acknowledged is returned, 3340 * if so something is seriously wrong so we reset the connection 3341 * - a seq_num greater than what we expected is returned. 
3342 */ 3343 if (dring_msg->seq_num != (vdc->seq_num_reply + num_msgs)) { 3344 vdc_msg("%s[%d]: Bogus seq_num %d, expected %d\n", 3345 __func__, vdc->instance, dring_msg->seq_num, 3346 vdc->seq_num_reply + num_msgs); 3347 if (dring_msg->seq_num < (vdc->seq_num_reply + num_msgs)) { 3348 return (B_FALSE); 3349 } else { 3350 /* 3351 * vds has responded with a seq_num greater than what we 3352 * expected 3353 */ 3354 return (B_FALSE); 3355 } 3356 } 3357 vdc->seq_num_reply += num_msgs; 3358 3359 return (B_TRUE); 3360 } 3361 3362 3363 /* 3364 * Function: 3365 * vdc_is_supported_version() 3366 * 3367 * Description: 3368 * This routine checks if the major/minor version numbers specified in 3369 * 'ver_msg' are supported. If not it finds the next version that is 3370 * in the supported version list 'vdc_version[]' and sets the fields in 3371 * 'ver_msg' to those values 3372 * 3373 * Arguments: 3374 * ver_msg - LDC message sent by vDisk server 3375 * 3376 * Return Code: 3377 * B_TRUE - Success 3378 * B_FALSE - Version not supported 3379 */ 3380 static boolean_t 3381 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 3382 { 3383 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 3384 3385 for (int i = 0; i < vdc_num_versions; i++) { 3386 ASSERT(vdc_version[i].major > 0); 3387 ASSERT((i == 0) || 3388 (vdc_version[i].major < vdc_version[i-1].major)); 3389 3390 /* 3391 * If the major versions match, adjust the minor version, if 3392 * necessary, down to the highest value supported by this 3393 * client. 
The server should support all minor versions lower 3394 * than the value it sent 3395 */ 3396 if (ver_msg->ver_major == vdc_version[i].major) { 3397 if (ver_msg->ver_minor > vdc_version[i].minor) { 3398 PR0("Adjusting minor version from %u to %u", 3399 ver_msg->ver_minor, vdc_version[i].minor); 3400 ver_msg->ver_minor = vdc_version[i].minor; 3401 } 3402 return (B_TRUE); 3403 } 3404 3405 /* 3406 * If the message contains a higher major version number, set 3407 * the message's major/minor versions to the current values 3408 * and return false, so this message will get resent with 3409 * these values, and the server will potentially try again 3410 * with the same or a lower version 3411 */ 3412 if (ver_msg->ver_major > vdc_version[i].major) { 3413 ver_msg->ver_major = vdc_version[i].major; 3414 ver_msg->ver_minor = vdc_version[i].minor; 3415 PR0("Suggesting major/minor (0x%x/0x%x)\n", 3416 ver_msg->ver_major, ver_msg->ver_minor); 3417 3418 return (B_FALSE); 3419 } 3420 3421 /* 3422 * Otherwise, the message's major version is less than the 3423 * current major version, so continue the loop to the next 3424 * (lower) supported version 3425 */ 3426 } 3427 3428 /* 3429 * No common version was found; "ground" the version pair in the 3430 * message to terminate negotiation 3431 */ 3432 ver_msg->ver_major = 0; 3433 ver_msg->ver_minor = 0; 3434 3435 return (B_FALSE); 3436 } 3437 /* -------------------------------------------------------------------------- */ 3438 3439 /* 3440 * DKIO(7) support 3441 */ 3442 3443 typedef struct vdc_dk_arg { 3444 struct dk_callback dkc; 3445 int mode; 3446 dev_t dev; 3447 vdc_t *vdc; 3448 } vdc_dk_arg_t; 3449 3450 /* 3451 * Function: 3452 * vdc_dkio_flush_cb() 3453 * 3454 * Description: 3455 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 3456 * by kernel code. 3457 * 3458 * Arguments: 3459 * arg - a pointer to a vdc_dk_arg_t structure. 
3460 */ 3461 void 3462 vdc_dkio_flush_cb(void *arg) 3463 { 3464 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 3465 struct dk_callback *dkc = NULL; 3466 vdc_t *vdc = NULL; 3467 int rv; 3468 3469 if (dk_arg == NULL) { 3470 vdc_msg("%s[?] DKIOCFLUSHWRITECACHE arg is NULL\n", __func__); 3471 return; 3472 } 3473 dkc = &dk_arg->dkc; 3474 vdc = dk_arg->vdc; 3475 ASSERT(vdc != NULL); 3476 3477 rv = vdc_populate_descriptor(vdc, NULL, 0, VD_OP_FLUSH, 3478 dk_arg->mode, SDPART(getminor(dk_arg->dev))); 3479 if (rv != 0) { 3480 PR0("%s[%d] DKIOCFLUSHWRITECACHE failed : model %x\n", 3481 __func__, vdc->instance, 3482 ddi_model_convert_from(dk_arg->mode & FMODELS)); 3483 return; 3484 } 3485 3486 /* 3487 * Trigger the call back to notify the caller the the ioctl call has 3488 * been completed. 3489 */ 3490 if ((dk_arg->mode & FKIOCTL) && 3491 (dkc != NULL) && 3492 (dkc->dkc_callback != NULL)) { 3493 ASSERT(dkc->dkc_cookie != NULL); 3494 (*dkc->dkc_callback)(dkc->dkc_cookie, ENOTSUP); 3495 } 3496 3497 /* Indicate that one less DKIO write flush is outstanding */ 3498 mutex_enter(&vdc->lock); 3499 vdc->dkio_flush_pending--; 3500 ASSERT(vdc->dkio_flush_pending >= 0); 3501 mutex_exit(&vdc->lock); 3502 } 3503 3504 /* 3505 * This structure is used in the DKIO(7I) array below. 
3506 */ 3507 typedef struct vdc_dk_ioctl { 3508 uint8_t op; /* VD_OP_XXX value */ 3509 int cmd; /* Solaris ioctl operation number */ 3510 size_t nbytes; /* size of structure to be copied */ 3511 3512 /* function to convert between vDisk and Solaris structure formats */ 3513 int (*convert)(void *vd_buf, void *ioctl_arg, int mode, int dir); 3514 } vdc_dk_ioctl_t; 3515 3516 /* 3517 * Subset of DKIO(7I) operations currently supported 3518 */ 3519 static vdc_dk_ioctl_t dk_ioctl[] = { 3520 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, sizeof (int), 3521 vdc_null_copy_func}, 3522 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 3523 vdc_null_copy_func}, 3524 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 3525 vdc_null_copy_func}, 3526 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 3527 vdc_get_vtoc_convert}, 3528 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 3529 vdc_set_vtoc_convert}, 3530 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 3531 vdc_get_geom_convert}, 3532 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 3533 vdc_get_geom_convert}, 3534 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 3535 vdc_get_geom_convert}, 3536 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 3537 vdc_get_geom_convert}, 3538 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 3539 vdc_set_geom_convert}, 3540 3541 /* 3542 * These particular ioctls are not sent to the server - vdc fakes up 3543 * the necessary info. 
3544 */ 3545 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 3546 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 3547 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 3548 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 3549 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 3550 }; 3551 3552 /* 3553 * Function: 3554 * vd_process_ioctl() 3555 * 3556 * Description: 3557 * This routine processes disk specific ioctl calls 3558 * 3559 * Arguments: 3560 * dev - the device number 3561 * cmd - the operation [dkio(7I)] to be processed 3562 * arg - pointer to user provided structure 3563 * (contains data to be set or reference parameter for get) 3564 * mode - bit flag, indicating open settings, 32/64 bit type, etc 3565 * 3566 * Return Code: 3567 * 0 3568 * EFAULT 3569 * ENXIO 3570 * EIO 3571 * ENOTSUP 3572 */ 3573 static int 3574 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 3575 { 3576 int instance = SDUNIT(getminor(dev)); 3577 vdc_t *vdc = NULL; 3578 int rv = -1; 3579 int idx = 0; /* index into dk_ioctl[] */ 3580 size_t len = 0; /* #bytes to send to vds */ 3581 size_t alloc_len = 0; /* #bytes to allocate mem for */ 3582 caddr_t mem_p = NULL; 3583 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 3584 3585 PR0("%s: Processing ioctl(%x) for dev %x : model %x\n", 3586 __func__, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 3587 3588 vdc = ddi_get_soft_state(vdc_state, instance); 3589 if (vdc == NULL) { 3590 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 3591 instance); 3592 return (ENXIO); 3593 } 3594 3595 /* 3596 * Check to see if we can communicate with the vDisk server 3597 */ 3598 if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) { 3599 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 3600 return (ENOLINK); 3601 } 3602 3603 /* 3604 * Validate the ioctl operation to be performed. 
3605 * 3606 * If we have looped through the array without finding a match then we 3607 * don't support this ioctl. 3608 */ 3609 for (idx = 0; idx < nioctls; idx++) { 3610 if (cmd == dk_ioctl[idx].cmd) 3611 break; 3612 } 3613 3614 if (idx >= nioctls) { 3615 PR0("%s[%d] Unsupported ioctl(%x)\n", 3616 __func__, vdc->instance, cmd); 3617 return (ENOTSUP); 3618 } 3619 3620 len = dk_ioctl[idx].nbytes; 3621 3622 /* 3623 * Deal with the ioctls which the server does not provide. vdc can 3624 * fake these up and return immediately 3625 */ 3626 switch (cmd) { 3627 case CDROMREADOFFSET: 3628 case DKIOCREMOVABLE: 3629 case USCSICMD: 3630 return (ENOTTY); 3631 3632 case DKIOCINFO: 3633 { 3634 struct dk_cinfo cinfo; 3635 if (vdc->cinfo == NULL) 3636 return (ENXIO); 3637 3638 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 3639 cinfo.dki_partition = SDPART(getminor(dev)); 3640 3641 rv = ddi_copyout(&cinfo, (void *)arg, 3642 sizeof (struct dk_cinfo), mode); 3643 if (rv != 0) 3644 return (EFAULT); 3645 3646 return (0); 3647 } 3648 3649 case DKIOCGMEDIAINFO: 3650 if (vdc->minfo == NULL) 3651 return (ENXIO); 3652 3653 rv = ddi_copyout(vdc->minfo, (void *)arg, 3654 sizeof (struct dk_minfo), mode); 3655 if (rv != 0) 3656 return (EFAULT); 3657 3658 return (0); 3659 } 3660 3661 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 3662 ASSERT(dk_ioctl[idx].op != 0); 3663 3664 /* LDC requires that the memory being mapped is 8-byte aligned */ 3665 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 3666 PR1("%s[%d]: struct size %d alloc %d\n", 3667 __func__, instance, len, alloc_len); 3668 3669 ASSERT(alloc_len != 0); /* sanity check */ 3670 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 3671 3672 /* 3673 * Call the conversion function for this ioctl whhich if necessary 3674 * converts from the Solaris format to the format ARC'ed 3675 * as part of the vDisk protocol (FWARC 2006/195) 3676 */ 3677 ASSERT(dk_ioctl[idx].convert != NULL); 3678 rv = (dk_ioctl[idx].convert)(arg, mem_p, 
mode, VD_COPYIN); 3679 if (rv != 0) { 3680 PR0("%s[%d]: convert returned %d for ioctl 0x%x\n", 3681 __func__, instance, rv, cmd); 3682 if (mem_p != NULL) 3683 kmem_free(mem_p, alloc_len); 3684 return (rv); 3685 } 3686 3687 /* 3688 * handle the special case of DKIOCFLUSHWRITECACHE 3689 */ 3690 if (cmd == DKIOCFLUSHWRITECACHE) { 3691 struct dk_callback *dkc = (struct dk_callback *)arg; 3692 3693 PR0("%s[%d]: DKIOCFLUSHWRITECACHE\n", __func__, instance); 3694 3695 /* no mem should have been allocated hence no need to free it */ 3696 ASSERT(mem_p == NULL); 3697 3698 /* 3699 * If arg is NULL, we break here and the call operates 3700 * synchronously; waiting for vds to return. 3701 * 3702 * i.e. after the request to vds returns successfully, 3703 * all writes completed prior to the ioctl will have been 3704 * flushed from the disk write cache to persistent media. 3705 */ 3706 if (dkc != NULL) { 3707 vdc_dk_arg_t arg; 3708 arg.mode = mode; 3709 arg.dev = dev; 3710 bcopy(dkc, &arg.dkc, sizeof (*dkc)); 3711 3712 mutex_enter(&vdc->lock); 3713 vdc->dkio_flush_pending++; 3714 arg.vdc = vdc; 3715 mutex_exit(&vdc->lock); 3716 3717 /* put the request on a task queue */ 3718 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 3719 (void *)&arg, DDI_SLEEP); 3720 3721 return (rv == NULL ? ENOMEM : 0); 3722 } 3723 } 3724 3725 /* 3726 * send request to vds to service the ioctl. 3727 */ 3728 rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, dk_ioctl[idx].op, 3729 mode, SDPART((getminor(dev)))); 3730 if (rv != 0) { 3731 /* 3732 * This is not necessarily an error. The ioctl could 3733 * be returning a value such as ENOTTY to indicate 3734 * that the ioctl is not applicable. 
3735 */ 3736 PR0("%s[%d]: vds returned %d for ioctl 0x%x\n", 3737 __func__, instance, rv, cmd); 3738 if (mem_p != NULL) 3739 kmem_free(mem_p, alloc_len); 3740 return (rv); 3741 } 3742 3743 /* 3744 * If the VTOC has been changed, then vdc needs to update the copy 3745 * it saved in the soft state structure and try and update the device 3746 * node properties. Failing to set the properties should not cause 3747 * an error to be return the caller though. 3748 */ 3749 if (cmd == DKIOCSVTOC) { 3750 bcopy(mem_p, vdc->vtoc, sizeof (struct vtoc)); 3751 if (vdc_create_device_nodes_props(vdc)) { 3752 cmn_err(CE_NOTE, "![%d] Failed to update device nodes" 3753 " properties", instance); 3754 } 3755 } 3756 3757 /* 3758 * Call the conversion function (if it exists) for this ioctl 3759 * which converts from the format ARC'ed as part of the vDisk 3760 * protocol (FWARC 2006/195) back to a format understood by 3761 * the rest of Solaris. 3762 */ 3763 rv = (dk_ioctl[idx].convert)(mem_p, arg, mode, VD_COPYOUT); 3764 if (rv != 0) { 3765 PR0("%s[%d]: convert returned %d for ioctl 0x%x\n", 3766 __func__, instance, rv, cmd); 3767 if (mem_p != NULL) 3768 kmem_free(mem_p, alloc_len); 3769 return (rv); 3770 } 3771 3772 if (mem_p != NULL) 3773 kmem_free(mem_p, alloc_len); 3774 3775 return (rv); 3776 } 3777 3778 /* 3779 * Function: 3780 * 3781 * Description: 3782 * This is an empty conversion function used by ioctl calls which 3783 * do not need to convert the data being passed in/out to userland 3784 */ 3785 static int 3786 vdc_null_copy_func(void *from, void *to, int mode, int dir) 3787 { 3788 _NOTE(ARGUNUSED(from)) 3789 _NOTE(ARGUNUSED(to)) 3790 _NOTE(ARGUNUSED(mode)) 3791 _NOTE(ARGUNUSED(dir)) 3792 3793 return (0); 3794 } 3795 3796 /* 3797 * Function: 3798 * vdc_get_vtoc_convert() 3799 * 3800 * Description: 3801 * This routine fakes up the disk info needed for some DKIO ioctls. 
3802 * 3803 * Arguments: 3804 * from - the buffer containing the data to be copied from 3805 * to - the buffer to be copied to 3806 * mode - flags passed to ioctl() call 3807 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 3808 * 3809 * Return Code: 3810 * 0 - Success 3811 * ENXIO - incorrect buffer passed in. 3812 * EFAULT - ddi_copyxxx routine encountered an error. 3813 */ 3814 static int 3815 vdc_get_vtoc_convert(void *from, void *to, int mode, int dir) 3816 { 3817 void *tmp_mem = NULL; 3818 void *tmp_memp; 3819 struct vtoc vt; 3820 struct vtoc32 vt32; 3821 int copy_len = 0; 3822 int rv = 0; 3823 3824 if (dir != VD_COPYOUT) 3825 return (0); /* nothing to do */ 3826 3827 if ((from == NULL) || (to == NULL)) 3828 return (ENXIO); 3829 3830 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 3831 copy_len = sizeof (struct vtoc32); 3832 else 3833 copy_len = sizeof (struct vtoc); 3834 3835 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 3836 3837 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 3838 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 3839 vtoctovtoc32(vt, vt32); 3840 tmp_memp = &vt32; 3841 } else { 3842 tmp_memp = &vt; 3843 } 3844 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 3845 if (rv != 0) 3846 rv = EFAULT; 3847 3848 kmem_free(tmp_mem, copy_len); 3849 return (rv); 3850 } 3851 3852 /* 3853 * Function: 3854 * vdc_set_vtoc_convert() 3855 * 3856 * Description: 3857 * 3858 * Arguments: 3859 * from - Buffer with data 3860 * to - Buffer where data is to be copied to 3861 * mode - flags passed to ioctl 3862 * dir - direction of copy (in or out) 3863 * 3864 * Return Code: 3865 * 0 - Success 3866 * ENXIO - Invalid buffer passed in 3867 * EFAULT - ddi_copyin of data failed 3868 */ 3869 static int 3870 vdc_set_vtoc_convert(void *from, void *to, int mode, int dir) 3871 { 3872 void *tmp_mem = NULL; 3873 struct vtoc vt; 3874 struct vtoc *vtp = &vt; 3875 vd_vtoc_t vtvd; 3876 int copy_len = 0; 3877 int rv = 0; 3878 3879 if (dir != VD_COPYIN) 
3880 return (0); /* nothing to do */ 3881 3882 if ((from == NULL) || (to == NULL)) 3883 return (ENXIO); 3884 3885 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 3886 copy_len = sizeof (struct vtoc32); 3887 else 3888 copy_len = sizeof (struct vtoc); 3889 3890 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 3891 3892 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 3893 if (rv != 0) { 3894 kmem_free(tmp_mem, copy_len); 3895 return (EFAULT); 3896 } 3897 3898 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 3899 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 3900 } else { 3901 vtp = tmp_mem; 3902 } 3903 3904 VTOC2VD_VTOC(vtp, &vtvd); 3905 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 3906 kmem_free(tmp_mem, copy_len); 3907 3908 return (0); 3909 } 3910 3911 /* 3912 * Function: 3913 * vdc_get_geom_convert() 3914 * 3915 * Description: 3916 * 3917 * Arguments: 3918 * from - Buffer with data 3919 * to - Buffer where data is to be copied to 3920 * mode - flags passed to ioctl 3921 * dir - direction of copy (in or out) 3922 * 3923 * Return Code: 3924 * 0 - Success 3925 * ENXIO - Invalid buffer passed in 3926 * EFAULT - ddi_copyin of data failed 3927 */ 3928 static int 3929 vdc_get_geom_convert(void *from, void *to, int mode, int dir) 3930 { 3931 struct dk_geom geom; 3932 int copy_len = sizeof (struct dk_geom); 3933 int rv = 0; 3934 3935 if (dir != VD_COPYOUT) 3936 return (0); /* nothing to do */ 3937 3938 if ((from == NULL) || (to == NULL)) 3939 return (ENXIO); 3940 3941 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 3942 rv = ddi_copyout(&geom, to, copy_len, mode); 3943 if (rv != 0) 3944 rv = EFAULT; 3945 3946 return (rv); 3947 } 3948 3949 /* 3950 * Function: 3951 * vdc_set_geom_convert() 3952 * 3953 * Description: 3954 * This routine performs the necessary convertions from the DKIOCSVTOC 3955 * Solaris structure to the format defined in FWARC 2006/195 3956 * 3957 * Arguments: 3958 * from - Buffer with data 3959 * to - Buffer where data is to be copied to 3960 * 
mode	- flags passed to ioctl
 *	dir	- direction of copy (in or out)
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- Invalid buffer passed in
 *	EFAULT	- ddi_copyin of data failed
 */
static int
vdc_set_geom_convert(void *from, void *to, int mode, int dir)
{
	struct dk_geom	dkgeom;		/* geometry as supplied by the caller */
	vd_geom_t	vdgeom;		/* same geometry in vDisk format */

	/* only the inbound direction carries data for a "set" operation */
	if (dir != VD_COPYIN)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	/* fetch the caller's geometry into a local staging structure */
	if (ddi_copyin(from, &dkgeom, sizeof (struct dk_geom), mode) != 0)
		return (EFAULT);

	/* translate it and place the result in the destination buffer */
	DK_GEOM2VD_GEOM(&dkgeom, &vdgeom);
	bcopy(&vdgeom, to, sizeof (vdgeom));

	return (0);
}

/*
 * Function:
 *	vdc_create_fake_geometry()
 *
 * Description:
 *	This routine fakes up the disk info needed for some DKIO ioctls.
 *		- DKIOCINFO
 *		- DKIOCGMEDIAINFO
 *
 *	[ just like lofi(7D) and ramdisk(7D) ]
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
4009 * 4010 * Return Code: 4011 * 0 - Success 4012 */ 4013 static int 4014 vdc_create_fake_geometry(vdc_t *vdc) 4015 { 4016 int rv = 0; 4017 4018 ASSERT(vdc != NULL); 4019 4020 /* 4021 * DKIOCINFO support 4022 */ 4023 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 4024 4025 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 4026 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 4027 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz / vdc->block_size; 4028 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 4029 vdc->cinfo->dki_flags = DKI_FMTVOL; 4030 vdc->cinfo->dki_cnum = 0; 4031 vdc->cinfo->dki_addr = 0; 4032 vdc->cinfo->dki_space = 0; 4033 vdc->cinfo->dki_prio = 0; 4034 vdc->cinfo->dki_vec = 0; 4035 vdc->cinfo->dki_unit = vdc->instance; 4036 vdc->cinfo->dki_slave = 0; 4037 /* 4038 * The partition number will be created on the fly depending on the 4039 * actual slice (i.e. minor node) that is used to request the data. 4040 */ 4041 vdc->cinfo->dki_partition = 0; 4042 4043 /* 4044 * DKIOCGMEDIAINFO support 4045 */ 4046 if (vdc->minfo == NULL) 4047 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 4048 vdc->minfo->dki_media_type = DK_FIXED_DISK; 4049 vdc->minfo->dki_capacity = 1; 4050 vdc->minfo->dki_lbsize = DEV_BSIZE; 4051 4052 return (rv); 4053 } 4054 4055 /* 4056 * Function: 4057 * vdc_setup_disk_layout() 4058 * 4059 * Description: 4060 * This routine discovers all the necessary details about the "disk" 4061 * by requesting the data that is available from the vDisk server and by 4062 * faking up the rest of the data. 4063 * 4064 * Arguments: 4065 * vdc - soft state pointer for this instance of the device driver. 
4066 * 4067 * Return Code: 4068 * 0 - Success 4069 */ 4070 static int 4071 vdc_setup_disk_layout(vdc_t *vdc) 4072 { 4073 dev_t dev; 4074 int slice = 0; 4075 int rv; 4076 4077 ASSERT(vdc != NULL); 4078 4079 rv = vdc_create_fake_geometry(vdc); 4080 if (rv != 0) { 4081 cmn_err(CE_NOTE, "[%d] Failed to create disk geometry (err%d)", 4082 vdc->instance, rv); 4083 } 4084 4085 if (vdc->vtoc == NULL) 4086 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 4087 4088 dev = makedevice(ddi_driver_major(vdc->dip), 4089 VD_MAKE_DEV(vdc->instance, 0)); 4090 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); 4091 if (rv) { 4092 cmn_err(CE_NOTE, "[%d] Failed to get VTOC (err=%d)", 4093 vdc->instance, rv); 4094 return (rv); 4095 } 4096 4097 /* 4098 * Read disk label from start of disk 4099 */ 4100 vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); 4101 4102 /* 4103 * find the slice that represents the entire "disk" and use that to 4104 * read the disk label. The convention in Solaris is that slice 2 4105 * represents the whole disk so we check that it is otherwise we 4106 * default to slice 0 4107 */ 4108 if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && 4109 (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { 4110 slice = 2; 4111 } else { 4112 slice = 0; 4113 } 4114 rv = vdc_populate_descriptor(vdc, (caddr_t)vdc->label, DK_LABEL_SIZE, 4115 VD_OP_BREAD, 0, slice); 4116 4117 return (rv); 4118 } 4119