/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * LDoms virtual disk client (vdc) device driver
 *
 * This driver runs on a guest logical domain and communicates with the virtual
 * disk server (vds) driver running on the service domain which is exporting
 * virtualized "disks" to the guest logical domain.
 *
 * The driver can be divided into four sections:
 *
 * 1) generic device driver housekeeping
 *	_init, _fini, attach, detach, ops structures, etc.
 *
 * 2) communication channel setup
 *	Setup the communications link over the LDC channel that vdc uses to
 *	talk to the vDisk server. Initialise the descriptor ring which
 *	allows the LDC clients to transfer data via memory mappings.
 *
 * 3) Support exported to upper layers (filesystems, etc)
 *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
 *	ioctl calls. vdc either copies the data to be written into the
 *	descriptor ring, or maps the buffer into which the vDisk server
 *	will store the data read. It then sends a message to the vDisk
 *	server requesting it to complete the operation.
 *
 * 4) Handling responses from vDisk server.
 *	The vDisk server will ACK some or all of the messages vdc sends to it
 *	(this is configured during the handshake). Upon receipt of an ACK
 *	vdc will check the descriptor ring and signal to the upper layer
 *	code waiting on the IO.
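 *
 * Locking: the per-instance 'lock' mutex protects the handshake and LDC
 * state, 'dring_lock' serializes allocation of descriptor ring entries,
 * and each local descriptor has its own mutex/CV pair that is used to
 * wait for the vDisk server to process that entry.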
 */

#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/efi_partition.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mach_descrip.h>
#include <sys/modctl.h>
#include <sys/mdeg.h>
#include <sys/note.h>
#include <sys/open.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/promif.h>
#include <sys/vtoc.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>

#include <sys/cdio.h>
#include <sys/dktp/cm.h>
#include <sys/dktp/fdisk.h>
#include <sys/scsi/generic/sense.h>
#include <sys/scsi/impl/uscsi.h>	/* Needed for defn of USCSICMD ioctl */
#include <sys/scsi/targets/sddef.h>

#include <sys/ldoms.h>
#include <sys/ldc.h>
#include <sys/vio_common.h>
#include <sys/vio_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdc.h>

/*
 * function prototypes
 */

/* standard driver functions */
static int	vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
static int	vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int	vdc_strategy(struct buf *buf);
static int	vdc_print(dev_t dev, char *str);
static int	vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int	vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
			cred_t *credp, int *rvalp);
static int	vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
static int	vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);

static int	vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
			void *arg, void **resultp);
static int	vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int	vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);

/* setup */
static int	vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
static int	vdc_do_ldc_init(vdc_t *vdc);
static int	vdc_start_ldc_connection(vdc_t *vdc);
static int	vdc_create_device_nodes(vdc_t *vdc);
static int	vdc_create_device_nodes_props(vdc_t *vdc);
static int	vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id);
static int	vdc_do_ldc_up(vdc_t *vdc);
static void	vdc_terminate_ldc(vdc_t *vdc);
static int	vdc_init_descriptor_ring(vdc_t *vdc);
static void	vdc_destroy_descriptor_ring(vdc_t *vdc);

/* handshake with vds */
static void	vdc_init_handshake_negotiation(void *arg);
static int	vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
static int	vdc_init_attr_negotiation(vdc_t *vdc);
static int	vdc_init_dring_negotiate(vdc_t *vdc);
static void	vdc_reset_connection(vdc_t *vdc, boolean_t resetldc);
static boolean_t vdc_is_able_to_tx_data(vdc_t *vdc, int flag);
static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg);

/* processing incoming messages from vDisk server */
static void	vdc_process_msg_thread(vdc_t *vdc);
static void	vdc_process_msg(void *arg);
static void	vdc_do_process_msg(vdc_t *vdc);
static uint_t	vdc_handle_cb(uint64_t event, caddr_t arg);
static int	vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg);
static int	vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg);
static int	vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg);
static int	vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
static int	vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
static int	vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
static int	vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t needed);
static int	vdc_populate_descriptor(vdc_t *vdc, caddr_t addr,
			size_t nbytes, int op, uint64_t arg, uint64_t slice);
static int	vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx,
			vio_dring_msg_t dmsg);
static int	vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
static int	vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx,
			caddr_t addr, size_t nbytes, int operation);
static boolean_t vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg,
			int num_msgs);

/* dkio */
static int	vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
static int	vdc_create_fake_geometry(vdc_t *vdc);
static int	vdc_setup_disk_layout(vdc_t *vdc);
static int	vdc_null_copy_func(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);

/*
 * Module variables
 */
uint64_t	vdc_hz_timeout;
uint64_t	vdc_usec_timeout = VDC_USEC_TIMEOUT_MIN;
uint64_t	vdc_usec_timeout_dump = VDC_USEC_TIMEOUT_MIN / 300;
uint64_t	vdc_usec_timeout_dring = 10 * MILLISEC;
static int	vdc_retries = VDC_RETRIES;
static int	vdc_dump_retries = VDC_RETRIES * 10;

/* Soft state pointer */
static void	*vdc_state;

/* variable level controlling the verbosity of the error/debug messages */
int	vdc_msglevel = 0;

/*
 * Supported vDisk protocol version pairs.
 *
 * The first array entry is the latest and preferred version.
 */
static const vio_ver_t	vdc_version[] = {{1, 0}};

static void
vdc_msg(const char *format, ...)
203 { 204 va_list args; 205 206 va_start(args, format); 207 vcmn_err(CE_CONT, format, args); 208 va_end(args); 209 } 210 211 static struct cb_ops vdc_cb_ops = { 212 vdc_open, /* cb_open */ 213 vdc_close, /* cb_close */ 214 vdc_strategy, /* cb_strategy */ 215 vdc_print, /* cb_print */ 216 vdc_dump, /* cb_dump */ 217 vdc_read, /* cb_read */ 218 vdc_write, /* cb_write */ 219 vdc_ioctl, /* cb_ioctl */ 220 nodev, /* cb_devmap */ 221 nodev, /* cb_mmap */ 222 nodev, /* cb_segmap */ 223 nochpoll, /* cb_chpoll */ 224 ddi_prop_op, /* cb_prop_op */ 225 NULL, /* cb_str */ 226 D_MP | D_64BIT, /* cb_flag */ 227 CB_REV, /* cb_rev */ 228 vdc_aread, /* cb_aread */ 229 vdc_awrite /* cb_awrite */ 230 }; 231 232 static struct dev_ops vdc_ops = { 233 DEVO_REV, /* devo_rev */ 234 0, /* devo_refcnt */ 235 vdc_getinfo, /* devo_getinfo */ 236 nulldev, /* devo_identify */ 237 nulldev, /* devo_probe */ 238 vdc_attach, /* devo_attach */ 239 vdc_detach, /* devo_detach */ 240 nodev, /* devo_reset */ 241 &vdc_cb_ops, /* devo_cb_ops */ 242 NULL, /* devo_bus_ops */ 243 nulldev /* devo_power */ 244 }; 245 246 static struct modldrv modldrv = { 247 &mod_driverops, 248 "virtual disk client %I%", 249 &vdc_ops, 250 }; 251 252 static struct modlinkage modlinkage = { 253 MODREV_1, 254 &modldrv, 255 NULL 256 }; 257 258 /* -------------------------------------------------------------------------- */ 259 260 /* 261 * Device Driver housekeeping and setup 262 */ 263 264 int 265 _init(void) 266 { 267 int status; 268 269 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 270 return (status); 271 if ((status = mod_install(&modlinkage)) != 0) 272 ddi_soft_state_fini(&vdc_state); 273 return (status); 274 } 275 276 int 277 _info(struct modinfo *modinfop) 278 { 279 return (mod_info(&modlinkage, modinfop)); 280 } 281 282 int 283 _fini(void) 284 { 285 int status; 286 287 if ((status = mod_remove(&modlinkage)) != 0) 288 return (status); 289 ddi_soft_state_fini(&vdc_state); 290 return (0); 291 } 292 293 static int 294 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 295 { 296 _NOTE(ARGUNUSED(dip)) 297 298 int instance = SDUNIT(getminor((dev_t)arg)); 299 vdc_t *vdc = NULL; 300 301 switch (cmd) { 302 case DDI_INFO_DEVT2DEVINFO: 303 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 304 *resultp = NULL; 305 return (DDI_FAILURE); 306 } 307 *resultp = vdc->dip; 308 return (DDI_SUCCESS); 309 case DDI_INFO_DEVT2INSTANCE: 310 *resultp = (void *)(uintptr_t)instance; 311 return (DDI_SUCCESS); 312 default: 313 *resultp = NULL; 314 return (DDI_FAILURE); 315 } 316 } 317 318 static int 319 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 320 { 321 int instance; 322 int rv; 323 uint_t retries = 0; 324 vdc_t *vdc = NULL; 325 326 switch (cmd) { 327 case DDI_DETACH: 328 /* the real work happens below */ 329 break; 330 case DDI_SUSPEND: 331 /* nothing to do for this non-device */ 332 return (DDI_SUCCESS); 333 default: 334 return (DDI_FAILURE); 335 } 336 337 ASSERT(cmd == DDI_DETACH); 338 instance = ddi_get_instance(dip); 339 PR1("%s[%d] Entered\n", __func__, instance); 340 341 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 342 vdc_msg("%s[%d]: Could not get state structure.", 343 __func__, instance); 344 return (DDI_FAILURE); 345 } 346 347 if (vdc->open) { 348 PR0("%s[%d]: Cannot detach: device is open", 349 __func__, instance); 350 return (DDI_FAILURE); 351 } 352 353 PR0("%s[%d] proceeding...\n", __func__, instance); 354 355 /* 356 * try and disable callbacks to prevent another handshake 
	 */
	rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
	PR0("%s[%d] callback disabled (rv=%d)\n", __func__, instance, rv);

	/*
	 * Prevent any more attempts to start a handshake with the vdisk
	 * server and tear down the existing connection.
	 */
	mutex_enter(&vdc->lock);
	vdc->initialized |= VDC_HANDSHAKE_STOP;
	vdc_reset_connection(vdc, B_TRUE);
	mutex_exit(&vdc->lock);

	if (vdc->initialized & VDC_THREAD) {
		mutex_enter(&vdc->msg_proc_lock);
		vdc->msg_proc_thr_state = VDC_THR_STOP;
		vdc->msg_pending = B_TRUE;
		cv_signal(&vdc->msg_proc_cv);

		while (vdc->msg_proc_thr_state != VDC_THR_DONE) {
			PR0("%s[%d]: Waiting for thread to exit\n",
			    __func__, instance);
			rv = cv_timedwait(&vdc->msg_proc_cv,
			    &vdc->msg_proc_lock, VD_GET_TIMEOUT_HZ(1));
			if ((rv == -1) && (retries++ > vdc_retries))
				break;
		}
		mutex_exit(&vdc->msg_proc_lock);
	}

	mutex_enter(&vdc->lock);

	if (vdc->initialized & VDC_DRING)
		vdc_destroy_descriptor_ring(vdc);

	if (vdc->initialized & VDC_LDC)
		vdc_terminate_ldc(vdc);

	mutex_exit(&vdc->lock);

	if (vdc->initialized & VDC_MINOR) {
		ddi_prop_remove_all(dip);
		ddi_remove_minor_node(dip, NULL);
	}

	if (vdc->initialized & VDC_LOCKS) {
		mutex_destroy(&vdc->lock);
		mutex_destroy(&vdc->attach_lock);
		mutex_destroy(&vdc->msg_proc_lock);
		mutex_destroy(&vdc->dring_lock);
		cv_destroy(&vdc->cv);
		cv_destroy(&vdc->attach_cv);
		cv_destroy(&vdc->msg_proc_cv);
	}

	if (vdc->minfo)
		kmem_free(vdc->minfo, sizeof (struct dk_minfo));

	if (vdc->cinfo)
		kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));

	if (vdc->vtoc)
		kmem_free(vdc->vtoc, sizeof (struct vtoc));

	if (vdc->label)
		kmem_free(vdc->label, DK_LABEL_SIZE);

	if (vdc->initialized & VDC_SOFT_STATE)
		ddi_soft_state_free(vdc_state, instance);

	PR0("%s[%d] End %p\n", __func__, instance, vdc);

	return (DDI_SUCCESS);
}


static int
vdc_do_attach(dev_info_t *dip)
{
	int	instance;
	vdc_t	*vdc = NULL;
	int	status;
	uint_t	retries = 0;

	ASSERT(dip != NULL);

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
		vdc_msg("%s:(%d): Couldn't alloc state structure",
		    __func__, instance);
		return (DDI_FAILURE);
	}

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s:(%d): Could not get state structure.",
		    __func__, instance);
		return (DDI_FAILURE);
	}

	/*
	 * We assign the value to initialized in this case to zero out the
	 * variable and then set bits in it to indicate what has been done
	 */
	vdc->initialized = VDC_SOFT_STATE;

	vdc_hz_timeout = drv_usectohz(vdc_usec_timeout);

	vdc->dip	= dip;
	vdc->instance	= instance;
	vdc->open	= 0;
	vdc->vdisk_type	= VD_DISK_TYPE_UNK;
	vdc->state	= VD_STATE_INIT;
	vdc->ldc_state	= 0;
	vdc->session_id	= 0;
	vdc->block_size	= DEV_BSIZE;
	vdc->max_xfer_sz = maxphys / DEV_BSIZE;

	vdc->vtoc = NULL;
	vdc->cinfo = NULL;
	vdc->minfo = NULL;

	mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->attach_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->msg_proc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->dring_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->attach_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->msg_proc_cv, NULL, CV_DRIVER, NULL);
	vdc->initialized |= VDC_LOCKS;

	vdc->msg_pending = B_FALSE;
	vdc->msg_proc_thr_id = thread_create(NULL, 0, vdc_process_msg_thread,
	    vdc, 0, &p0, TS_RUN, minclsyspri);
	if (vdc->msg_proc_thr_id == NULL) {
		cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread",
		    instance);
		return (DDI_FAILURE);
	}
	vdc->initialized |= VDC_THREAD;

	/* initialise LDC channel which will be used to communicate with vds */
	if (vdc_do_ldc_init(vdc) != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance);
		return (DDI_FAILURE);
	}

	/* Bring up connection with vds via LDC */
	status = vdc_start_ldc_connection(vdc);
	if (status != 0) {
		vdc_msg("%s[%d] Could not start LDC", __func__, instance);
		return (DDI_FAILURE);
	}

	/*
	 * We need to wait until the handshake has completed before leaving
	 * the attach(). This is to allow the device node(s) to be created
	 * and the first usage of the filesystem to succeed.
	 */
	mutex_enter(&vdc->attach_lock);
	while ((vdc->ldc_state != LDC_UP) ||
	    (vdc->state != VD_STATE_DATA)) {

		PR0("%s[%d] handshake in progress [VD %d (LDC %d)]\n",
		    __func__, instance, vdc->state, vdc->ldc_state);

		status = cv_timedwait(&vdc->attach_cv, &vdc->attach_lock,
		    VD_GET_TIMEOUT_HZ(1));
		if (status == -1) {
			if (retries >= vdc_retries) {
				PR0("%s[%d] Give up handshake wait.\n",
				    __func__, instance);
				mutex_exit(&vdc->attach_lock);
				return (DDI_FAILURE);
			} else {
				PR0("%s[%d] Retry #%d for handshake.\n",
				    __func__, instance, retries);
				vdc_init_handshake_negotiation(vdc);
				retries++;
			}
		}
	}
	mutex_exit(&vdc->attach_lock);

	/*
	 * Once the handshake is complete, we can use the DRing to send
	 * requests to the vDisk server to calculate the geometry and
	 * VTOC of the "disk"
	 */
	status = vdc_setup_disk_layout(vdc);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Failed to discover disk layout (err=%d)",
		    vdc->instance, status);
	}

	/*
	 * Now that we have the device info we can create the
	 * device nodes and properties
	 */
	status = vdc_create_device_nodes(vdc);
	if (status) {
		cmn_err(CE_NOTE, "[%d] Failed to create device nodes",
		    instance);
		return (status);
	}
	status = vdc_create_device_nodes_props(vdc);
	if (status) {
		cmn_err(CE_NOTE, "[%d] Failed to create device nodes"
		    " properties (%d)", instance, status);
		return (status);
	}

	ddi_report_dev(dip);

	PR0("%s[%d] Attach completed\n", __func__, instance);
	return (status);
}

static int
vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	status;

	switch (cmd) {
	case DDI_ATTACH:
		if ((status = vdc_do_attach(dip)) != 0)
			(void) vdc_detach(dip, DDI_DETACH);
		return (status);
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}

static int
vdc_do_ldc_init(vdc_t *vdc)
{
	int		status = 0;
	ldc_status_t	ldc_state;
	ldc_attr_t	ldc_attr;
	uint64_t	ldc_id = 0;
	dev_info_t	*dip = NULL;

	ASSERT(vdc != NULL);

	dip = vdc->dip;
	vdc->initialized |= VDC_LDC;

	if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) {
		vdc_msg("%s: Failed to get <ldc_id> property\n", __func__);
		return (EIO);
	}
	vdc->ldc_id = ldc_id;

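	/*
	 * Set up the attributes for the LDC channel to vds: block-device
	 * class, this driver instance, unreliable transport mode and the
	 * default vdc queue length.
	 */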
	ldc_attr.devclass = LDC_DEV_BLK;
	ldc_attr.instance = vdc->instance;
	ldc_attr.mode = LDC_MODE_UNRELIABLE;	/* unreliable transport */
	ldc_attr.qlen = VD_LDC_QLEN;

	if ((vdc->initialized & VDC_LDC_INIT) == 0) {
		status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] ldc_init(chan %ld) returned %d",
			    vdc->instance, ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_INIT;
	}
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		vdc_msg("Cannot discover LDC status [err=%d].", status);
		return (status);
	}
	vdc->ldc_state = ldc_state;

	if ((vdc->initialized & VDC_LDC_CB) == 0) {
		status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
		    (caddr_t)vdc);
		if (status != 0) {
			vdc_msg("%s: ldc_reg_callback()=%d", __func__, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_CB;
	}

	vdc->initialized |= VDC_LDC;

	/*
	 * At this stage we have initialised LDC, we will now try and open
	 * the connection.
	 */
	if (vdc->ldc_state == LDC_INIT) {
		status = ldc_open(vdc->ldc_handle);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] ldc_open(chan %ld) returned %d",
			    vdc->instance, vdc->ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_OPEN;
	}

	return (status);
}

static int
vdc_start_ldc_connection(vdc_t *vdc)
{
	int	status = 0;

	ASSERT(vdc != NULL);

	mutex_enter(&vdc->lock);

	if (vdc->ldc_state == LDC_UP) {
		PR0("%s: LDC is already UP ..\n", __func__);
		mutex_exit(&vdc->lock);
		return (0);
	}

	status = vdc_do_ldc_up(vdc);

	PR0("%s[%d] Finished bringing up LDC\n", __func__, vdc->instance);

	mutex_exit(&vdc->lock);

	return (status);
}


/*
 * Function:
 *	vdc_create_device_nodes
 *
 * Description:
 *	This function creates the block and character device nodes under
 *	/devices along with the node properties. It is called as part of
 *	the attach(9E) of the instance during the handshake with vds after
 *	vds has sent the attributes to vdc.
 *
 *	If the device is of type VD_DISK_TYPE_SLICE then the minor node
 *	of 2 is used in keeping with the Solaris convention that slice 2
 *	refers to a whole disk.
 *	Slices start at 'a'
 *
 * Parameters:
 *	vdc - soft state pointer
 *
 * Return Values
 *	0	- Success
 *	EIO	- Failed to create node
 *	EINVAL	- Unknown type of disk exported
 */
static int
vdc_create_device_nodes(vdc_t *vdc)
{
	/* uses NNNN which is OK as long as # of disks <= 10000 */
	char		name[sizeof ("disk@NNNN:s,raw")];
	dev_info_t	*dip = NULL;
	int		instance;
	int		num_slices = 1;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	for (i = 0; i < num_slices; i++) {
		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
		if (ddi_create_minor_node(dip, name, S_IFBLK,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			vdc_msg("%s[%d]: Couldn't add block node %s.",
			    __func__, instance, name);
			return (EIO);
		}

		/* if any device node is created we set this flag */
		vdc->initialized |= VDC_MINOR;

		(void) snprintf(name, sizeof (name), "%c%s",
		    'a' + i, ",raw");
		if (ddi_create_minor_node(dip, name, S_IFCHR,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			vdc_msg("%s[%d]: Could not add raw node %s.",
			    __func__, instance, name);
			return (EIO);
		}
	}

	return (0);
}

/*
 * Function:
 *	vdc_create_device_nodes_props
 *
 * Description:
 *	This function creates the size properties on the device nodes
 *	created by vdc_create_device_nodes(). It is called as part of
 *	the attach(9E) of the instance during the handshake with vds after
 *	vds has sent the attributes to vdc.
 *
 * Parameters:
 *	vdc - soft state pointer
 *
 * Return Values
 *	0	- Success
 *	EIO	- Failed to create device node property
 *	EINVAL	- Unknown type of disk exported
 */
static int
vdc_create_device_nodes_props(vdc_t *vdc)
{
	dev_info_t	*dip = NULL;
	int		instance;
	int		num_slices = 1;
	int64_t		size = 0;
	dev_t		dev;
	int		rv;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) {
		cmn_err(CE_NOTE, "![%d] Could not create device node property."
797 " No VTOC available", instance); 798 return (ENXIO); 799 } 800 801 switch (vdc->vdisk_type) { 802 case VD_DISK_TYPE_DISK: 803 num_slices = V_NUMPAR; 804 break; 805 case VD_DISK_TYPE_SLICE: 806 num_slices = 1; 807 break; 808 case VD_DISK_TYPE_UNK: 809 default: 810 return (EINVAL); 811 } 812 813 for (i = 0; i < num_slices; i++) { 814 dev = makedevice(ddi_driver_major(dip), 815 VD_MAKE_DEV(instance, i)); 816 817 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 818 PR0("%s[%d] sz %ld (%ld Mb) p_size %lx\n", 819 __func__, instance, size, size / (1024 * 1024), 820 vdc->vtoc->v_part[i].p_size); 821 822 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 823 if (rv != DDI_PROP_SUCCESS) { 824 vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", 825 __func__, instance, VDC_SIZE_PROP_NAME, size); 826 return (EIO); 827 } 828 829 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 830 lbtodb(size)); 831 if (rv != DDI_PROP_SUCCESS) { 832 vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", __func__, 833 instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 834 return (EIO); 835 } 836 } 837 838 return (0); 839 } 840 841 static int 842 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 843 { 844 _NOTE(ARGUNUSED(cred)) 845 846 int instance; 847 vdc_t *vdc; 848 849 ASSERT(dev != NULL); 850 instance = SDUNIT(getminor(*dev)); 851 852 PR0("%s[%d] minor = %d flag = %x, otyp = %x\n", __func__, instance, 853 getminor(*dev), flag, otyp); 854 855 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 856 return (EINVAL); 857 858 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 859 vdc_msg("%s[%d] Could not get state.", __func__, instance); 860 return (ENXIO); 861 } 862 863 /* 864 * Check to see if we can communicate with vds 865 */ 866 if (!vdc_is_able_to_tx_data(vdc, flag)) { 867 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 868 return (ENOLINK); 869 } 870 871 mutex_enter(&vdc->lock); 872 vdc->open++; 873 mutex_exit(&vdc->lock); 874 875 return (0); 876 } 877 878 static int 879 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 880 { 881 _NOTE(ARGUNUSED(cred)) 882 883 int instance; 884 vdc_t *vdc; 885 886 instance = SDUNIT(getminor(dev)); 887 888 PR0("%s[%d] flag = %x, otyp = %x\n", __func__, instance, flag, otyp); 889 890 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 891 return (EINVAL); 892 893 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 894 vdc_msg("%s[%d] Could not get state.", __func__, instance); 895 return (ENXIO); 896 } 897 898 /* 899 * Check to see if we can communicate with vds 900 */ 901 if (!vdc_is_able_to_tx_data(vdc, 0)) { 902 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 903 return (ETIMEDOUT); 904 } 905 906 if (vdc->dkio_flush_pending) { 907 PR0("%s[%d]: Cannot detach: %d outstanding DKIO flushes", 908 __func__, instance, vdc->dkio_flush_pending); 909 return (EBUSY); 910 } 911 912 /* 913 * Should not need the mutex here, since the framework should protect 914 * against more opens on this device, but just in case. 
	 */
	mutex_enter(&vdc->lock);
	vdc->open--;
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	_NOTE(ARGUNUSED(credp))
	_NOTE(ARGUNUSED(rvalp))

	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
}

static int
vdc_print(dev_t dev, char *str)
{
	cmn_err(CE_NOTE, "vdc%d: %s", SDUNIT(getminor(dev)), str);
	return (0);
}

static int
vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	buf_t	*buf;	/* BWRITE requests need to be in a buf_t structure */
	int	rv;
	size_t	nbytes = nblk * DEV_BSIZE;
	int	instance = SDUNIT(getminor(dev));
	vdc_t	*vdc = NULL;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s (%d): Could not get state.", __func__, instance);
		return (ENXIO);
	}

	buf = kmem_alloc(sizeof (buf_t), KM_SLEEP);
	bioinit(buf);
	buf->b_un.b_addr = addr;
	buf->b_bcount = nbytes;
	buf->b_flags = B_BUSY | B_WRITE;
	buf->b_dev = dev;
	rv = vdc_populate_descriptor(vdc, (caddr_t)buf, nbytes,
	    VD_OP_BWRITE, blkno, SDPART(getminor(dev)));

	/*
	 * If the OS instance is panicking, the call above will ensure that
	 * the descriptor is done before returning. This should always be the
	 * case when coming through this function but we check just in case
	 * and wait if necessary for the vDisk server to ACK and trigger
	 * the biodone.
	 */
	if (!ddi_in_panic())
		rv = biowait(buf);

	biofini(buf);
	kmem_free(buf, sizeof (buf_t));

	PR1("%s: status=%d\n", __func__, rv);

	return (rv);
}

/* -------------------------------------------------------------------------- */

/*
 * Disk access routines
 *
 */

/*
 * vdc_strategy()
 *
 * Return Value:
 *	0:	As per strategy(9E), the strategy() function must return 0
 *		[ bioerror(9f) sets b_flags to the proper error code ]
 */
static int
vdc_strategy(struct buf *buf)
{
	int	rv = -1;
	vdc_t	*vdc = NULL;
	int	instance = SDUNIT(getminor(buf->b_edev));
	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;

	PR1("%s: %s %ld bytes at block %ld : b_addr=0x%p",
	    __func__, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, buf->b_un.b_addr);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s[%d]: Could not get state.", __func__, instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc);

	ASSERT(buf->b_bcount <= (vdc->max_xfer_sz * vdc->block_size));

	if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) {
		PR0("%s: Not ready to transmit data\n", __func__);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}
	bp_mapin(buf);

	rv = vdc_populate_descriptor(vdc, (caddr_t)buf, buf->b_bcount, op,
	    buf->b_lblkno, SDPART(getminor(buf->b_edev)));

	/*
	 * If the request was successfully sent, the strategy call returns and
	 * the ACK handler calls the bioxxx functions when the vDisk server is
	 * done.
	 */
	if (rv) {
		PR0("[%d] Failed to read/write (err=%d)\n", instance, rv);
		bioerror(buf, rv);
		biodone(buf);
	}

	return (0);
}


static int
vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	PR1("vdc_read(): Entered");
	return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio));
}

static int
vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	PR1("vdc_write(): Entered");
	return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	PR1("vdc_aread(): Entered");
	return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio));
}

static int
vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	PR1("vdc_awrite(): Entered");
	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio));
}


/* -------------------------------------------------------------------------- */

/*
 * Handshake support
 */

/*
 * vdc_init_handshake_negotiation
 *
 * Description:
 *	This function is called to trigger the handshake negotiations between
 *	the client (vdc) and the server (vds). It may be called multiple times.
 *
 * Parameters:
 *	vdc - soft state pointer
 */
static void
vdc_init_handshake_negotiation(void *arg)
{
	vdc_t		*vdc = (vdc_t *)(void *)arg;
	ldc_status_t	ldc_state;
	vd_state_t	state;
	int		status;

	ASSERT(vdc != NULL);

	PR0("[%d] Initializing vdc<->vds handshake\n", vdc->instance);

	/* get LDC state */
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't get LDC status: err=%d",
		    vdc->instance, status);
		return;
	}

	/*
	 * If the LDC connection is not UP we bring it up now and return.
	 * The handshake will be started again when the callback is
	 * triggered due to the UP event.
	 */
	if (ldc_state != LDC_UP) {
		PR0("[%d] Triggering an LDC_UP and returning\n", vdc->instance);
		(void) vdc_do_ldc_up(vdc);
		return;
	}

	mutex_enter(&vdc->lock);
	/*
	 * Do not continue if another thread has triggered a handshake which
	 * has not been reset or detach() has stopped further handshakes.
	 */
	if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) {
		PR0("%s[%d] Negotiation not triggered. [init=%x]\n",
		    __func__, vdc->instance, vdc->initialized);
		mutex_exit(&vdc->lock);
		return;
	}

	if (vdc->hshake_cnt++ > vdc_retries) {
		cmn_err(CE_NOTE, "[%d] Failed repeatedly to complete handshake"
		    " with vDisk server", vdc->instance);
		mutex_exit(&vdc->lock);
		return;
	}

	vdc->initialized |= VDC_HANDSHAKE;
	vdc->ldc_state = ldc_state;

	state = vdc->state;

	if (state == VD_STATE_INIT) {
		/*
		 * Set the desired version parameter to the first entry in the
		 * version array. If this specific version is not supported,
		 * the response handling code will step down the version number
		 * to the next array entry and deal with it accordingly.
		 */
		(void) vdc_init_ver_negotiation(vdc, vdc_version[0]);
	} else if (state == VD_STATE_VER) {
		(void) vdc_init_attr_negotiation(vdc);
	} else if (state == VD_STATE_ATTR) {
		(void) vdc_init_dring_negotiate(vdc);
	} else if (state == VD_STATE_DATA) {
		/*
		 * nothing to do - we have already completed the negotiation
		 * and we can transmit data when ready.
		 */
		PR0("%s[%d] Negotiation triggered after handshake completed",
		    __func__, vdc->instance);
	}

	mutex_exit(&vdc->lock);
}

/*
 * Function:
 *	vdc_init_ver_negotiation()
 *
 * Description:
 *	Sends a version-info message to vds to start the first (version)
 *	phase of the handshake negotiation.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	ver	- version to propose to the vDisk server.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
{
	vio_ver_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status = -1;

	PR0("%s: Entered.\n", __func__);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	/*
	 * set the Session ID to a unique value
	 * (the lower 32 bits of the clock tick)
	 */
	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);

	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_VER_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	pkt.dev_class = VDEV_DISK;
	pkt.ver_major = ver.major;
	pkt.ver_minor = ver.minor;

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	PR0("%s: vdc_send(status = %d)\n", __func__, status);

	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
		PR0("%s[%d] vdc_send failed: id(%lx) rv(%d) size(%ld)\n",
		    __func__, vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vio_ver_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_init_attr_negotiation()
 *
 * Description:
 *	Sends an attribute-info message to vds describing this client's
 *	transfer parameters (block size, maximum transfer size, xfer mode).
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_attr_negotiation(vdc_t *vdc)
{
	vd_attr_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	PR0("%s[%d] entered\n", __func__, vdc->instance);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.max_xfer_sz = vdc->max_xfer_sz;
	pkt.vdisk_block_size = vdc->block_size;
	pkt.xfer_mode = VIO_DRING_MODE;
	pkt.operations = 0;	/* server will set bits of valid operations */
	pkt.vdisk_type = 0;	/* server will set to valid device type */
	pkt.vdisk_size = 0;	/* server will set to valid size */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	PR0("%s: vdc_send(status = %d)\n", __func__, status);

	if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) {
		PR0("%s[%d] ldc_write failed: id(%lx) rv(%d) size(%ld)\n",
		    __func__, vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vd_attr_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_init_dring_negotiate()
 *
 * Description:
 *	Creates and binds the descriptor ring and then sends a DRing
 *	registration message to vds, the final phase of the handshake.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_dring_negotiate(vdc_t *vdc)
{
	vio_dring_reg_msg_t	pkt;
	size_t			msglen = sizeof (pkt);
	int			status = -1;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	status = vdc_init_descriptor_ring(vdc);
	if (status != 0) {
		cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n",
		    vdc->instance, status);
		vdc_destroy_descriptor_ring(vdc);
		vdc_reset_connection(vdc, B_FALSE);
		return (status);
	}
	PR0("%s[%d] Init of descriptor ring completed (status = %d)\n",
	    __func__, vdc->instance, status);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_DRING_REG;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.dring_ident = 0;
	pkt.num_descriptors = VD_DRING_LEN;
	pkt.descriptor_size = VD_DRING_ENTRY_SZ;
	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
	pkt.ncookies = vdc->dring_cookie_count;
	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0) {
		PR0("%s[%d] Failed to register DRing (status = %d)\n",
		    __func__, vdc->instance, status);
		vdc_reset_connection(vdc, B_FALSE);
	}

	return (status);
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write() [ up to 'vdc_retries' times ], otherwise
 *	we return the error returned by LDC.
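 *	On EIO or ECONNRESET from ldc_write() the connection state is also
 *	reset via vdc_reset_connection().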
 *
 * Arguments:
 *	ldc_handle	- LDC handle for the channel this instance of vdc uses
 *	pkt		- address of LDC message to be sent
 *	msglen		- the size of the message being sent. When the function
 *			  returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0		- Success.
 *	EINVAL		- pkt or msglen were NULL
 *	ECONNRESET	- The connection was not up.
 *	EWOULDBLOCK	- LDC queue is full
 *	xxx		- other error codes returned by ldc_write
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	retries = 0;
	int	status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

	do {
		size = *msglen;
		status = ldc_write(vdc->ldc_handle, pkt, &size);
	} while (status == EWOULDBLOCK && retries++ < vdc_retries);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		vdc_reset_connection(vdc, B_TRUE);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}

/*
 * Function:
 *	vdc_get_ldc_id()
 *
 * Description:
 *	This function gets the 'ldc-id' for this particular instance of vdc.
 *	The id returned is the guest domain channel endpoint LDC uses for
 *	communication with vds.
 *
 * Arguments:
 *	dip	- dev info pointer for this instance of the device driver.
 *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 *	ENXIO	- Unexpected error communicating with MD framework
 */
static int
vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id)
{
	int		status = ENOENT;
	char		*node_name = NULL;
	md_t		*mdp = NULL;
	int		num_nodes;
	int		num_vdevs;
	int		num_chans;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	*chanp = NULL;
	boolean_t	found_inst = B_FALSE;
	int		listsz;
	int		idx;
	uint64_t	md_inst;
	int		obp_inst;
	int		instance = ddi_get_instance(dip);

	ASSERT(ldc_id != NULL);
	*ldc_id = 0;

	/*
	 * Get the OBP instance number for comparison with the MD instance
	 *
	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris.  Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance.
	 * If the "reg" property cannot be found, the device tree state is
	 * presumably so broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
		return (ENOENT);
	}
	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    OBP_REG, -1);
	PR1("%s[%d]: OBP inst=%d\n", __func__, instance, obp_inst);

	/*
	 * We now walk the MD nodes and if an instance of a vdc node matches
	 * the instance got from OBP we get the ldc-id property.
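	 * If the matching vdisk node has more than one channel endpoint,
	 * only the first one found is used.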
	 */
	if ((mdp = md_get_handle()) == NULL) {
		cmn_err(CE_WARN, "unable to init machine description");
		return (ENXIO);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);

	/* allocate memory for nodes */
	listp = kmem_zalloc(listsz, KM_SLEEP);
	chanp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Search for all the virtual devices, we will then check to see which
	 * ones are disk nodes.
	 */
	num_vdevs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, VDC_MD_VDEV_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdevs <= 0) {
		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;
	}

	PR1("%s[%d] num_vdevs=%d\n", __func__, instance, num_vdevs);
	for (idx = 0; idx < num_vdevs; idx++) {
		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
		if ((status != 0) || (node_name == NULL)) {
			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
			    ": err %d", VDC_MD_VDEV_NAME, status);
			continue;
		}

		PR1("%s[%d] Found node %s\n", __func__, instance, node_name);
		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
			status = md_get_prop_val(mdp, listp[idx],
			    VDC_MD_CFG_HDL, &md_inst);
			PR1("%s[%d] vdc inst# in MD=%ld\n",
			    __func__, instance, md_inst);
			if ((status == 0) && (md_inst == obp_inst)) {
				found_inst = B_TRUE;
				break;
			}
		}
	}

	if (!found_inst) {
		cmn_err(CE_NOTE, "Unable to find correct '%s' node",
		    VDC_MD_DISK_NAME);
		status = ENOENT;
		goto done;
	}
	PR0("%s[%d] MD inst=%ld\n", __func__, instance, md_inst);

	/* get the channels for this node */
	num_chans = md_scan_dag(mdp, listp[idx],
	    md_find_name(mdp, VDC_MD_CHAN_NAME),
	    md_find_name(mdp, "fwd"), chanp);

	/* expecting at least one channel */
	if (num_chans <= 0) {
		cmn_err(CE_NOTE, "No '%s' node for '%s' port",
		    VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;

	} else if (num_chans != 1) {
		PR0("%s[%d] Expected 1 '%s' node for '%s' port, found %d\n",
		    __func__, instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME,
		    num_chans);
	}

	/*
	 * We use the first channel found (index 0), irrespective of how
	 * many are there in total.
	 */
	if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) {
		cmn_err(CE_NOTE, "Channel '%s' property not found",
		    VDC_ID_PROP);
		status = ENOENT;
	}

	PR0("%s[%d] LDC id is 0x%lx\n", __func__, instance, *ldc_id);

done:
	if (chanp)
		kmem_free(chanp, listsz);
	if (listp)
		kmem_free(listp, listsz);

	(void) md_fini_handle(mdp);

	return (status);
}

static int
vdc_do_ldc_up(vdc_t *vdc)
{
	int	status;

	PR0("[%d] Bringing up channel %lx\n", vdc->instance, vdc->ldc_id);

	if ((status = ldc_up(vdc->ldc_handle)) != 0) {
		switch (status) {
		case ECONNREFUSED:	/* listener not ready at other end */
			PR0("%s: ldc_up(%ld,...) return %d\n",
			    __func__, vdc->ldc_id, status);
			status = 0;
			break;
		default:
			cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: "
			    "channel=%ld, err=%d",
			    vdc->instance, vdc->ldc_id, status);
		}
	}

	return (status);
}


/*
 * vdc_is_able_to_tx_data()
 *
 * Description:
 *	This function checks if we are able to send data to the
 *	vDisk server (vds). The LDC connection needs to be up and
 *	vdc & vds need to have completed the handshake negotiation.
 *
 * Parameters:
 *	vdc	- soft state pointer
 *	flag	- flag to indicate if we can block or not
 *		  [ If O_NONBLOCK or O_NDELAY (which are defined in
 *		  open(2)) are set then do not block ]
 *
 * Return Values
 *	B_TRUE	- can talk to vds
 *	B_FALSE	- unable to talk to vds
 */
static boolean_t
vdc_is_able_to_tx_data(vdc_t *vdc, int flag)
{
	vd_state_t	state;
	uint32_t	ldc_state;
	uint_t		retries = 0;
	int		rv = -1;

	ASSERT(vdc != NULL);

	mutex_enter(&vdc->lock);
	state = vdc->state;
	ldc_state = vdc->ldc_state;
	mutex_exit(&vdc->lock);

	if ((state == VD_STATE_DATA) && (ldc_state == LDC_UP))
		return (B_TRUE);

	if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) {
		PR0("%s[%d] Not ready to tx - state %d LDC state %d\n",
		    __func__, vdc->instance, state, ldc_state);
		return (B_FALSE);
	}

	/*
	 * We want to check and see if any negotiations triggered earlier
	 * have succeeded. We are prepared to wait a little while in case
	 * they are still in progress.
	 */
	mutex_enter(&vdc->lock);
	while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) {
		PR0("%s: Waiting for connection at state %d (LDC state %d)\n",
		    __func__, vdc->state, vdc->ldc_state);

		rv = cv_timedwait(&vdc->cv, &vdc->lock,
		    VD_GET_TIMEOUT_HZ(retries));

		/*
		 * An rv of -1 indicates that we timed out without the LDC
		 * state changing so it looks like the other side (vds) is
		 * not yet ready/responding.
		 *
		 * Any other value of rv indicates that the LDC triggered an
		 * interrupt so we just loop again, check the handshake state
		 * and keep waiting if necessary.
		 */
		if (rv == -1) {
			if (retries >= vdc_retries) {
				PR0("%s[%d] handshake wait timed out.\n",
				    __func__, vdc->instance);
				mutex_exit(&vdc->lock);
				return (B_FALSE);
			} else {
				PR1("%s[%d] Retry #%d for handshake timedout\n",
				    __func__, vdc->instance, retries);
				retries++;
			}
		}
	}

	ASSERT(vdc->ldc_state == LDC_UP);
	ASSERT(vdc->state == VD_STATE_DATA);

	mutex_exit(&vdc->lock);

	return (B_TRUE);
}


/*
 * Function:
 *	vdc_terminate_ldc()
 *
 * Description:
 *	Closes the LDC channel, unregisters the event callback and tears
 *	down the LDC state set up by vdc_do_ldc_init().
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_terminate_ldc(vdc_t *vdc)
{
	int	instance = ddi_get_instance(vdc->dip);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	PR0("%s[%d] initialized=%x\n", __func__, instance, vdc->initialized);

	if (vdc->initialized & VDC_LDC_OPEN) {
		PR0("%s[%d]: ldc_close()\n", __func__, instance);
		(void) ldc_close(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC_CB) {
		PR0("%s[%d]: ldc_unreg_callback()\n", __func__, instance);
		(void) ldc_unreg_callback(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC) {
		PR0("%s[%d]: ldc_fini()\n", __func__, instance);
		(void) ldc_fini(vdc->ldc_handle);
		vdc->ldc_handle = NULL;
	}

	vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN);
}

/*
 * Function:
 *	vdc_reset_connection()
 *
 * Description:
 *	Resets the handshake state back to VD_STATE_INIT and, if requested,
 *	also resets the underlying LDC connection.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the device
 *			  driver.
 *	reset_ldc	- Flag whether or not to reset the LDC connection also.
 *
 * Return Code:
 *	None
 */
static void
vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc)
{
	int	status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	PR0("%s[%d] Entered\n", __func__, vdc->instance);

	vdc->state = VD_STATE_INIT;

	if (reset_ldc) {
		status = ldc_reset(vdc->ldc_handle);
		PR0("%s[%d] ldc_reset() = %d\n",
		    __func__, vdc->instance, status);
	}

	vdc->initialized &= ~VDC_HANDSHAKE;
	PR0("%s[%d] init=%x\n", __func__, vdc->instance, vdc->initialized);
}

/* -------------------------------------------------------------------------- */

/*
 * Descriptor Ring helper routines
 */

/*
 * Function:
 *	vdc_init_descriptor_ring()
 *
 * Description:
 *	Creates the descriptor ring, binds it to the LDC channel, allocates
 *	the local copy of the ring and marks all descriptors free.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_descriptor_ring(vdc_t *vdc)
{
	vd_dring_entry_t	*dep = NULL;	/* DRing Entry pointer */
	int			status = 0;
	int			i;

	PR0("%s[%d] initialized=%x\n",
	    __func__, vdc->instance, vdc->initialized);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(vdc->ldc_handle != NULL);

	if ((vdc->initialized & VDC_DRING_INIT) == 0) {
		PR0("%s[%d] ldc_mem_dring_create\n", __func__, vdc->instance);
		status = ldc_mem_dring_create(VD_DRING_LEN, VD_DRING_ENTRY_SZ,
		    &vdc->ldc_dring_hdl);
		if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) {
			PR0("%s: Failed to create a descriptor ring", __func__);
			return (status);
		}
		vdc->dring_entry_size = VD_DRING_ENTRY_SZ;
		vdc->dring_len = VD_DRING_LEN;
		vdc->initialized |= VDC_DRING_INIT;
	}

	if ((vdc->initialized & VDC_DRING_BOUND) == 0) {
		PR0("%s[%d] ldc_mem_dring_bind\n", __func__, vdc->instance);
		vdc->dring_cookie =
		    kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP);

		status = ldc_mem_dring_bind(vdc->ldc_handle,
		    vdc->ldc_dring_hdl,
		    LDC_SHADOW_MAP, LDC_MEM_RW,
		    &vdc->dring_cookie[0],
		    &vdc->dring_cookie_count);
		if (status != 0) {
			PR0("%s: Failed to bind descriptor ring (%p) "
			    "to channel (%p)\n",
			    __func__, vdc->ldc_dring_hdl, vdc->ldc_handle);
			return (status);
		}
		ASSERT(vdc->dring_cookie_count == 1);
		vdc->initialized |= VDC_DRING_BOUND;
	}

	status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info);
	if (status != 0) {
		PR0("%s: Failed to get info for descriptor ring (%p)\n",
		    __func__, vdc->ldc_dring_hdl);
		return (status);
	}

	if ((vdc->initialized & VDC_DRING_LOCAL) == 0) {
		PR0("%s[%d] local dring\n", __func__, vdc->instance);

		/* Allocate the local copy of this dring */
		vdc->local_dring =
		    kmem_zalloc(VD_DRING_LEN * sizeof (vdc_local_desc_t),
		    KM_SLEEP);
		vdc->initialized |= VDC_DRING_LOCAL;
	}

	/*
	 * Mark all DRing entries as free and initialize the private
	 * descriptor's memory handles. If any entry is initialized,
	 * we need to free it later so we set the bit in 'initialized'
	 * at the start.
	 */
	vdc->initialized |= VDC_DRING_ENTRY;
	for (i = 0; i < VD_DRING_LEN; i++) {
		dep = VDC_GET_DRING_ENTRY_PTR(vdc, i);
		dep->hdr.dstate = VIO_DESC_FREE;

		status = ldc_mem_alloc_handle(vdc->ldc_handle,
		    &vdc->local_dring[i].desc_mhdl);
		if (status != 0) {
			cmn_err(CE_NOTE, "![%d] Failed to alloc mem handle for"
			    " descriptor %d", vdc->instance, i);
			return (status);
		}
		vdc->local_dring[i].flags = VIO_DESC_FREE;
		vdc->local_dring[i].dep = dep;

		mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&vdc->local_dring[i].cv, NULL, CV_DRIVER, NULL);
	}

	/*
	 * We init the index of the last DRing entry used. Since the code to
	 * get the next available entry increments it before selecting one,
	 * we set it to the last DRing entry so that it wraps around to zero
	 * for the 1st entry to be used.
	 */
	vdc->dring_curr_idx = VD_DRING_LEN - 1;

	return (status);
}

/*
 * Function:
 *	vdc_destroy_descriptor_ring()
 *
 * Description:
 *	Undoes vdc_init_descriptor_ring(): frees the per-entry memory
 *	handles and the local ring, then unbinds and destroys the DRing.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_destroy_descriptor_ring(vdc_t *vdc)
{
	vdc_local_desc_t	*ldep = NULL;	/* Local Dring Entry Pointer */
	ldc_mem_handle_t	mhdl = NULL;
	int			status = -1;
	int			i;	/* loop */

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(vdc->state == VD_STATE_INIT);

	PR0("%s: Entered\n", __func__);

	if (vdc->initialized & VDC_DRING_ENTRY) {
		PR0("[%d] Removing Local DRing entries\n", vdc->instance);
		for (i = 0; i < VD_DRING_LEN; i++) {
			ldep = &vdc->local_dring[i];
			mhdl = ldep->desc_mhdl;

			if (mhdl == NULL)
				continue;

			(void) ldc_mem_free_handle(mhdl);
			mutex_destroy(&ldep->lock);
			cv_destroy(&ldep->cv);
		}
		vdc->initialized &= ~VDC_DRING_ENTRY;
	}

	if (vdc->initialized & VDC_DRING_LOCAL) {
		PR0("[%d] Freeing Local DRing\n", vdc->instance);
		kmem_free(vdc->local_dring,
		    VD_DRING_LEN * sizeof (vdc_local_desc_t));
		vdc->initialized &= ~VDC_DRING_LOCAL;
	}

	if (vdc->initialized & VDC_DRING_BOUND) {
		PR0("[%d] Unbinding DRing\n", vdc->instance);
		status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->initialized &= ~VDC_DRING_BOUND;
		} else {
			vdc_msg("%s: Failed to unbind Descriptor Ring (%lx)\n",
			    __func__, vdc->ldc_dring_hdl);
		}
	}

	if (vdc->initialized & VDC_DRING_INIT) {
		PR0("[%d] Destroying DRing\n", vdc->instance);
		status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->ldc_dring_hdl = NULL;
			bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t));
			vdc->initialized &= ~VDC_DRING_INIT;
		} else {
			vdc_msg("%s: Failed to destroy Descriptor Ring (%lx)\n",
			    __func__, vdc->ldc_dring_hdl);
		}
	}
}

/*
 * vdc_get_next_dring_entry_idx()
 *
 * Description:
 *	This function gets the index of the next available Descriptor Ring
 *	entry. If the ring is full, it will back off and wait for the next
 *	entry to be freed (the ACK handler will signal).
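 *	The wait is bounded: we block in cv_timedwait() for at most one
 *	timeout interval and return -1 if the entry is still busy, so the
 *	caller can fail the request with EAGAIN instead of blocking.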
 *
 * Return Value:
 *	0 <= rv < VD_DRING_LEN		Next available slot
 *	-1				DRing is full
 */
static int
vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t num_slots_needed)
{
	_NOTE(ARGUNUSED(num_slots_needed))

	vd_dring_entry_t	*dep = NULL;	/* DRing Entry Pointer */
	vdc_local_desc_t	*ldep = NULL;	/* Local DRing Entry Pointer */
	int			idx = -1;

	ASSERT(vdc != NULL);
	ASSERT(vdc->dring_len == VD_DRING_LEN);
	ASSERT(vdc->dring_curr_idx >= 0);
	ASSERT(vdc->dring_curr_idx < VD_DRING_LEN);
	ASSERT(mutex_owned(&vdc->dring_lock));

	/* pick the next descriptor after the last one used */
	idx = (vdc->dring_curr_idx + 1) % VD_DRING_LEN;
	ldep = &vdc->local_dring[idx];
	ASSERT(ldep != NULL);
	dep = ldep->dep;
	ASSERT(dep != NULL);

	mutex_enter(&ldep->lock);
	if (dep->hdr.dstate == VIO_DESC_FREE) {
		vdc->dring_curr_idx = idx;
	} else {
		DTRACE_PROBE(full);
		(void) cv_timedwait(&ldep->cv, &ldep->lock,
		    VD_GET_TIMEOUT_HZ(1));
		if (dep->hdr.dstate == VIO_DESC_FREE) {
			vdc->dring_curr_idx = idx;
		} else {
			PR0("[%d] Entry %d unavailable still in state %d\n",
			    vdc->instance, idx, dep->hdr.dstate);
			idx = -1;	/* indicate that the ring is full */
		}
	}
	mutex_exit(&ldep->lock);

	return (idx);
}

/*
 * Function:
 *	vdc_populate_descriptor
 *
 * Description:
 *	This routine writes the data to be transmitted to vds into the
 *	descriptor, notifies vds that the ring has been updated and
 *	then waits for the request to be processed.
 *
 * Arguments:
 *	vdc	- the soft state pointer
 *	addr	- address of structure to be written. In the case of block
 *		  reads and writes this structure will be a buf_t and the
 *		  address of the data to be written will be in the b_un.b_addr
 *		  field. Otherwise the value of addr will be the address
 *		  to be written.
 *	nbytes	- number of bytes to read/write
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	arg	- parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			. LP64 diskaddr_t (block I/O)
 *	slice	- the disk slice this request is for
 *
 * Return Codes:
 *	0
 *	EAGAIN
 *	EFAULT
 *	ENXIO
 *	EIO
 */
static int
vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int operation,
    uint64_t arg, uint64_t slice)
{
	vdc_local_desc_t	*local_dep = NULL; /* Local Dring Entry Ptr */
	vd_dring_entry_t	*dep = NULL;	/* Dring Entry Pointer */
	int			idx = 0;	/* Index of DRing entry used */
	vio_dring_msg_t		dmsg;
	size_t			msglen = sizeof (dmsg);
	int			retries = 0;
	int			rv = 0;

	ASSERT(vdc != NULL);
	ASSERT(slice < V_NUMPAR);

	/*
	 * Get next available DRing entry.
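	 * This may block briefly in vdc_get_next_dring_entry_idx() if the
	 * ring is currently full.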

/*
 * Function:
 *	vdc_populate_descriptor
 *
 * Description:
 *	This routine writes the data to be transmitted to vds into the
 *	descriptor, notifies vds that the ring has been updated and
 *	then waits for the request to be processed.
 *
 * Arguments:
 *	vdc	- the soft state pointer
 *	addr	- address of structure to be written. In the case of block
 *		  reads and writes this structure will be a buf_t and the
 *		  address of the data to be written will be in the
 *		  b_un.b_addr field. Otherwise the value of addr will be
 *		  the address to be written.
 *	nbytes	- number of bytes to read/write
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	arg	- parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			. LP64 diskaddr_t (block I/O)
 *	slice	- the disk slice this request is for
 *
 * Return Codes:
 *	0
 *	EAGAIN
 *	EFAULT
 *	ENXIO
 *	EIO
 */
static int
vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int operation,
    uint64_t arg, uint64_t slice)
{
	vdc_local_desc_t *local_dep = NULL;	/* Local Dring Entry Pointer */
	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
	int		idx = 0;		/* Index of DRing entry used */
	vio_dring_msg_t	dmsg;
	size_t		msglen = sizeof (dmsg);
	int		retries = 0;
	int		rv = 0;

	ASSERT(vdc != NULL);
	ASSERT(slice < V_NUMPAR);

	/*
	 * Get next available DRing entry.
	 */
	mutex_enter(&vdc->dring_lock);
	idx = vdc_get_next_dring_entry_idx(vdc, 1);
	if (idx == -1) {
		mutex_exit(&vdc->dring_lock);
		PR0("[%d] no descriptor ring entry avail, last seq=%d\n",
		    vdc->instance, vdc->seq_num - 1);

		/*
		 * Since strategy should not block we don't wait for the
		 * DRing to empty and instead return
		 */
		return (EAGAIN);
	}

	ASSERT(idx < VD_DRING_LEN);
	local_dep = &vdc->local_dring[idx];
	dep = local_dep->dep;
	ASSERT(dep != NULL);

	/*
	 * We now get the lock for this descriptor before dropping the
	 * overall DRing lock. This prevents a race condition where another
	 * vdc thread could grab the descriptor we selected.
	 */
	ASSERT(!MUTEX_HELD(&local_dep->lock));
	mutex_enter(&local_dep->lock);
	mutex_exit(&vdc->dring_lock);

	switch (operation) {
	case VD_OP_BREAD:
	case VD_OP_BWRITE:
		local_dep->buf = (struct buf *)addr;
		local_dep->addr = local_dep->buf->b_un.b_addr;
		PR1("buf=%p, block=%lx, nbytes=%lx\n", addr, arg, nbytes);
		dep->payload.addr = (diskaddr_t)arg;
		rv = vdc_populate_mem_hdl(vdc, idx, local_dep->addr,
		    nbytes, operation);
		break;

	case VD_OP_GET_VTOC:
	case VD_OP_SET_VTOC:
	case VD_OP_GET_DISKGEOM:
	case VD_OP_SET_DISKGEOM:
	case VD_OP_SCSICMD:
		local_dep->addr = addr;
		if (nbytes > 0) {
			rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes,
			    operation);
		}
		break;

	case VD_OP_FLUSH:
	case VD_OP_GET_WCE:
	case VD_OP_SET_WCE:
		rv = 0;		/* nothing to bind */
		break;

	default:
		cmn_err(CE_NOTE, "[%d] Unsupported vDisk operation [%d]\n",
		    vdc->instance, operation);
		rv = EINVAL;
	}

	if (rv != 0) {
		mutex_exit(&local_dep->lock);
		return (rv);
	}

	/*
	 * fill in the data details into the DRing
	 */
	dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdc);
	dep->payload.operation = operation;
	dep->payload.nbytes = nbytes;
	dep->payload.status = EINPROGRESS;	/* vds will set valid value */
	dep->payload.slice = slice;
	dep->hdr.dstate = VIO_DESC_READY;
	dep->hdr.ack = 1;	/* request an ACK for every message */

	local_dep->flags = VIO_DESC_READY;

	/*
	 * Send a msg with the DRing details to vds
	 */
	mutex_enter(&vdc->lock);
	VIO_INIT_DRING_DATA_TAG(dmsg);
	VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc);
	dmsg.dring_ident = vdc->dring_ident;
	dmsg.start_idx = idx;
	dmsg.end_idx = idx;

	DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdc);

	PR1("ident=0x%llx, st=%d, end=%d, seq=%d req=%d dep=%p\n",
	    vdc->dring_ident, dmsg.start_idx, dmsg.end_idx,
	    dmsg.seq_num, dep->payload.req_id, dep);

	rv = vdc_send(vdc, (caddr_t)&dmsg, &msglen);
	PR1("%s[%d]: ldc_write() rv=%d\n", __func__, vdc->instance, rv);
	if (rv != 0) {
		mutex_exit(&vdc->lock);
		mutex_exit(&local_dep->lock);
		vdc_msg("%s: ldc_write(%d)\n", __func__, rv);

		/* Clear the DRing entry */
		rv = vdc_depopulate_descriptor(vdc, idx);

		return (rv ? rv : EAGAIN);
	}

	/*
	 * If the message was successfully sent, we increment the sequence
	 * number to be used by the next message
	 */
	vdc->seq_num++;
	mutex_exit(&vdc->lock);

	/*
	 * When a guest is panicking, the completion of requests needs to be
	 * handled differently because interrupts are disabled and vdc
	 * will not get messages. We have to poll for the messages instead.
	 */
	if (ddi_in_panic()) {
		int start = 0;
		retries = 0;
		for (;;) {
			msglen = sizeof (dmsg);
			rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg,
			    &msglen);
			if (rv) {
				rv = EINVAL;
				break;
			}

			/*
			 * if there are no packets wait and check again
			 */
			if ((rv == 0) && (msglen == 0)) {
				if (retries++ > vdc_dump_retries) {
					PR0("[%d] Giving up waiting, idx %d\n",
					    vdc->instance, idx);
					rv = EAGAIN;
					break;
				}

				PR1("Waiting for next packet @ %d\n", idx);
				drv_usecwait(vdc_usec_timeout_dump);
				continue;
			}

			/*
			 * Ignore all messages that are not ACKs/NACKs to
			 * DRing requests.
			 */
			if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) ||
			    (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) {
				PR0("discarding pkt: type=%d sub=%d env=%d\n",
				    dmsg.tag.vio_msgtype,
				    dmsg.tag.vio_subtype,
				    dmsg.tag.vio_subtype_env);
				continue;
			}

			/*
			 * set the appropriate return value for the
			 * current request.
			 */
			switch (dmsg.tag.vio_subtype) {
			case VIO_SUBTYPE_ACK:
				rv = 0;
				break;
			case VIO_SUBTYPE_NACK:
				rv = EAGAIN;
				break;
			default:
				continue;
			}

			start = dmsg.start_idx;
			if (start >= VD_DRING_LEN) {
				PR0("[%d] Bogus ack data : start %d\n",
				    vdc->instance, start);
				continue;
			}

			dep = VDC_GET_DRING_ENTRY_PTR(vdc, start);

			PR1("[%d] Dumping start=%d idx=%d state=%d\n",
			    vdc->instance, start, idx, dep->hdr.dstate);

			if (dep->hdr.dstate != VIO_DESC_DONE) {
				PR0("[%d] Entry @ %d - state !DONE %d\n",
				    vdc->instance, start, dep->hdr.dstate);
				continue;
			}

			(void) vdc_depopulate_descriptor(vdc, start);

			/*
			 * We want to process all Dring entries up to
			 * the current one so that we can return an
			 * error with the correct request.
			 */
			if (idx > start) {
				PR0("[%d] Looping: start %d, idx %d\n",
				    vdc->instance, start, idx);
				continue;
			}

			/* exit - all outstanding requests are completed */
			break;
		}

		mutex_exit(&local_dep->lock);

		return (rv);
	}

	/*
	 * In the case of calls from strategy and dump (in the non-panic
	 * case), instead of waiting for a response from the vDisk server
	 * return now. They will be processed asynchronously and the vdc
	 * ACK handling code will trigger the biodone(9F)
	 */
	if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) {
		mutex_exit(&local_dep->lock);
		return (rv);
	}

	/*
	 * In the case of synchronous calls we watch the DRing entries we
	 * modified and await the response from vds.
	 */
	rv = vdc_wait_for_descriptor_update(vdc, idx, dmsg);
	if (rv == ETIMEDOUT) {
		/* debug info when dumping state on vds side */
		dep->payload.status = ECANCELED;
	}

	rv = vdc_depopulate_descriptor(vdc, idx);
	PR1("%s[%d] Status=%d\n", __func__, vdc->instance, rv);

	mutex_exit(&local_dep->lock);

	return (rv);
}
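
/*
 * Illustrative sketch (not compiled): a synchronous caller of
 * vdc_populate_descriptor() above supplies a kernel buffer plus a VD_OP_*
 * code and waits for the round trip, while block I/O passes the buf_t
 * itself and returns early.  The hypothetical helper below shows the
 * synchronous flavour for a GET_VTOC-style request; "mem_p" and "len" are
 * assumptions standing in for a real, 8-byte aligned request buffer.
 */
#if 0
static int
example_sync_request(vdc_t *vdc, caddr_t mem_p, size_t len, int mode)
{
	/* slice 0 chosen arbitrarily for the sketch */
	return (vdc_populate_descriptor(vdc, mem_p, len, VD_OP_GET_VTOC,
	    (uint64_t)mode, 0));
}
#endif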

/*
 * Function:
 *	vdc_wait_for_descriptor_update()
 *
 * Description:
 *	This function waits for the descriptor at 'idx' to be marked DONE
 *	by vds, resending the original DRing message if the request appears
 *	to have been dropped.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	idx	- Index of the Descriptor Ring entry being modified
 *	dmsg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg)
{
	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
	vdc_local_desc_t *local_dep = NULL;	/* Local Dring Entry Pointer */
	size_t	msglen = sizeof (dmsg);
	int	retries = 0;
	int	status = 0;
	int	rv = 0;

	ASSERT(vdc != NULL);
	ASSERT(idx < VD_DRING_LEN);
	local_dep = &vdc->local_dring[idx];
	ASSERT(local_dep != NULL);
	dep = local_dep->dep;
	ASSERT(dep != NULL);

	while (dep->hdr.dstate != VIO_DESC_DONE) {
		rv = cv_timedwait(&local_dep->cv, &local_dep->lock,
		    VD_GET_TIMEOUT_HZ(retries));
		if (rv == -1) {
			/*
			 * If they persist in ignoring us we'll storm off in a
			 * huff and return ETIMEDOUT to the upper layers.
			 */
			if (retries >= vdc_retries) {
				PR0("%s: Finished waiting on entry %d\n",
				    __func__, idx);
				status = ETIMEDOUT;
				break;
			} else {
				retries++;
				PR0("%s[%d]: Timeout #%d on entry %d "
				    "[seq %d][req %d]\n", __func__,
				    vdc->instance,
				    retries, idx, dmsg.seq_num,
				    dep->payload.req_id);
			}

			if (dep->hdr.dstate & VIO_DESC_ACCEPTED) {
				PR0("%s[%d]: vds has accessed entry %d "
				    "[seq %d][req %d] but not ack'ed it "
				    "yet\n", __func__, vdc->instance, idx,
				    dmsg.seq_num, dep->payload.req_id);
				continue;
			}

			/*
			 * we resend the message as it may have been dropped
			 * and have never made it to the other side (vds).
			 * (We reuse the original message but update seq ID)
			 */
			mutex_enter(&vdc->lock);
			VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc);
			retries = 0;
			status = vdc_send(vdc, (caddr_t)&dmsg, &msglen);
			if (status != 0) {
				mutex_exit(&vdc->lock);
				vdc_msg("%s: Error (%d) while resending after "
				    "timeout\n", __func__, status);
				status = ETIMEDOUT;
				break;
			}
			/*
			 * If the message was successfully sent, we increment
			 * the sequence number to be used by the next message.
			 */
			vdc->seq_num++;
			mutex_exit(&vdc->lock);
		}
	}

	return (status);
}
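
/*
 * Illustrative sketch (not compiled): the wait loop above combines
 * cv_timedwait(9F) with a bounded retry count and a resend on timeout.
 * A stripped-down model of that "wait, then resend, then give up" shape,
 * with the retry bound (10) and one-second step chosen arbitrarily:
 */
#if 0
static int
example_timed_wait(kcondvar_t *cv, kmutex_t *lock, volatile int *done)
{
	int retries = 0;

	while (!*done) {
		/* -1 means the timeout expired before a cv_signal() */
		if (cv_timedwait(cv, lock, ddi_get_lbolt() + hz) == -1) {
			if (++retries > 10)	/* arbitrary bound */
				return (ETIMEDOUT);
			/* a real caller would resend its request here */
		}
	}
	return (0);
}
#endif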

/*
 * Function:
 *	vdc_depopulate_descriptor()
 *
 * Description:
 *	This function marks the descriptor at 'idx' as free again, copies
 *	back and releases any bounce buffer, and unbinds the associated
 *	LDC memory handle.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	idx	- Index of the Descriptor Ring entry being modified
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx)
{
	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
	vdc_local_desc_t *ldep = NULL;		/* Local Dring Entry Pointer */
	int		status = ENXIO;
	int		operation;
	int		rv = 0;

	ASSERT(vdc != NULL);
	ASSERT(idx < VD_DRING_LEN);
	ldep = &vdc->local_dring[idx];
	ASSERT(ldep != NULL);
	dep = ldep->dep;
	ASSERT(dep != NULL);

	status = dep->payload.status;
	operation = dep->payload.operation;
	VDC_MARK_DRING_ENTRY_FREE(vdc, idx);
	ldep = &vdc->local_dring[idx];
	VIO_SET_DESC_STATE(ldep->flags, VIO_DESC_FREE);

	/* the DKIO W$ operations never bind handles so we can return now */
	if ((operation == VD_OP_FLUSH) ||
	    (operation == VD_OP_GET_WCE) ||
	    (operation == VD_OP_SET_WCE))
		return (status);

	/*
	 * If the upper layer passed in a misaligned address we copied the
	 * data into an aligned buffer before sending it to LDC - we now
	 * copy it back to the original buffer.
	 */
	if (ldep->align_addr) {
		ASSERT(ldep->addr != NULL);
		ASSERT(dep->payload.nbytes > 0);

		bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes);
		kmem_free(ldep->align_addr,
		    sizeof (caddr_t) * P2ROUNDUP(dep->payload.nbytes, 8));
		ldep->align_addr = NULL;
	}

	rv = ldc_mem_unbind_handle(ldep->desc_mhdl);
	if (rv != 0) {
		cmn_err(CE_NOTE, "[%d] unbind mem hdl 0x%lx @ idx %d "
		    "failed:%d", vdc->instance, ldep->desc_mhdl, idx, rv);
		/*
		 * The error returned by the vDisk server is more informative
		 * and thus has a higher priority but if it isn't set we
		 * ensure that this function returns an error.
		 */
		if (status == 0)
			status = EINVAL;
	}

	return (status);
}

/*
 * Function:
 *	vdc_populate_mem_hdl()
 *
 * Description:
 *	This function binds the memory that vds will read or write into the
 *	LDC memory handle of the descriptor at 'idx', copying any misaligned
 *	buffer into an 8-byte aligned bounce buffer first.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	idx	- Index of the Descriptor Ring entry being modified
 *	addr	- virtual address being mapped in
 *	nbytes	- number of bytes in 'addr'
 *	operation - the vDisk operation being performed (VD_OP_xxx)
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes,
    int operation)
{
	vd_dring_entry_t	*dep = NULL;
	vdc_local_desc_t	*ldep = NULL;
	ldc_mem_handle_t	mhdl;
	caddr_t			vaddr;
	int			perm = LDC_MEM_RW;
	int			rv = 0;
	int			i;

	ASSERT(vdc != NULL);
	ASSERT(idx < VD_DRING_LEN);

	dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx);
	ldep = &vdc->local_dring[idx];
	mhdl = ldep->desc_mhdl;

	switch (operation) {
	case VD_OP_BREAD:
		perm = LDC_MEM_W;
		break;

	case VD_OP_BWRITE:
		perm = LDC_MEM_R;
		break;

	case VD_OP_GET_VTOC:
	case VD_OP_SET_VTOC:
	case VD_OP_GET_DISKGEOM:
	case VD_OP_SET_DISKGEOM:
	case VD_OP_SCSICMD:
		perm = LDC_MEM_RW;
		break;

	default:
		ASSERT(0);	/* catch bad programming in vdc */
	}

	/*
	 * LDC expects any addresses passed in to be 8-byte aligned. We need
	 * to copy the contents of any misaligned buffers to a newly
	 * allocated buffer and bind it instead (and copy the contents back
	 * to the original buffer passed in when depopulating the descriptor)
	 */
	vaddr = addr;
	if (((uint64_t)addr & 0x7) != 0) {
		ASSERT(ldep->align_addr == NULL);
		ldep->align_addr =
		    kmem_zalloc(sizeof (caddr_t) * P2ROUNDUP(nbytes, 8),
		    KM_SLEEP);
		PR0("%s[%d] Misaligned address %lx reallocating "
		    "(buf=%lx nb=%d op=%d entry=%d)\n",
		    __func__, vdc->instance, addr, ldep->align_addr, nbytes,
		    operation, idx);
		bcopy(addr, ldep->align_addr, nbytes);
		vaddr = ldep->align_addr;
	}

	rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8),
	    LDC_SHADOW_MAP, perm, &dep->payload.cookie[0],
	    &dep->payload.ncookies);
	PR1("%s[%d] bound mem handle; ncookies=%d\n",
	    __func__, vdc->instance, dep->payload.ncookies);
	if (rv != 0) {
		vdc_msg("%s[%d] failed to ldc_mem_bind_handle "
		    "(mhdl=%lx, buf=%lx entry=%d err=%d)\n",
		    __func__, vdc->instance, mhdl, addr, idx, rv);
		if (ldep->align_addr) {
			kmem_free(ldep->align_addr,
			    sizeof (caddr_t) * P2ROUNDUP(nbytes, 8));
			ldep->align_addr = NULL;
		}
		return (EAGAIN);
	}

	/*
	 * Get the other cookies (if any).
	 */
	for (i = 1; i < dep->payload.ncookies; i++) {
		rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]);
		if (rv != 0) {
			(void) ldc_mem_unbind_handle(mhdl);
			vdc_msg("%s: failed to get next cookie(mhdl=%lx "
			    "cnum=%d), err=%d", __func__, mhdl, i, rv);
			if (ldep->align_addr) {
				kmem_free(ldep->align_addr,
				    sizeof (caddr_t) * P2ROUNDUP(nbytes, 8));
				ldep->align_addr = NULL;
			}
			return (EAGAIN);
		}
	}

	return (rv);
}
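
/*
 * Illustrative sketch (not compiled): the alignment handling above is the
 * classic bounce-buffer pattern - detect misalignment with a mask test,
 * stage the data in an aligned allocation, and remember the original
 * pointer so the copy can be reversed later.  A minimal model, assuming
 * the caller frees *bounce after the transfer:
 */
#if 0
static caddr_t
example_align(caddr_t addr, size_t nbytes, caddr_t *bounce)
{
	if (((uint64_t)addr & 0x7) == 0) {
		*bounce = NULL;		/* already 8-byte aligned */
		return (addr);
	}
	*bounce = kmem_zalloc(P2ROUNDUP(nbytes, 8), KM_SLEEP);
	bcopy(addr, *bounce, nbytes);	/* stage outgoing data */
	return (*bounce);		/* bind this address instead */
}
#endif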

/*
 * Interrupt handlers for messages from LDC
 */

/*
 * Function:
 *	vdc_handle_cb()
 *
 * Description:
 *	This is the callback handler registered with LDC. Depending on the
 *	type of event that triggered it, it resets the connection, kicks
 *	off the handshake or wakes up the message processing thread.
 *
 * Arguments:
 *	event	- Type of event (LDC_EVT_xxx) that triggered the callback
 *	arg	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static uint_t
vdc_handle_cb(uint64_t event, caddr_t arg)
{
	ldc_status_t	ldc_state;
	int		rv = 0;

	vdc_t	*vdc = (vdc_t *)(void *)arg;

	ASSERT(vdc != NULL);

	PR1("%s[%d] event=%x seqID=%d\n",
	    __func__, vdc->instance, event, vdc->seq_num);

	/*
	 * Depending on the type of event that triggered this callback,
	 * we modify the handshake state or read the data.
	 *
	 * NOTE: not done as a switch() as event could be triggered by
	 * a state change and a read request. Also the ordering of the
	 * check for the event types is deliberate.
	 */
	if (event & LDC_EVT_UP) {
		PR0("%s[%d] Received LDC_EVT_UP\n", __func__, vdc->instance);

		/* get LDC state */
		rv = ldc_status(vdc->ldc_handle, &ldc_state);
		if (rv != 0) {
			cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
			    vdc->instance, rv);
			mutex_enter(&vdc->lock);
			vdc_reset_connection(vdc, B_TRUE);
			mutex_exit(&vdc->lock);
			return (LDC_SUCCESS);
		}

		/*
		 * Reset the transaction sequence numbers when LDC comes up.
		 * We then kick off the handshake negotiation with the vDisk
		 * server.
		 */
		mutex_enter(&vdc->lock);
		vdc->seq_num = 1;
		vdc->seq_num_reply = 0;
		vdc->ldc_state = ldc_state;
		ASSERT(ldc_state == LDC_UP);
		mutex_exit(&vdc->lock);

		vdc_init_handshake_negotiation(vdc);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Wake up the worker thread to process the message
		 */
		mutex_enter(&vdc->msg_proc_lock);
		vdc->msg_pending = B_TRUE;
		cv_signal(&vdc->msg_proc_cv);
		mutex_exit(&vdc->msg_proc_lock);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		/* that's all we have to do - no need to handle DOWN/RESET */
		return (LDC_SUCCESS);
	}

	if (event & LDC_EVT_RESET) {
		PR0("%s[%d] Recvd LDC RESET event\n", __func__, vdc->instance);

		/* get LDC state */
		rv = ldc_status(vdc->ldc_handle, &ldc_state);
		if (rv != 0) {
			cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
			    vdc->instance, rv);
			ldc_state = LDC_OPEN;
		}
		mutex_enter(&vdc->lock);
		vdc->ldc_state = ldc_state;
		vdc_reset_connection(vdc, B_FALSE);
		mutex_exit(&vdc->lock);

		vdc_init_handshake_negotiation(vdc);
	}

	if (event & LDC_EVT_DOWN) {
		PR0("%s[%d] Recvd LDC DOWN event\n", __func__, vdc->instance);

		/* get LDC state */
		rv = ldc_status(vdc->ldc_handle, &ldc_state);
		if (rv != 0) {
			cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
			    vdc->instance, rv);
			ldc_state = LDC_OPEN;
		}
		mutex_enter(&vdc->lock);
		vdc->ldc_state = ldc_state;
		vdc_reset_connection(vdc, B_TRUE);
		mutex_exit(&vdc->lock);
	}

	if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ))
		cmn_err(CE_NOTE, "![%d] Unexpected LDC event (%lx) received",
		    vdc->instance, event);

	return (LDC_SUCCESS);
}
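
/*
 * Illustrative sketch (not compiled): because LDC can OR several
 * LDC_EVT_* bits into a single callback invocation, the handler above
 * tests each bit with "if" rather than a switch.  The shape of that
 * dispatch, reduced to its essentials:
 */
#if 0
static void
example_event_dispatch(uint64_t event)
{
	if (event & LDC_EVT_UP) {
		/* channel came up: reset counters, restart handshake */
	}
	if (event & LDC_EVT_READ) {
		/* data arrived: signal the worker and return early */
		return;
	}
	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
		/* connection lost: reset driver state */
	}
}
#endif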

/* -------------------------------------------------------------------------- */

/*
 * The following functions process the incoming messages from vds
 */

/*
 * Function:
 *	vdc_process_msg_thread()
 *
 * Description:
 *	This is the worker thread which waits to be signalled by the LDC
 *	callback and then drains the LDC queue of incoming messages.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_process_msg_thread(vdc_t *vdc)
{
	int		status = 0;
	boolean_t	q_is_empty = B_TRUE;

	ASSERT(vdc != NULL);

	mutex_enter(&vdc->msg_proc_lock);
	PR0("%s[%d]: Starting\n", __func__, vdc->instance);

	vdc->msg_proc_thr_state = VDC_THR_RUNNING;

	while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) {

		PR1("%s[%d] Waiting\n", __func__, vdc->instance);
		while (!vdc->msg_pending)
			cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock);

		PR1("%s[%d] Message Received\n", __func__, vdc->instance);

		/* check if there is data */
		status = ldc_chkq(vdc->ldc_handle, &q_is_empty);
		if ((status != 0) &&
		    (vdc->msg_proc_thr_state == VDC_THR_RUNNING)) {
			cmn_err(CE_NOTE, "[%d] Unable to communicate with"
			    " vDisk server. Cannot check LDC queue: %d",
			    vdc->instance, status);
			mutex_enter(&vdc->lock);
			vdc_reset_connection(vdc, B_FALSE);
			mutex_exit(&vdc->lock);
			vdc->msg_proc_thr_state = VDC_THR_STOP;
			continue;
		}

		if (!q_is_empty) {
			PR1("%s: new pkt(s) available\n", __func__);
			vdc_process_msg(vdc);
		}

		vdc->msg_pending = B_FALSE;
	}

	PR0("Message processing thread stopped\n");
	vdc->msg_pending = B_FALSE;
	vdc->msg_proc_thr_state = VDC_THR_DONE;
	cv_signal(&vdc->msg_proc_cv);
	mutex_exit(&vdc->msg_proc_lock);
	thread_exit();
}


/*
 * Function:
 *	vdc_process_msg()
 *
 * Description:
 *	This function is called by the message processing thread each time
 *	it is triggered when LDC sends an interrupt to indicate that there
 *	are more packets on the queue. When it is called it will continue
 *	to loop and read the messages until there are none left on the
 *	queue. If it encounters an invalid sized message it will drop it
 *	and check the next message.
 *
 * Arguments:
 *	arg	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None.
 */
static void
vdc_process_msg(void *arg)
{
	vdc_t		*vdc = (vdc_t *)(void *)arg;
	vio_msg_t	vio_msg;
	size_t		nbytes = sizeof (vio_msg);
	int		status;

	ASSERT(vdc != NULL);

	mutex_enter(&vdc->lock);

	PR1("%s\n", __func__);

	for (;;) {

		/* read all messages - until no more left */
		status = ldc_read(vdc->ldc_handle, (caddr_t)&vio_msg, &nbytes);

		if (status) {
			vdc_msg("%s: ldc_read() failed = %d", __func__,
			    status);

			/* if status is ECONNRESET --- reset vdc state */
			if (status == EIO || status == ECONNRESET) {
				vdc_reset_connection(vdc, B_TRUE);
			}

			mutex_exit(&vdc->lock);
			return;
		}

		if ((nbytes > 0) && (nbytes < sizeof (vio_msg_tag_t))) {
			cmn_err(CE_CONT, "![%d] Expect %lu bytes; recv'd %lu\n",
			    vdc->instance, sizeof (vio_msg_tag_t), nbytes);
			mutex_exit(&vdc->lock);
			return;
		}

		if (nbytes == 0) {
			PR2("%s[%d]: ldc_read() done..\n",
			    __func__, vdc->instance);
			mutex_exit(&vdc->lock);
			return;
		}

		PR1("%s[%d] (%x/%x/%x)\n", __func__, vdc->instance,
		    vio_msg.tag.vio_msgtype,
		    vio_msg.tag.vio_subtype,
		    vio_msg.tag.vio_subtype_env);

		/*
		 * Verify the Session ID of the message
		 *
		 * Every message after the Version has been negotiated should
		 * have the correct session ID set.
		 */
		if ((vio_msg.tag.vio_sid != vdc->session_id) &&
		    (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) {
			cmn_err(CE_NOTE, "[%d] Invalid SID 0x%x, expect 0x%lx",
			    vdc->instance, vio_msg.tag.vio_sid,
			    vdc->session_id);
			vdc_reset_connection(vdc, B_FALSE);
			mutex_exit(&vdc->lock);
			return;
		}

		switch (vio_msg.tag.vio_msgtype) {
		case VIO_TYPE_CTRL:
			status = vdc_process_ctrl_msg(vdc, vio_msg);
			break;
		case VIO_TYPE_DATA:
			status = vdc_process_data_msg(vdc, vio_msg);
			break;
		case VIO_TYPE_ERR:
			status = vdc_process_err_msg(vdc, vio_msg);
			break;
		default:
			PR1("%s", __func__);
			status = EINVAL;
			break;
		}

		if (status != 0) {
			PR0("%s[%d] Error (%d) occurred processing msg\n",
			    __func__, vdc->instance, status);
			vdc_reset_connection(vdc, B_FALSE);
		}
	}
	_NOTE(NOTREACHED)
}

/*
 * Function:
 *	vdc_process_ctrl_msg()
 *
 * Description:
 *	This function is called by the message processing thread each time
 *	an LDC message with a msgtype of VIO_TYPE_CTRL is received.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	msg	- the LDC message sent by vds
 *
 * Return Codes:
 *	0	- Success.
 *	EPROTO	- A message was received which shouldn't have happened
 *		  according to the protocol
 *	ENOTSUP	- An action which is allowed according to the protocol but
 *		  which isn't (or doesn't need to be) implemented yet.
 *	EINVAL	- An invalid value was returned as part of a message.
 */
static int
vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg)
{
	int	status = -1;

	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	/* Depending on which state we are in; process the message */
	switch (vdc->state) {
	case VD_STATE_INIT:
		status = vdc_handle_ver_msg(vdc, (vio_ver_msg_t *)&msg);
		break;

	case VD_STATE_VER:
		status = vdc_handle_attr_msg(vdc, (vd_attr_msg_t *)&msg);
		break;

	case VD_STATE_ATTR:
		status = vdc_handle_dring_reg_msg(vdc,
		    (vio_dring_reg_msg_t *)&msg);
		break;

	case VD_STATE_RDX:
		if (msg.tag.vio_subtype_env != VIO_RDX) {
			status = EPROTO;
			break;
		}

		PR0("%s: Received RDX - handshake successful\n", __func__);

		vdc->hshake_cnt = 0;	/* reset failed handshake count */
		status = 0;
		vdc->state = VD_STATE_DATA;

		cv_broadcast(&vdc->attach_cv);
		break;

	case VD_STATE_DATA:
	default:
		cmn_err(CE_NOTE, "[%d] Unexpected handshake state %d",
		    vdc->instance, vdc->state);
		status = EPROTO;
		break;
	}

	return (status);
}
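
/*
 * Illustrative sketch (not compiled): the switch above encodes the
 * handshake as a linear state machine - each state accepts exactly one
 * kind of control message and, on success, its handler advances
 * vdc->state to the next stage.  A hypothetical table equivalent:
 */
#if 0
static const struct {
	int	state;		/* current vdc->state */
	int	expected_env;	/* control message expected in that state */
} example_hshake[] = {
	{ VD_STATE_INIT,	VIO_VER_INFO },
	{ VD_STATE_VER,		VIO_ATTR_INFO },
	{ VD_STATE_ATTR,	VIO_DRING_REG },
	{ VD_STATE_RDX,		VIO_RDX },
	/* VD_STATE_DATA: handshake complete, no control msgs expected */
};
#endif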

/*
 * Function:
 *	vdc_process_data_msg()
 *
 * Description:
 *	This function is called by the message processing thread each time
 *	a message with a msgtype of VIO_TYPE_DATA is received. It will
 *	either be an ACK or NACK from vds[1] which vdc handles as follows.
 *		ACK	- wake up the waiting thread
 *		NACK	- resend any messages necessary
 *
 *	[1] Although the message format allows it, vds should not send a
 *	    VIO_SUBTYPE_INFO message to vdc asking it to read data; if for
 *	    some bizarre reason it does, vdc will reset the connection.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	msg	- the LDC message sent by vds
 *
 * Return Code:
 *	0	- Success.
 *	> 0	- error value returned by LDC
 */
static int
vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg)
{
	int			status = 0;
	vdc_local_desc_t	*ldep = NULL;
	vio_dring_msg_t		*dring_msg = NULL;
	uint_t			num_msgs;
	uint_t			start;
	uint_t			end;
	uint_t			i;

	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_DATA);
	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	dring_msg = (vio_dring_msg_t *)&msg;

	/*
	 * Check to see if the message has bogus data
	 */
	start = dring_msg->start_idx;
	end = dring_msg->end_idx;
	if ((start >= VD_DRING_LEN) || (end >= VD_DRING_LEN)) {
		vdc_msg("%s: Bogus ACK data : start %d, end %d\n",
		    __func__, start, end);
		return (EPROTO);
	}

	DTRACE_IO2(recv, vio_dring_msg_t *, dring_msg, vdc_t *, vdc);

	/*
	 * calculate the number of messages that vds ACK'ed
	 *
	 * Assumes, (like the rest of vdc) that there is a 1:1 mapping
	 * between requests and Dring entries.
	 */
	num_msgs = (end >= start) ?
	    (end - start + 1) :
	    (VD_DRING_LEN - start + end + 1);

	/*
	 * Verify that the sequence number is what vdc expects.
	 */
	if (!vdc_verify_seq_num(vdc, dring_msg, num_msgs)) {
		return (ENXIO);
	}

	/*
	 * Wake the thread waiting for each DRing entry ACK'ed
	 */
	for (i = 0; i < num_msgs; i++) {
		int operation;
		int idx = (start + i) % VD_DRING_LEN;

		ldep = &vdc->local_dring[idx];
		mutex_enter(&ldep->lock);
		operation = ldep->dep->payload.operation;
		if ((operation == VD_OP_BREAD) ||
		    (operation == VD_OP_BWRITE)) {
			/*
			 * The vDisk server responds when it accepts a
			 * descriptor so we continue looping and process
			 * it when it sends the message that it is done.
			 */
			if (ldep->dep->hdr.dstate != VIO_DESC_DONE) {
				mutex_exit(&ldep->lock);
				continue;
			}
			bioerror(ldep->buf, ldep->dep->payload.status);
			biodone(ldep->buf);

			DTRACE_IO2(vdone, buf_t *, ldep->buf, vdc_t *, vdc);

			/* Clear the DRing entry */
			status = vdc_depopulate_descriptor(vdc, idx);
		}
		cv_signal(&ldep->cv);
		mutex_exit(&ldep->lock);
	}

	if (msg.tag.vio_subtype == VIO_SUBTYPE_NACK) {
		PR0("%s: DATA NACK\n", __func__);
		VDC_DUMP_DRING_MSG(dring_msg);
		vdc_reset_connection(vdc, B_FALSE);

		/* we need to drop the lock to trigger the handshake */
		mutex_exit(&vdc->lock);
		vdc_init_handshake_negotiation(vdc);
		mutex_enter(&vdc->lock);
	} else if (msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		status = EPROTO;
	}

	return (status);
}

/*
 * Function:
 *	vdc_process_err_msg()
 *
 * NOTE: No error messages are used as part of the vDisk protocol
 */
static int
vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg)
{
	_NOTE(ARGUNUSED(msg))

	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR);
	cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance);

	return (ENOTSUP);
}
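
/*
 * Illustrative sketch (not compiled): vdc_process_data_msg() above counts
 * the descriptors covered by an ACK using wrap-around arithmetic.  For a
 * ring of length 8, the inclusive window [6, 1] covers entries 6, 7, 0
 * and 1, i.e. four messages.  The computation in isolation:
 */
#if 0
static uint_t
example_ack_window(uint_t start, uint_t end, uint_t ring_len)
{
	/* inclusive window, possibly wrapping past the end of the ring */
	return ((end >= start) ?
	    (end - start + 1) :
	    (ring_len - start + end + 1));
}
#endif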

/*
 * Function:
 *	vdc_handle_ver_msg()
 *
 * Description:
 *	This function processes the version message sent back by vds during
 *	the first stage of the handshake.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	ver_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg)
{
	int status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) {
		return (EPROTO);
	}

	if (ver_msg->dev_class != VDEV_DISK_SERVER) {
		return (EINVAL);
	}

	switch (ver_msg->tag.vio_subtype) {
	case VIO_SUBTYPE_ACK:
		/*
		 * We check to see if the version returned is indeed
		 * supported (The server may have also adjusted the minor
		 * number downwards and if so 'ver_msg' will contain the
		 * actual version agreed)
		 */
		if (vdc_is_supported_version(ver_msg)) {
			vdc->ver.major = ver_msg->ver_major;
			vdc->ver.minor = ver_msg->ver_minor;
			ASSERT(vdc->ver.major > 0);

			vdc->state = VD_STATE_VER;
			status = vdc_init_attr_negotiation(vdc);
		} else {
			status = EPROTO;
		}
		break;

	case VIO_SUBTYPE_NACK:
		/*
		 * call vdc_is_supported_version() which will return the next
		 * supported version (if any) in 'ver_msg'
		 */
		(void) vdc_is_supported_version(ver_msg);
		if (ver_msg->ver_major > 0) {
			size_t len = sizeof (*ver_msg);

			ASSERT(vdc->ver.major > 0);

			/* reset the necessary fields and resend */
			ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO;
			ver_msg->dev_class = VDEV_DISK;

			status = vdc_send(vdc, (caddr_t)ver_msg, &len);
			PR0("[%d] Resend VER info (LDC status = %d)\n",
			    vdc->instance, status);
			if (len != sizeof (*ver_msg))
				status = EBADMSG;
		} else {
			cmn_err(CE_NOTE, "[%d] No common version with "
			    "vDisk server", vdc->instance);
			status = ENOTSUP;
		}

		break;
	case VIO_SUBTYPE_INFO:
		/*
		 * Handle the case where vds starts handshake
		 * (for now only vdc is the instigator)
		 */
		status = ENOTSUP;
		break;

	default:
		status = EINVAL;
		break;
	}

	return (status);
}
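
/*
 * Illustrative sketch (not compiled): on a NACK the client walks its
 * supported-version list downwards until the server accepts a proposal or
 * the list is exhausted.  Assuming a descending list like vdc_version[],
 * negotiation therefore converges in a bounded number of exchanges:
 */
#if 0
static int
example_negotiate(vio_ver_msg_t *ver_msg)
{
	if (!vdc_is_supported_version(ver_msg)) {
		if (ver_msg->ver_major == 0)
			return (ENOTSUP);	/* no common version */
		/* ver_msg now carries the next (lower) proposal; resend */
	}
	return (0);	/* version agreed */
}
#endif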

/*
 * Function:
 *	vdc_handle_attr_msg()
 *
 * Description:
 *	This function processes the attribute message sent back by vds
 *	during the second stage of the handshake.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the
 *			  device driver.
 *	attr_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
{
	int status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) {
		return (EPROTO);
	}

	switch (attr_msg->tag.vio_subtype) {
	case VIO_SUBTYPE_ACK:
		/*
		 * We now verify the attributes sent by vds.
		 */
		vdc->vdisk_size = attr_msg->vdisk_size;
		vdc->vdisk_type = attr_msg->vdisk_type;

		if ((attr_msg->max_xfer_sz != vdc->max_xfer_sz) ||
		    (attr_msg->vdisk_block_size != vdc->block_size)) {
			/*
			 * Future support: step down to the block size
			 * and max transfer size suggested by the
			 * server. (If this value is less than 128K
			 * then multiple Dring entries per request
			 * would need to be implemented)
			 */
			cmn_err(CE_NOTE, "[%d] Couldn't process block "
			    "attributes from vds", vdc->instance);
			status = EINVAL;
			break;
		}

		if ((attr_msg->xfer_mode != VIO_DRING_MODE) ||
		    (attr_msg->vdisk_size > INT64_MAX) ||
		    (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) {
			vdc_msg("%s[%d] Couldn't process attrs "
			    "from vds", __func__, vdc->instance);
			status = EINVAL;
			break;
		}

		vdc->state = VD_STATE_ATTR;
		status = vdc_init_dring_negotiate(vdc);
		break;

	case VIO_SUBTYPE_NACK:
		/*
		 * vds could not handle the attributes we sent so we
		 * stop negotiating.
		 */
		status = EPROTO;
		break;

	case VIO_SUBTYPE_INFO:
		/*
		 * Handle the case where vds starts the handshake
		 * (for now; vdc is the only supported instigator)
		 */
		status = ENOTSUP;
		break;

	default:
		status = ENOTSUP;
		break;
	}

	return (status);
}

/*
 * Function:
 *	vdc_handle_dring_reg_msg()
 *
 * Description:
 *	This function processes the descriptor ring registration message
 *	sent back by vds and, on success, sends the final RDX message of
 *	the handshake.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the driver.
 *	dring_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg)
{
	int		status = 0;
	vio_rdx_msg_t	msg = {0};
	size_t		msglen = sizeof (msg);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) {
		return (EPROTO);
	}

	switch (dring_msg->tag.vio_subtype) {
	case VIO_SUBTYPE_ACK:
		/* save the received dring_ident */
		vdc->dring_ident = dring_msg->dring_ident;
		PR0("%s[%d] Received dring ident=0x%lx\n",
		    __func__, vdc->instance, vdc->dring_ident);

		/*
		 * Send an RDX message to vds to indicate we are ready
		 * to send data
		 */
		msg.tag.vio_msgtype = VIO_TYPE_CTRL;
		msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
		msg.tag.vio_subtype_env = VIO_RDX;
		msg.tag.vio_sid = vdc->session_id;
		status = vdc_send(vdc, (caddr_t)&msg, &msglen);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] Failed to send RDX"
			    " message (%d)", vdc->instance, status);
			break;
		}

		vdc->state = VD_STATE_RDX;
		break;

	case VIO_SUBTYPE_NACK:
		/*
		 * vds could not handle the DRing info we sent so we
		 * stop negotiating.
		 */
		cmn_err(CE_CONT, "server could not register DRing\n");
		vdc_reset_connection(vdc, B_FALSE);
		vdc_destroy_descriptor_ring(vdc);
		status = EPROTO;
		break;

	case VIO_SUBTYPE_INFO:
		/*
		 * Handle the case where vds starts handshake
		 * (for now only vdc is the instigator)
		 */
		status = ENOTSUP;
		break;
	default:
		status = ENOTSUP;
	}

	return (status);
}

/*
 * Function:
 *	vdc_verify_seq_num()
 *
 * Description:
 *	This function verifies that the sequence number sent back by vds
 *	with the latest message correctly follows the last request
 *	processed.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the driver.
 *	dring_msg	- pointer to the LDC message sent by vds
 *	num_msgs	- the number of requests being acknowledged
 *
 * Return Code:
 *	B_TRUE	- Success.
 *	B_FALSE	- The seq numbers are so out of sync, vdc cannot deal
 *		  with them
 */
static boolean_t
vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int num_msgs)
{
	ASSERT(vdc != NULL);
	ASSERT(dring_msg != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	/*
	 * Check to see if the messages were responded to in the correct
	 * order by vds. There are 3 possible scenarios:
	 *	- the seq_num we expected is returned (everything is OK)
	 *	- a seq_num earlier than the last one acknowledged is returned,
	 *	  if so something is seriously wrong so we reset the connection
	 *	- a seq_num greater than what we expected is returned.
	 */
	if (dring_msg->seq_num != (vdc->seq_num_reply + num_msgs)) {
		vdc_msg("%s[%d]: Bogus seq_num %d, expected %d\n",
		    __func__, vdc->instance, dring_msg->seq_num,
		    vdc->seq_num_reply + num_msgs);
		if (dring_msg->seq_num < (vdc->seq_num_reply + num_msgs)) {
			return (B_FALSE);
		} else {
			/*
			 * vds has responded with a seq_num greater than
			 * what we expected
			 */
			return (B_FALSE);
		}
	}
	vdc->seq_num_reply += num_msgs;

	return (B_TRUE);
}
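
/*
 * Illustrative sketch (not compiled): the accounting above expects each
 * reply's seq_num to equal the last acknowledged seq_num plus the number
 * of requests the reply covers.  Worked example: if seq_num_reply is 5
 * and an ACK arrives covering 3 descriptors, only seq_num 8 is valid.
 */
#if 0
static boolean_t
example_seq_check(uint64_t seq_num, uint64_t *seq_num_reply, int num_msgs)
{
	if (seq_num != (*seq_num_reply + num_msgs))
		return (B_FALSE);	/* out of sync: caller resets */
	*seq_num_reply += num_msgs;	/* advance the acknowledged count */
	return (B_TRUE);
}
#endif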

/*
 * Function:
 *	vdc_is_supported_version()
 *
 * Description:
 *	This routine checks if the major/minor version numbers specified in
 *	'ver_msg' are supported. If not it finds the next version that is
 *	in the supported version list 'vdc_version[]' and sets the fields in
 *	'ver_msg' to those values
 *
 * Arguments:
 *	ver_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	B_TRUE	- Success
 *	B_FALSE	- Version not supported
 */
static boolean_t
vdc_is_supported_version(vio_ver_msg_t *ver_msg)
{
	int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]);

	for (int i = 0; i < vdc_num_versions; i++) {
		ASSERT(vdc_version[i].major > 0);
		ASSERT((i == 0) ||
		    (vdc_version[i].major < vdc_version[i-1].major));

		/*
		 * If the major versions match, adjust the minor version, if
		 * necessary, down to the highest value supported by this
		 * client. The server should support all minor versions
		 * lower than the value it sent
		 */
		if (ver_msg->ver_major == vdc_version[i].major) {
			if (ver_msg->ver_minor > vdc_version[i].minor) {
				PR0("Adjusting minor version from %u to %u",
				    ver_msg->ver_minor, vdc_version[i].minor);
				ver_msg->ver_minor = vdc_version[i].minor;
			}
			return (B_TRUE);
		}

		/*
		 * If the message contains a higher major version number,
		 * set the message's major/minor versions to the current
		 * values and return false, so this message will get resent
		 * with these values, and the server will potentially try
		 * again with the same or a lower version
		 */
		if (ver_msg->ver_major > vdc_version[i].major) {
			ver_msg->ver_major = vdc_version[i].major;
			ver_msg->ver_minor = vdc_version[i].minor;
			PR0("Suggesting major/minor (0x%x/0x%x)\n",
			    ver_msg->ver_major, ver_msg->ver_minor);

			return (B_FALSE);
		}

		/*
		 * Otherwise, the message's major version is less than the
		 * current major version, so continue the loop to the next
		 * (lower) supported version
		 */
	}

	/*
	 * No common version was found; "ground" the version pair in the
	 * message to terminate negotiation
	 */
	ver_msg->ver_major = 0;
	ver_msg->ver_minor = 0;

	return (B_FALSE);
}
/* -------------------------------------------------------------------------- */

/*
 * DKIO(7) support
 */

typedef struct vdc_dk_arg {
	struct dk_callback	dkc;
	int			mode;
	dev_t			dev;
	vdc_t			*vdc;
} vdc_dk_arg_t;

/*
 * Function:
 *	vdc_dkio_flush_cb()
 *
 * Description:
 *	This routine is a callback for DKIOCFLUSHWRITECACHE which can be
 *	called by kernel code.
 *
 * Arguments:
 *	arg	- a pointer to a vdc_dk_arg_t structure.
 */
void
vdc_dkio_flush_cb(void *arg)
{
	struct vdc_dk_arg	*dk_arg = (struct vdc_dk_arg *)arg;
	struct dk_callback	*dkc = NULL;
	vdc_t			*vdc = NULL;
	int			rv;

	if (dk_arg == NULL) {
		vdc_msg("%s[?] DKIOCFLUSHWRITECACHE arg is NULL\n", __func__);
		return;
	}
	dkc = &dk_arg->dkc;
	vdc = dk_arg->vdc;
	ASSERT(vdc != NULL);

	rv = vdc_populate_descriptor(vdc, NULL, 0, VD_OP_FLUSH,
	    dk_arg->mode, SDPART(getminor(dk_arg->dev)));
	if (rv != 0) {
		PR0("%s[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n",
		    __func__, vdc->instance, rv,
		    ddi_model_convert_from(dk_arg->mode & FMODELS));
	}

	/*
	 * Trigger the call back to notify the caller that the ioctl call
	 * has been completed.
	 */
	if ((dk_arg->mode & FKIOCTL) &&
	    (dkc != NULL) &&
	    (dkc->dkc_callback != NULL)) {
		ASSERT(dkc->dkc_cookie != NULL);
		(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
	}

	/* Indicate that one less DKIO write flush is outstanding */
	mutex_enter(&vdc->lock);
	vdc->dkio_flush_pending--;
	ASSERT(vdc->dkio_flush_pending >= 0);
	mutex_exit(&vdc->lock);

	/* free the mem that was allocated when the callback was dispatched */
	kmem_free(arg, sizeof (vdc_dk_arg_t));
}
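
/*
 * Illustrative sketch (not compiled): the callback above runs on the
 * system task queue; the dispatching side (see DKIOCFLUSHWRITECACHE in
 * vd_process_ioctl() below) heap-allocates a vdc_dk_arg_t precisely
 * because the ioctl's stack frame will be gone by the time the callback
 * fires.  The dispatch shape, reduced to its essentials (the real code
 * also bumps dkio_flush_pending under vdc->lock):
 */
#if 0
static int
example_async_flush(vdc_t *vdc, dev_t dev, int mode)
{
	vdc_dk_arg_t *dkarg = kmem_zalloc(sizeof (*dkarg), KM_SLEEP);

	dkarg->mode = mode;
	dkarg->dev = dev;
	dkarg->vdc = vdc;

	/* vdc_dkio_flush_cb() frees dkarg when the flush completes */
	if (taskq_dispatch(system_taskq, vdc_dkio_flush_cb, dkarg,
	    DDI_SLEEP) == NULL)
		return (ENOMEM);
	return (0);
}
#endif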

/*
 * This structure is used in the DKIO(7I) array below.
 */
typedef struct vdc_dk_ioctl {
	uint8_t		op;		/* VD_OP_XXX value */
	int		cmd;		/* Solaris ioctl operation number */
	size_t		nbytes;		/* size of structure to be copied */

	/* function to convert between vDisk and Solaris structure formats */
	int	(*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg,
	    int mode, int dir);
} vdc_dk_ioctl_t;

/*
 * Subset of DKIO(7I) operations currently supported
 */
static vdc_dk_ioctl_t	dk_ioctl[] = {
	{VD_OP_FLUSH,		DKIOCFLUSHWRITECACHE,	sizeof (int),
		vdc_null_copy_func},
	{VD_OP_GET_WCE,		DKIOCGETWCE,		sizeof (int),
		vdc_null_copy_func},
	{VD_OP_SET_WCE,		DKIOCSETWCE,		sizeof (int),
		vdc_null_copy_func},
	{VD_OP_GET_VTOC,	DKIOCGVTOC,		sizeof (vd_vtoc_t),
		vdc_get_vtoc_convert},
	{VD_OP_SET_VTOC,	DKIOCSVTOC,		sizeof (vd_vtoc_t),
		vdc_set_vtoc_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCGGEOM,		sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCG_PHYGEOM,		sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCG_VIRTGEOM,	sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_SET_DISKGEOM,	DKIOCSGEOM,		sizeof (vd_geom_t),
		vdc_set_geom_convert},

	/*
	 * These particular ioctls are not sent to the server - vdc fakes up
	 * the necessary info.
	 */
	{0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func},
	{0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func},
	{0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func},
	{0, DKIOCREMOVABLE, 0, vdc_null_copy_func},
	{0, CDROMREADOFFSET, 0, vdc_null_copy_func}
};

/*
 * Function:
 *	vd_process_ioctl()
 *
 * Description:
 *	This routine processes disk specific ioctl calls
 *
 * Arguments:
 *	dev	- the device number
 *	cmd	- the operation [dkio(7I)] to be processed
 *	arg	- pointer to user provided structure
 *		  (contains data to be set or reference parameter for get)
 *	mode	- bit flag, indicating open settings, 32/64 bit type, etc
 *
 * Return Code:
 *	0
 *	EFAULT
 *	ENXIO
 *	EIO
 *	ENOTSUP
 */
static int
vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode)
{
	int		instance = SDUNIT(getminor(dev));
	vdc_t		*vdc = NULL;
	int		rv = -1;
	int		idx = 0;	/* index into dk_ioctl[] */
	size_t		len = 0;	/* #bytes to send to vds */
	size_t		alloc_len = 0;	/* #bytes to allocate mem for */
	caddr_t		mem_p = NULL;
	size_t		nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0]));
	struct vtoc	vtoc_saved;

	PR0("%s: Processing ioctl(%x) for dev %x : model %x\n",
	    __func__, cmd, dev, ddi_model_convert_from(mode & FMODELS));

	vdc = ddi_get_soft_state(vdc_state, instance);
	if (vdc == NULL) {
		cmn_err(CE_NOTE, "![%d] Could not get soft state structure",
		    instance);
		return (ENXIO);
	}

	/*
	 * Check to see if we can communicate with the vDisk server
	 */
	if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) {
		PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
		return (ENOLINK);
	}

	/*
	 * Validate the ioctl operation to be performed.
	 *
	 * If we have looped through the array without finding a match then
	 * we don't support this ioctl.
	 */
	for (idx = 0; idx < nioctls; idx++) {
		if (cmd == dk_ioctl[idx].cmd)
			break;
	}

	if (idx >= nioctls) {
		PR0("%s[%d] Unsupported ioctl(%x)\n",
		    __func__, vdc->instance, cmd);
		return (ENOTSUP);
	}

	len = dk_ioctl[idx].nbytes;

	/*
	 * Deal with the ioctls which the server does not provide. vdc can
	 * fake these up and return immediately
	 */
	switch (cmd) {
	case CDROMREADOFFSET:
	case DKIOCREMOVABLE:
	case USCSICMD:
		return (ENOTTY);

	case DKIOCINFO:
	{
		struct dk_cinfo cinfo;
		if (vdc->cinfo == NULL)
			return (ENXIO);

		bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo));
		cinfo.dki_partition = SDPART(getminor(dev));

		rv = ddi_copyout(&cinfo, (void *)arg,
		    sizeof (struct dk_cinfo), mode);
		if (rv != 0)
			return (EFAULT);

		return (0);
	}

	case DKIOCGMEDIAINFO:
	{
		if (vdc->minfo == NULL)
			return (ENXIO);

		rv = ddi_copyout(vdc->minfo, (void *)arg,
		    sizeof (struct dk_minfo), mode);
		if (rv != 0)
			return (EFAULT);

		return (0);
	}

	case DKIOCFLUSHWRITECACHE:
	{
		struct dk_callback *dkc = (struct dk_callback *)arg;
		vdc_dk_arg_t	*dkarg = NULL;

		PR1("[%d] Flush W$: mode %x\n", instance, mode);

		/*
		 * If the backing device is not a 'real' disk then the
		 * W$ operation request to the vDisk server will fail
		 * so we might as well save the cycles and return now.
		 */
		if (vdc->vdisk_type != VD_DISK_TYPE_DISK)
			return (ENOTTY);

		/*
		 * If arg is NULL, then there is no callback function
		 * registered and the call operates synchronously; we
		 * break and continue with the rest of the function and
		 * wait for vds to return (i.e. after the request to
		 * vds returns successfully, all writes completed prior
		 * to the ioctl will have been flushed from the disk
		 * write cache to persistent media).
		 *
		 * If a callback function is registered, we dispatch
		 * the request on a task queue and return immediately.
		 * The callback will deal with informing the calling
		 * thread that the flush request is completed.
		 */
		if (dkc == NULL)
			break;

		dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP);

		dkarg->mode = mode;
		dkarg->dev = dev;
		bcopy(dkc, &dkarg->dkc, sizeof (*dkc));

		mutex_enter(&vdc->lock);
		vdc->dkio_flush_pending++;
		dkarg->vdc = vdc;
		mutex_exit(&vdc->lock);

		/* put the request on a task queue */
		rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb,
		    (void *)dkarg, DDI_SLEEP);

		return (rv == NULL ? ENOMEM : 0);
	}
	}

	/* catch programming error in vdc - should be a VD_OP_XXX ioctl */
	ASSERT(dk_ioctl[idx].op != 0);

	/* LDC requires that the memory being mapped is 8-byte aligned */
	alloc_len = P2ROUNDUP(len, sizeof (uint64_t));
	PR1("%s[%d]: struct size %d alloc %d\n",
	    __func__, instance, len, alloc_len);

	ASSERT(alloc_len != 0);	/* sanity check */
	mem_p = kmem_zalloc(alloc_len, KM_SLEEP);

	if (cmd == DKIOCSVTOC) {
		/*
		 * Save a copy of the current VTOC so that we can roll back
		 * if the setting of the new VTOC fails.
		 */
		bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc));
	}

	/*
	 * Call the conversion function for this ioctl which, if necessary,
	 * converts from the Solaris format to the format ARC'ed
	 * as part of the vDisk protocol (FWARC 2006/195)
	 */
	ASSERT(dk_ioctl[idx].convert != NULL);
	rv = (dk_ioctl[idx].convert)(vdc, arg, mem_p, mode, VD_COPYIN);
	if (rv != 0) {
		PR0("%s[%d]: convert returned %d for ioctl 0x%x\n",
		    __func__, instance, rv, cmd);
		if (mem_p != NULL)
			kmem_free(mem_p, alloc_len);
		return (rv);
	}

	/*
	 * send request to vds to service the ioctl.
	 */
	rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, dk_ioctl[idx].op,
	    mode, SDPART((getminor(dev))));
	if (rv != 0) {
		/*
		 * This is not necessarily an error. The ioctl could
		 * be returning a value such as ENOTTY to indicate
		 * that the ioctl is not applicable.
		 */
		PR0("%s[%d]: vds returned %d for ioctl 0x%x\n",
		    __func__, instance, rv, cmd);
		if (mem_p != NULL)
			kmem_free(mem_p, alloc_len);

		if (cmd == DKIOCSVTOC) {
			/* update of the VTOC has failed, roll back */
			bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc));
		}

		return (rv);
	}

	if (cmd == DKIOCSVTOC) {
		/*
		 * The VTOC has been changed, try and update the device
		 * node properties. Failing to set the properties should
		 * not cause an error to be returned to the caller though.
		 */
		if (vdc_create_device_nodes_props(vdc)) {
			cmn_err(CE_NOTE, "![%d] Failed to update device nodes"
			    " properties", vdc->instance);
		}
	}

	/*
	 * Call the conversion function (if it exists) for this ioctl
	 * which converts from the format ARC'ed as part of the vDisk
	 * protocol (FWARC 2006/195) back to a format understood by
	 * the rest of Solaris.
	 */
	rv = (dk_ioctl[idx].convert)(vdc, mem_p, arg, mode, VD_COPYOUT);
	if (rv != 0) {
		PR0("%s[%d]: convert returned %d for ioctl 0x%x\n",
		    __func__, instance, rv, cmd);
		if (mem_p != NULL)
			kmem_free(mem_p, alloc_len);
		return (rv);
	}

	if (mem_p != NULL)
		kmem_free(mem_p, alloc_len);

	return (rv);
}

/*
 * Function:
 *	vdc_null_copy_func()
 *
 * Description:
 *	This is an empty conversion function used by ioctl calls which
 *	do not need to convert the data being passed in/out to userland
 */
static int
vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))
	_NOTE(ARGUNUSED(from))
	_NOTE(ARGUNUSED(to))
	_NOTE(ARGUNUSED(mode))
	_NOTE(ARGUNUSED(dir))

	return (0);
}
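
/*
 * Illustrative sketch (not compiled): every dk_ioctl[] entry is driven by
 * vd_process_ioctl() above through the same three steps - convert in,
 * ship to vds, convert out.  The skeleton of that flow with the error
 * handling and VTOC rollback stripped for clarity:
 */
#if 0
static int
example_ioctl_flow(vdc_t *vdc, vdc_dk_ioctl_t *iop, caddr_t arg,
    caddr_t mem_p, size_t alloc_len, int mode, uint64_t slice)
{
	int rv;

	/* Solaris structure -> wire format (no-op for simple ioctls) */
	rv = iop->convert(vdc, arg, mem_p, mode, VD_COPYIN);
	if (rv == 0)
		rv = vdc_populate_descriptor(vdc, mem_p, alloc_len,
		    iop->op, mode, slice);
	if (rv == 0)
		/* wire format -> Solaris structure */
		rv = iop->convert(vdc, mem_p, arg, mode, VD_COPYOUT);
	return (rv);
}
#endif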

/*
 * Function:
 *	vdc_get_vtoc_convert()
 *
 * Description:
 *	This routine performs the necessary conversions from the DKIOCGVTOC
 *	Solaris structure to the format defined in FWARC 2006/195.
 *
 *	In the struct vtoc definition, the timestamp field is marked as not
 *	supported so it is not part of vDisk protocol (FWARC 2006/195).
 *	However SVM uses that field to check it can write into the VTOC,
 *	so we fake up the info of that field.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- the buffer containing the data to be copied from
 *	to	- the buffer to be copied to
 *	mode	- flags passed to ioctl() call
 *	dir	- the "direction" of the copy - VD_COPYIN or VD_COPYOUT
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- incorrect buffer passed in.
 *	EFAULT	- ddi_copyout routine encountered an error.
 */
static int
vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	int		i;
	void		*tmp_mem = NULL;
	void		*tmp_memp;
	struct vtoc	vt;
	struct vtoc32	vt32;
	int		copy_len = 0;
	int		rv = 0;

	if (dir != VD_COPYOUT)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
		copy_len = sizeof (struct vtoc32);
	else
		copy_len = sizeof (struct vtoc);

	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);

	VD_VTOC2VTOC((vd_vtoc_t *)from, &vt);

	/* fake the VTOC timestamp field */
	for (i = 0; i < V_NUMPAR; i++) {
		vt.timestamp[i] = vdc->vtoc->timestamp[i];
	}

	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		vtoctovtoc32(vt, vt32);
		tmp_memp = &vt32;
	} else {
		tmp_memp = &vt;
	}
	rv = ddi_copyout(tmp_memp, to, copy_len, mode);
	if (rv != 0)
		rv = EFAULT;

	kmem_free(tmp_mem, copy_len);
	return (rv);
}

/*
 * Function:
 *	vdc_set_vtoc_convert()
 *
 * Description:
 *	This routine performs the necessary conversions from the DKIOCSVTOC
 *	Solaris structure to the format defined in FWARC 2006/195.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- Buffer with data
 *	to	- Buffer where data is to be copied to
 *	mode	- flags passed to ioctl
 *	dir	- direction of copy (in or out)
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- Invalid buffer passed in
 *	EFAULT	- ddi_copyin of data failed
 */
static int
vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	void		*tmp_mem = NULL;
	struct vtoc	vt;
	struct vtoc	*vtp = &vt;
	vd_vtoc_t	vtvd;
	int		copy_len = 0;
	int		rv = 0;

	if (dir != VD_COPYIN)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
		copy_len = sizeof (struct vtoc32);
	else
		copy_len = sizeof (struct vtoc);

	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);

	rv = ddi_copyin(from, tmp_mem, copy_len, mode);
	if (rv != 0) {
		kmem_free(tmp_mem, copy_len);
		return (EFAULT);
	}

	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt);
	} else {
		vtp = tmp_mem;
	}

	/*
	 * The VTOC is being changed, so vdc needs to update the copy
	 * it saved in the soft state structure.
	 */
	bcopy(vtp, vdc->vtoc, sizeof (struct vtoc));

	VTOC2VD_VTOC(vtp, &vtvd);
	bcopy(&vtvd, to, sizeof (vd_vtoc_t));
	kmem_free(tmp_mem, copy_len);

	return (0);
}
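
/*
 * Illustrative sketch (not compiled): both VTOC converters above pick the
 * copy size from the caller's data model, so a 32-bit process exchanges a
 * struct vtoc32 across the ioctl boundary while a 64-bit process uses
 * struct vtoc.  The model test in isolation:
 */
#if 0
static size_t
example_vtoc_copy_len(int mode)
{
	/* ILP32 callers pass the 32-bit layout across the boundary */
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
		return (sizeof (struct vtoc32));
	return (sizeof (struct vtoc));
}
#endif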

/*
 * Function:
 *	vdc_get_geom_convert()
 *
 * Description:
 *	This routine performs the necessary conversions from the DKIOCGGEOM,
 *	DKIOCG_PHYGEOM and DKIOCG_VIRTGEOM Solaris structures to the format
 *	defined in FWARC 2006/195
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- Buffer with data
 *	to	- Buffer where data is to be copied to
 *	mode	- flags passed to ioctl
 *	dir	- direction of copy (in or out)
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- Invalid buffer passed in
 *	EFAULT	- ddi_copyout of data failed
 */
static int
vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	struct dk_geom	geom;
	int	copy_len = sizeof (struct dk_geom);
	int	rv = 0;

	if (dir != VD_COPYOUT)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom);
	rv = ddi_copyout(&geom, to, copy_len, mode);
	if (rv != 0)
		rv = EFAULT;

	return (rv);
}

/*
 * Function:
 *	vdc_set_geom_convert()
 *
 * Description:
 *	This routine performs the necessary conversions from the DKIOCSGEOM
 *	Solaris structure to the format defined in FWARC 2006/195.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- Buffer with data
 *	to	- Buffer where data is to be copied to
 *	mode	- flags passed to ioctl
 *	dir	- direction of copy (in or out)
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- Invalid buffer passed in
 *	EFAULT	- ddi_copyin of data failed
 */
static int
vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	vd_geom_t	vdgeom;
	void		*tmp_mem = NULL;
	int		copy_len = sizeof (struct dk_geom);
	int		rv = 0;

	if (dir != VD_COPYIN)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);

	rv = ddi_copyin(from, tmp_mem, copy_len, mode);
	if (rv != 0) {
		kmem_free(tmp_mem, copy_len);
		return (EFAULT);
	}
	DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom);
	bcopy(&vdgeom, to, sizeof (vdgeom));
	kmem_free(tmp_mem, copy_len);

	return (0);
}

/*
 * Function:
 *	vdc_create_fake_geometry()
 *
 * Description:
 *	This routine fakes up the disk info needed for some DKIO ioctls.
 *		- DKIOCINFO
 *		- DKIOCGMEDIAINFO
 *
 *	[ just like lofi(7D) and ramdisk(7D) ]
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_create_fake_geometry(vdc_t *vdc)
{
	int	rv = 0;

	ASSERT(vdc != NULL);

	/*
	 * DKIOCINFO support
	 */
	vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);

	(void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME);
	(void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME);
	/* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */
	vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz;
	vdc->cinfo->dki_ctype = DKC_SCSI_CCS;
	vdc->cinfo->dki_flags = DKI_FMTVOL;
	vdc->cinfo->dki_cnum = 0;
	vdc->cinfo->dki_addr = 0;
	vdc->cinfo->dki_space = 0;
	vdc->cinfo->dki_prio = 0;
	vdc->cinfo->dki_vec = 0;
	vdc->cinfo->dki_unit = vdc->instance;
	vdc->cinfo->dki_slave = 0;
	/*
	 * The partition number will be created on the fly depending on the
	 * actual slice (i.e. minor node) that is used to request the data.
	 */
	vdc->cinfo->dki_partition = 0;

	/*
	 * DKIOCGMEDIAINFO support
	 */
	if (vdc->minfo == NULL)
		vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP);
	vdc->minfo->dki_media_type = DK_FIXED_DISK;
	vdc->minfo->dki_capacity = 1;
	vdc->minfo->dki_lbsize = DEV_BSIZE;

	return (rv);
}

/*
 * Function:
 *	vdc_setup_disk_layout()
 *
 * Description:
 *	This routine discovers all the necessary details about the "disk"
 *	by requesting the data that is available from the vDisk server and
 *	by faking up the rest of the data.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_setup_disk_layout(vdc_t *vdc)
{
	buf_t	*buf;	/* BREAD requests need to be in a buf_t structure */
	dev_t	dev;
	int	slice = 0;
	int	rv;

	ASSERT(vdc != NULL);

	rv = vdc_create_fake_geometry(vdc);
	if (rv != 0) {
		cmn_err(CE_NOTE, "[%d] Failed to create disk geometry (err%d)",
		    vdc->instance, rv);
	}

	if (vdc->vtoc == NULL)
		vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP);

	dev = makedevice(ddi_driver_major(vdc->dip),
	    VD_MAKE_DEV(vdc->instance, 0));
	rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL);
	if (rv) {
		cmn_err(CE_NOTE, "[%d] Failed to get VTOC (err=%d)",
		    vdc->instance, rv);
		return (rv);
	}

	/*
	 * Find the slice that represents the entire "disk" and use that to
	 * read the disk label. The convention in Solaris is that slice 2
	 * represents the whole disk so we check that it is, otherwise we
	 * default to slice 0
	 */
	if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) &&
	    (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) {
		slice = 2;
	} else {
		slice = 0;
	}

	/*
	 * Read disk label from start of disk
	 */
	vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP);
	buf = kmem_alloc(sizeof (buf_t), KM_SLEEP);
	bioinit(buf);
	buf->b_un.b_addr = (caddr_t)vdc->label;
	buf->b_bcount = DK_LABEL_SIZE;
	buf->b_flags = B_BUSY | B_READ;
	buf->b_dev = dev;
	rv = vdc_populate_descriptor(vdc, (caddr_t)buf, DK_LABEL_SIZE,
	    VD_OP_BREAD, 0, slice);
	if (rv == 0)
		rv = biowait(buf);
	biofini(buf);
	kmem_free(buf, sizeof (buf_t));

	return (rv);
}