/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * LDoms virtual disk client (vdc) device driver
 *
 * This driver runs on a guest logical domain and communicates with the virtual
 * disk server (vds) driver running on the service domain which is exporting
 * virtualized "disks" to the guest logical domain.
 *
 * The driver can be divided into four sections:
 *
 * 1) generic device driver housekeeping
 *	_init, _fini, attach, detach, ops structures, etc.
 *
 * 2) communication channel setup
 *	Setup the communications link over the LDC channel that vdc uses to
 *	talk to the vDisk server. Initialize the descriptor ring which
 *	allows the LDC clients to transfer data via memory mappings.
 *
 * 3) Support exported to upper layers (filesystems, etc)
 *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
 *	ioctl calls. vdc copies the data to be written into the descriptor
 *	ring, or maps the buffer into which the vDisk server will store
 *	the data read, into the descriptor ring. It then sends a message
 *	to the vDisk server requesting it to complete the operation.
 *
 * 4) Handling responses from the vDisk server.
 *	The vDisk server will ACK some or all of the messages vdc sends to it
 *	(this is configured during the handshake). Upon receipt of an ACK
 *	vdc will check the descriptor ring and signal to the upper layer
 *	code waiting on the IO.
 */
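
/*
 * A typical read, end to end (illustrative sketch only):
 *
 *	strategy(9E) -> vdc_strategy() -> vdc_populate_descriptor()
 *	  -> DRing entry marked VIO_DESC_READY -> vdc_send() DRing message
 *	  -> vds services the request and ACKs -> vdc_handle_cb()
 *	  -> message processing code wakes the waiter
 *	  -> vdc_depopulate_descriptor() -> bioerror(9F)/biodone(9F)
 */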

#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/efi_partition.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mach_descrip.h>
#include <sys/modctl.h>
#include <sys/mdeg.h>
#include <sys/note.h>
#include <sys/open.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/promif.h>
#include <sys/vtoc.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>

#include <sys/cdio.h>
#include <sys/dktp/cm.h>
#include <sys/dktp/fdisk.h>
#include <sys/scsi/generic/sense.h>
#include <sys/scsi/impl/uscsi.h>	/* Needed for defn of USCSICMD ioctl */
#include <sys/scsi/targets/sddef.h>

#include <sys/ldoms.h>
#include <sys/ldc.h>
#include <sys/vio_common.h>
#include <sys/vio_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdc.h>

/*
 * function prototypes
 */

/* standard driver functions */
static int	vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
static int	vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int	vdc_strategy(struct buf *buf);
static int	vdc_print(dev_t dev, char *str);
static int	vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int	vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
			cred_t *credp, int *rvalp);
static int	vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
static int	vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);

static int	vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
			void *arg, void **resultp);
static int	vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int	vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);

/* setup */
static int	vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
static int	vdc_do_ldc_init(vdc_t *vdc);
static int	vdc_start_ldc_connection(vdc_t *vdc);
static int	vdc_create_device_nodes(vdc_t *vdc);
static int	vdc_create_device_nodes_props(vdc_t *vdc);
static int	vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id);
static int	vdc_do_ldc_up(vdc_t *vdc);
static void	vdc_terminate_ldc(vdc_t *vdc);
static int	vdc_init_descriptor_ring(vdc_t *vdc);
static void	vdc_destroy_descriptor_ring(vdc_t *vdc);

/* handshake with vds */
static void	vdc_init_handshake_negotiation(void *arg);
static int	vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
static int	vdc_init_attr_negotiation(vdc_t *vdc);
static int	vdc_init_dring_negotiate(vdc_t *vdc);
static void	vdc_reset_connection(vdc_t *vdc, boolean_t resetldc);
static boolean_t vdc_is_able_to_tx_data(vdc_t *vdc, int flag);
static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg);

/* processing incoming messages from vDisk server */
static void	vdc_process_msg_thread(vdc_t *vdc);
static void	vdc_process_msg(void *arg);
static void	vdc_do_process_msg(vdc_t *vdc);
static uint_t	vdc_handle_cb(uint64_t event, caddr_t arg);
static int	vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg);
static int	vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg);
static int	vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg);
static int	vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
static int	vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
static int	vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
static int	vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t needed);
static int	vdc_populate_descriptor(vdc_t *vdc, caddr_t addr,
			size_t nbytes, int op, uint64_t arg, uint64_t slice);
static int	vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx,
			vio_dring_msg_t dmsg);
static int	vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
static int	vdc_get_response(vdc_t *vdc, int start, int end);
static int	vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx,
			caddr_t addr, size_t nbytes, int operation);
static boolean_t vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int
			num_msgs);

/* dkio */
static int	vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
static int	vdc_create_fake_geometry(vdc_t *vdc);
static int	vdc_setup_disk_layout(vdc_t *vdc);
static int	vdc_null_copy_func(void *from, void *to, int mode, int dir);
static int	vdc_get_vtoc_convert(void *from, void *to, int mode, int dir);
static int	vdc_set_vtoc_convert(void *from, void *to, int mode, int dir);
static int	vdc_get_geom_convert(void *from, void *to, int mode, int dir);
static int	vdc_set_geom_convert(void *from, void *to, int mode, int dir);
static int	vdc_uscsicmd_convert(void *from, void *to, int mode, int dir);

/*
 * Module variables
 */
uint64_t	vdc_hz_timeout;
uint64_t	vdc_usec_timeout = VDC_USEC_TIMEOUT_MIN;
uint64_t	vdc_dump_usec_timeout = VDC_USEC_TIMEOUT_MIN / 300;
static int	vdc_retries = VDC_RETRIES;
static int	vdc_dump_retries = VDC_RETRIES * 10;

/* Soft state pointer */
static void	*vdc_state;

/* variable controlling the verbosity level of the error/debug messages */
int	vdc_msglevel = 0;

/*
 * Supported vDisk protocol version pairs.
 *
 * The first array entry is the latest and preferred version.
 */
static const vio_ver_t	vdc_version[] = {{1, 0}};
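
/*
 * Hypothetical example of how the array above is consumed: if it were
 * extended to {{2, 0}, {1, 0}}, the handshake would first propose 2.0
 * via vdc_init_ver_negotiation(); on a NACK from a server that only
 * speaks 1.0, the response handling code steps down to the next array
 * entry and retries (see vdc_handle_ver_msg()/vdc_is_supported_version()).
 */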

static void
vdc_msg(const char *format, ...)
{
	va_list	args;

	va_start(args, format);
	vcmn_err(CE_CONT, format, args);
	va_end(args);
}

static struct cb_ops vdc_cb_ops = {
	vdc_open,	/* cb_open */
	vdc_close,	/* cb_close */
	vdc_strategy,	/* cb_strategy */
	vdc_print,	/* cb_print */
	vdc_dump,	/* cb_dump */
	vdc_read,	/* cb_read */
	vdc_write,	/* cb_write */
	vdc_ioctl,	/* cb_ioctl */
	nodev,		/* cb_devmap */
	nodev,		/* cb_mmap */
	nodev,		/* cb_segmap */
	nochpoll,	/* cb_chpoll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* cb_str */
	D_MP | D_64BIT,	/* cb_flag */
	CB_REV,		/* cb_rev */
	vdc_aread,	/* cb_aread */
	vdc_awrite	/* cb_awrite */
};

static struct dev_ops vdc_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	vdc_getinfo,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vdc_attach,	/* devo_attach */
	vdc_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	&vdc_cb_ops,	/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk client %I%",
	&vdc_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/* -------------------------------------------------------------------------- */

/*
 * Device Driver housekeeping and setup
 */

int
_init(void)
{
	int	status;

	if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0)
		return (status);
	if ((status = mod_install(&modlinkage)) != 0)
		ddi_soft_state_fini(&vdc_state);
	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;

	if ((status = mod_remove(&modlinkage)) != 0)
		return (status);
	ddi_soft_state_fini(&vdc_state);
	return (0);
}

static int
vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	_NOTE(ARGUNUSED(dip))

	int	instance = SDUNIT(getminor((dev_t)arg));
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
			*resultp = NULL;
			return (DDI_FAILURE);
		}
		*resultp = vdc->dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);
	default:
		*resultp = NULL;
		return (DDI_FAILURE);
	}
}

static int
vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	instance;
	int	rv;
	uint_t	retries = 0;
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_DETACH:
		/* the real work happens below */
		break;
	case DDI_SUSPEND:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	ASSERT(cmd == DDI_DETACH);
	instance = ddi_get_instance(dip);
	PR1("%s[%d] Entered\n", __func__, instance);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s[%d]: Could not get state structure.",
		    __func__, instance);
		return (DDI_FAILURE);
	}

	if (vdc->open) {
		PR0("%s[%d]: Cannot detach: device is open",
		    __func__, instance);
		return (DDI_FAILURE);
	}

	PR0("%s[%d] proceeding...\n", __func__, instance);
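
	/*
	 * Teardown proceeds in rough reverse order of attach: stop the
	 * handshake and the message processing thread first, then destroy
	 * the descriptor ring, tear down LDC, remove the minor nodes and
	 * locks, and finally free the soft state. Each step is gated on
	 * the corresponding VDC_* bit in vdc->initialized.
	 */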
	/*
	 * Try to disable callbacks to prevent another handshake.
	 */
	rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
	PR0("%s[%d] callback disabled (rv=%d)\n", __func__, instance, rv);

	/*
	 * Prevent any more attempts to start a handshake with the vdisk
	 * server and tear down the existing connection.
	 */
	mutex_enter(&vdc->lock);
	vdc->initialized |= VDC_HANDSHAKE_STOP;
	vdc_reset_connection(vdc, B_TRUE);
	mutex_exit(&vdc->lock);

	if (vdc->initialized & VDC_THREAD) {
		mutex_enter(&vdc->msg_proc_lock);
		vdc->msg_proc_thr_state = VDC_THR_STOP;
		vdc->msg_pending = B_TRUE;
		cv_signal(&vdc->msg_proc_cv);

		while (vdc->msg_proc_thr_state != VDC_THR_DONE) {
			PR0("%s[%d]: Waiting for thread to exit\n",
			    __func__, instance);
			rv = cv_timedwait(&vdc->msg_proc_cv,
			    &vdc->msg_proc_lock, VD_GET_TIMEOUT_HZ(1));
			if ((rv == -1) && (retries++ > vdc_retries))
				break;
		}
		mutex_exit(&vdc->msg_proc_lock);
	}

	mutex_enter(&vdc->lock);

	if (vdc->initialized & VDC_DRING)
		vdc_destroy_descriptor_ring(vdc);

	if (vdc->initialized & VDC_LDC)
		vdc_terminate_ldc(vdc);

	mutex_exit(&vdc->lock);

	if (vdc->initialized & VDC_MINOR) {
		ddi_prop_remove_all(dip);
		ddi_remove_minor_node(dip, NULL);
	}

	if (vdc->initialized & VDC_LOCKS) {
		mutex_destroy(&vdc->lock);
		mutex_destroy(&vdc->attach_lock);
		mutex_destroy(&vdc->msg_proc_lock);
		mutex_destroy(&vdc->dring_lock);
		cv_destroy(&vdc->cv);
		cv_destroy(&vdc->attach_cv);
		cv_destroy(&vdc->msg_proc_cv);
	}

	if (vdc->minfo)
		kmem_free(vdc->minfo, sizeof (struct dk_minfo));

	if (vdc->cinfo)
		kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));

	if (vdc->vtoc)
		kmem_free(vdc->vtoc, sizeof (struct vtoc));

	if (vdc->label)
		kmem_free(vdc->label, DK_LABEL_SIZE);

	if (vdc->initialized & VDC_SOFT_STATE)
		ddi_soft_state_free(vdc_state, instance);

	PR0("%s[%d] End %p\n", __func__, instance, vdc);

	return (DDI_SUCCESS);
}


static int
vdc_do_attach(dev_info_t *dip)
{
	int	instance;
	vdc_t	*vdc = NULL;
	int	status;
	uint_t	retries = 0;

	ASSERT(dip != NULL);

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
		vdc_msg("%s:(%d): Couldn't alloc state structure",
		    __func__, instance);
		return (DDI_FAILURE);
	}

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s:(%d): Could not get state structure.",
		    __func__, instance);
		return (DDI_FAILURE);
	}

	/*
	 * We assign a value to 'initialized' here (rather than OR-ing in
	 * a bit) in order to zero out the variable; from this point on we
	 * only set bits in it to indicate what has been done.
	 */
	vdc->initialized = VDC_SOFT_STATE;

	vdc_hz_timeout = drv_usectohz(vdc_usec_timeout);

	vdc->dip = dip;
	vdc->instance = instance;
	vdc->open = 0;
	vdc->vdisk_type = VD_DISK_TYPE_UNK;
	vdc->state = VD_STATE_INIT;
	vdc->ldc_state = 0;
	vdc->session_id = 0;
	vdc->block_size = DEV_BSIZE;
	vdc->max_xfer_sz = maxphys / DEV_BSIZE;

	vdc->vtoc = NULL;
	vdc->cinfo = NULL;
	vdc->minfo = NULL;

	mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->attach_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->msg_proc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->dring_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->attach_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->msg_proc_cv, NULL, CV_DRIVER, NULL);
	vdc->initialized |= VDC_LOCKS;
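
	/*
	 * Create the message processing thread before any LDC traffic can
	 * arrive. The intended division of labor (a sketch based on how
	 * this file uses these pieces): the LDC callback vdc_handle_cb()
	 * flags pending work and signals msg_proc_cv, while the thread
	 * created below performs the actual message handling.
	 */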
	vdc->msg_pending = B_FALSE;
	vdc->msg_proc_thr_id = thread_create(NULL, 0, vdc_process_msg_thread,
	    vdc, 0, &p0, TS_RUN, minclsyspri);
	if (vdc->msg_proc_thr_id == NULL) {
		cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread",
		    instance);
		return (DDI_FAILURE);
	}
	vdc->initialized |= VDC_THREAD;

	/* initialize the LDC channel which will be used to communicate with vds */
	if (vdc_do_ldc_init(vdc) != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance);
		return (DDI_FAILURE);
	}

	/* Bring up connection with vds via LDC */
	status = vdc_start_ldc_connection(vdc);
	if (status != 0) {
		vdc_msg("%s[%d] Could not start LDC", __func__, instance);
		return (DDI_FAILURE);
	}

	/*
	 * We need to wait until the handshake has completed before leaving
	 * the attach(). This is to allow the device node(s) to be created
	 * and the first usage of the filesystem to succeed.
	 */
	mutex_enter(&vdc->attach_lock);
	while ((vdc->ldc_state != LDC_UP) ||
	    (vdc->state != VD_STATE_DATA)) {

		PR0("%s[%d] handshake in progress [VD %d (LDC %d)]\n",
		    __func__, instance, vdc->state, vdc->ldc_state);

		status = cv_timedwait(&vdc->attach_cv, &vdc->attach_lock,
		    VD_GET_TIMEOUT_HZ(1));
		if (status == -1) {
			if (retries >= vdc_retries) {
				PR0("%s[%d] Give up handshake wait.\n",
				    __func__, instance);
				mutex_exit(&vdc->attach_lock);
				return (DDI_FAILURE);
			} else {
				PR0("%s[%d] Retry #%d for handshake.\n",
				    __func__, instance, retries);
				vdc_init_handshake_negotiation(vdc);
				retries++;
			}
		}
	}
	mutex_exit(&vdc->attach_lock);

	/*
	 * Once the handshake is complete, we can use the DRing to send
	 * requests to the vDisk server to calculate the geometry and
	 * VTOC of the "disk"
	 */
	status = vdc_setup_disk_layout(vdc);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Failed to discover disk layout (err %d)",
		    vdc->instance, status);
	}

	/*
	 * Now that we have the device info we can create the
	 * device nodes and properties
	 */
	status = vdc_create_device_nodes(vdc);
	if (status) {
		cmn_err(CE_NOTE, "[%d] Failed to create device nodes",
		    instance);
		return (status);
	}
	status = vdc_create_device_nodes_props(vdc);
	if (status) {
		cmn_err(CE_NOTE, "[%d] Failed to create device nodes"
		    " properties (%d)", instance, status);
		return (status);
	}

	ddi_report_dev(dip);

	PR0("%s[%d] Attach completed\n", __func__, instance);
	return (status);
}

static int
vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	status;

	switch (cmd) {
	case DDI_ATTACH:
		if ((status = vdc_do_attach(dip)) != 0)
			(void) vdc_detach(dip, DDI_DETACH);
		return (status);
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}

static int
vdc_do_ldc_init(vdc_t *vdc)
{
	int		status = 0;
	ldc_status_t	ldc_state;
	ldc_attr_t	ldc_attr;
	uint64_t	ldc_id = 0;
	dev_info_t	*dip = NULL;

	ASSERT(vdc != NULL);

	dip = vdc->dip;
	vdc->initialized |= VDC_LDC;

	if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) {
		vdc_msg("%s: Failed to get <ldc_id> property\n", __func__);
		return (EIO);
	}
	vdc->ldc_id = ldc_id;
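
	/*
	 * Channel attributes for the vdc<->vds link. A brief sketch of
	 * what each field conveys to LDC (see the LDC framework for the
	 * authoritative semantics): devclass identifies this endpoint as
	 * a block device client, instance ties the channel to this vdc
	 * instance, mode selects the transport reliability mode, and
	 * qlen sizes the receive queue.
	 */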
	ldc_attr.devclass = LDC_DEV_BLK;
	ldc_attr.instance = vdc->instance;
	ldc_attr.mode = LDC_MODE_UNRELIABLE;	/* unreliable transport */
	ldc_attr.qlen = VD_LDC_QLEN;

	if ((vdc->initialized & VDC_LDC_INIT) == 0) {
		status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] ldc_init(chan %ld) returned %d",
			    vdc->instance, ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_INIT;
	}
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		vdc_msg("Cannot discover LDC status [err=%d].", status);
		return (status);
	}
	vdc->ldc_state = ldc_state;

	if ((vdc->initialized & VDC_LDC_CB) == 0) {
		status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
		    (caddr_t)vdc);
		if (status != 0) {
			vdc_msg("%s: ldc_reg_callback()=%d", __func__, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_CB;
	}

	vdc->initialized |= VDC_LDC;

	/*
	 * At this stage we have initialized LDC. We will now try to open
	 * the connection.
	 */
	if (vdc->ldc_state == LDC_INIT) {
		status = ldc_open(vdc->ldc_handle);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] ldc_open(chan %ld) returned %d",
			    vdc->instance, vdc->ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_OPEN;
	}

	return (status);
}

static int
vdc_start_ldc_connection(vdc_t *vdc)
{
	int	status = 0;

	ASSERT(vdc != NULL);

	mutex_enter(&vdc->lock);

	if (vdc->ldc_state == LDC_UP) {
		PR0("%s: LDC is already UP ..\n", __func__);
		mutex_exit(&vdc->lock);
		return (0);
	}

	status = vdc_do_ldc_up(vdc);

	PR0("%s[%d] Finished bringing up LDC\n", __func__, vdc->instance);

	mutex_exit(&vdc->lock);

	return (status);
}
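
/*
 * Illustrative example of what vdc_create_device_nodes() below produces:
 * for instance 0 of a full disk (VD_DISK_TYPE_DISK) it creates one block
 * and one character ("raw") minor node per slice, e.g. "a".."h" and
 * "a,raw".."h,raw" assuming V_NUMPAR is 8; for a single exported slice
 * (VD_DISK_TYPE_SLICE) just "a" and "a,raw".
 */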

/*
 * Function:
 *	vdc_create_device_nodes
 *
 * Description:
 *	This function creates the block and character device nodes under
 *	/devices. It is called as part of the attach(9E) of the instance
 *	during the handshake with vds, after vds has sent the attributes
 *	to vdc.
 *
 *	If the device is of type VD_DISK_TYPE_SLICE then the minor node
 *	of 2 is used in keeping with the Solaris convention that slice 2
 *	refers to a whole disk. Slices start at 'a'.
 *
 * Parameters:
 *	vdc		- soft state pointer
 *
 * Return Values
 *	0		- Success
 *	EIO		- Failed to create node
 *	EINVAL		- Unknown type of disk exported
 */
static int
vdc_create_device_nodes(vdc_t *vdc)
{
	/* uses NNNN which is OK as long as # of disks <= 10000 */
	char		name[sizeof ("disk@NNNN:s,raw")];
	dev_info_t	*dip = NULL;
	int		instance;
	int		num_slices = 1;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	for (i = 0; i < num_slices; i++) {
		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
		if (ddi_create_minor_node(dip, name, S_IFBLK,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			vdc_msg("%s[%d]: Couldn't add block node %s.",
			    __func__, instance, name);
			return (EIO);
		}

		/* if any device node is created we set this flag */
		vdc->initialized |= VDC_MINOR;

		(void) snprintf(name, sizeof (name), "%c%s",
		    'a' + i, ",raw");
		if (ddi_create_minor_node(dip, name, S_IFCHR,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			vdc_msg("%s[%d]: Could not add raw node %s.",
			    __func__, instance, name);
			return (EIO);
		}
	}

	return (0);
}
790 " No VTOC available", instance); 791 return (ENXIO); 792 } 793 794 switch (vdc->vdisk_type) { 795 case VD_DISK_TYPE_DISK: 796 num_slices = V_NUMPAR; 797 break; 798 case VD_DISK_TYPE_SLICE: 799 num_slices = 1; 800 break; 801 case VD_DISK_TYPE_UNK: 802 default: 803 return (EINVAL); 804 } 805 806 for (i = 0; i < num_slices; i++) { 807 dev = makedevice(ddi_driver_major(dip), 808 VD_MAKE_DEV(instance, i)); 809 810 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 811 PR0("%s[%d] sz %ld (%ld Mb) p_size %lx\n", 812 __func__, instance, size, size / (1024 * 1024), 813 vdc->vtoc->v_part[i].p_size); 814 815 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 816 if (rv != DDI_PROP_SUCCESS) { 817 vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", 818 __func__, instance, VDC_SIZE_PROP_NAME, size); 819 return (EIO); 820 } 821 822 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 823 lbtodb(size)); 824 if (rv != DDI_PROP_SUCCESS) { 825 vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", __func__, 826 instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 827 return (EIO); 828 } 829 } 830 831 return (0); 832 } 833 834 static int 835 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 836 { 837 _NOTE(ARGUNUSED(cred)) 838 839 int instance; 840 vdc_t *vdc; 841 842 ASSERT(dev != NULL); 843 instance = SDUNIT(getminor(*dev)); 844 845 PR0("%s[%d] minor = %d flag = %x, otyp = %x\n", __func__, instance, 846 getminor(*dev), flag, otyp); 847 848 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 849 return (EINVAL); 850 851 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 852 vdc_msg("%s[%d] Could not get state.", __func__, instance); 853 return (ENXIO); 854 } 855 856 /* 857 * Check to see if we can communicate with vds 858 */ 859 if (!vdc_is_able_to_tx_data(vdc, flag)) { 860 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 861 return (ENOLINK); 862 } 863 864 mutex_enter(&vdc->lock); 865 vdc->open++; 866 mutex_exit(&vdc->lock); 867 868 return (0); 869 } 870 871 static int 872 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 873 { 874 _NOTE(ARGUNUSED(cred)) 875 876 int instance; 877 vdc_t *vdc; 878 879 instance = SDUNIT(getminor(dev)); 880 881 PR0("%s[%d] flag = %x, otyp = %x\n", __func__, instance, flag, otyp); 882 883 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 884 return (EINVAL); 885 886 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 887 vdc_msg("%s[%d] Could not get state.", __func__, instance); 888 return (ENXIO); 889 } 890 891 /* 892 * Check to see if we can communicate with vds 893 */ 894 if (!vdc_is_able_to_tx_data(vdc, 0)) { 895 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 896 return (ETIMEDOUT); 897 } 898 899 if (vdc->dkio_flush_pending) { 900 PR0("%s[%d]: Cannot detach: %d outstanding DKIO flushes", 901 __func__, instance, vdc->dkio_flush_pending); 902 return (EBUSY); 903 } 904 905 /* 906 * Should not need the mutex here, since the framework should protect 907 * against more opens on this device, but just in case. 

static int
vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	ASSERT(dev != NULL);
	instance = SDUNIT(getminor(*dev));

	PR0("%s[%d] minor = %d flag = %x, otyp = %x\n", __func__, instance,
	    getminor(*dev), flag, otyp);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s[%d] Could not get state.", __func__, instance);
		return (ENXIO);
	}

	/*
	 * Check to see if we can communicate with vds
	 */
	if (!vdc_is_able_to_tx_data(vdc, flag)) {
		PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
		return (ENOLINK);
	}

	mutex_enter(&vdc->lock);
	vdc->open++;
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	instance = SDUNIT(getminor(dev));

	PR0("%s[%d] flag = %x, otyp = %x\n", __func__, instance, flag, otyp);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s[%d] Could not get state.", __func__, instance);
		return (ENXIO);
	}

	/*
	 * Check to see if we can communicate with vds
	 */
	if (!vdc_is_able_to_tx_data(vdc, 0)) {
		PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
		return (ETIMEDOUT);
	}

	if (vdc->dkio_flush_pending) {
		PR0("%s[%d]: Cannot detach: %d outstanding DKIO flushes",
		    __func__, instance, vdc->dkio_flush_pending);
		return (EBUSY);
	}

	/*
	 * Should not need the mutex here, since the framework should protect
	 * against more opens on this device, but just in case.
	 */
	mutex_enter(&vdc->lock);
	vdc->open--;
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	_NOTE(ARGUNUSED(credp))
	_NOTE(ARGUNUSED(rvalp))

	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
}

static int
vdc_print(dev_t dev, char *str)
{
	cmn_err(CE_NOTE, "vdc%d: %s", SDUNIT(getminor(dev)), str);
	return (0);
}

static int
vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int	rv = 0;
	size_t	nbytes = (nblk * DEV_BSIZE);
	int	instance = SDUNIT(getminor(dev));
	vdc_t	*vdc;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s (%d): Could not get state.", __func__, instance);
		return (ENXIO);
	}

	rv = vdc_populate_descriptor(vdc, addr, nbytes, VD_OP_BWRITE,
	    blkno, SDPART(getminor(dev)));

	PR1("%s: status=%d\n", __func__, rv);

	return (rv);
}

/* -------------------------------------------------------------------------- */

/*
 * Disk access routines
 *
 */

/*
 * vdc_strategy()
 *
 * Return Value:
 *	0:	As per strategy(9E), the strategy() function must return 0
 *		[ bioerror(9f) sets b_flags to the proper error code ]
 */
static int
vdc_strategy(struct buf *buf)
{
	int	rv = -1;
	vdc_t	*vdc = NULL;
	int	instance = SDUNIT(getminor(buf->b_edev));
	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;

	PR1("%s: %s %ld bytes at block %ld : b_addr=0x%p",
	    __func__, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, buf->b_un.b_addr);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		vdc_msg("%s[%d]: Could not get state.", __func__, instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	ASSERT(buf->b_bcount <= (vdc->max_xfer_sz * vdc->block_size));

	if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) {
		vdc_msg("%s: Not ready to transmit data", __func__);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}
	bp_mapin(buf);

	rv = vdc_populate_descriptor(vdc, buf->b_un.b_addr, buf->b_bcount, op,
	    buf->b_lblkno, SDPART(getminor(buf->b_edev)));

	PR1("%s: status=%d", __func__, rv);
	bioerror(buf, rv);
	biodone(buf);
	return (0);
}


static int
vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	PR1("vdc_read(): Entered");
	return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio));
}

static int
vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	PR1("vdc_write(): Entered");
	return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	PR1("vdc_aread(): Entered");
	return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio));
}

static int
vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	PR1("vdc_awrite(): Entered");
	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio));
}


/* -------------------------------------------------------------------------- */

/*
 * Handshake support
 */
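
/*
 * Handshake progression, as driven by vdc_init_handshake_negotiation()
 * below (a condensed sketch of the states used in this file):
 *
 *	VD_STATE_INIT  --version exchange-->    VD_STATE_VER
 *	VD_STATE_VER   --attribute exchange-->  VD_STATE_ATTR
 *	VD_STATE_ATTR  --DRing registration-->  VD_STATE_DATA
 *
 * Once VD_STATE_DATA is reached (and LDC is UP) vdc can transmit data.
 */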

/*
 * vdc_init_handshake_negotiation
 *
 * Description:
 *	This function is called to trigger the handshake negotiations between
 *	the client (vdc) and the server (vds). It may be called multiple times.
 *
 * Parameters:
 *	vdc - soft state pointer
 */
static void
vdc_init_handshake_negotiation(void *arg)
{
	vdc_t		*vdc = (vdc_t *)(void *)arg;
	ldc_status_t	ldc_state;
	vd_state_t	state;
	int		status;

	ASSERT(vdc != NULL);

	PR0("[%d] Initializing vdc<->vds handshake\n", vdc->instance);

	/* get LDC state */
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't get LDC status: err=%d",
		    vdc->instance, status);
		return;
	}

	/*
	 * If the LDC connection is not UP we bring it up now and return.
	 * The handshake will be started again when the callback is
	 * triggered due to the UP event.
	 */
	if (ldc_state != LDC_UP) {
		PR0("[%d] Triggering an LDC_UP and returning\n", vdc->instance);
		(void) vdc_do_ldc_up(vdc);
		return;
	}

	mutex_enter(&vdc->lock);
	/*
	 * Do not continue if another thread has triggered a handshake which
	 * has not been reset or detach() has stopped further handshakes.
	 */
	if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) {
		PR0("%s[%d] Negotiation not triggered. [init=%x]\n",
		    __func__, vdc->instance, vdc->initialized);
		mutex_exit(&vdc->lock);
		return;
	}

	if (vdc->hshake_cnt++ > vdc_retries) {
		cmn_err(CE_NOTE, "[%d] Failed repeatedly to complete handshake"
		    " with vDisk server", vdc->instance);
		mutex_exit(&vdc->lock);
		return;
	}

	vdc->initialized |= VDC_HANDSHAKE;
	vdc->ldc_state = ldc_state;

	state = vdc->state;

	if (state == VD_STATE_INIT) {
		/*
		 * Set the desired version parameter to the first entry in the
		 * version array. If this specific version is not supported,
		 * the response handling code will step down the version number
		 * to the next array entry and deal with it accordingly.
		 */
		(void) vdc_init_ver_negotiation(vdc, vdc_version[0]);
	} else if (state == VD_STATE_VER) {
		(void) vdc_init_attr_negotiation(vdc);
	} else if (state == VD_STATE_ATTR) {
		(void) vdc_init_dring_negotiate(vdc);
	} else if (state == VD_STATE_DATA) {
		/*
		 * nothing to do - we have already completed the negotiation
		 * and we can transmit data when ready.
		 */
		PR0("%s[%d] Negotiation triggered after handshake completed",
		    __func__, vdc->instance);
	}

	mutex_exit(&vdc->lock);
}

/*
 * Function:
 *	vdc_init_ver_negotiation()
 *
 * Description:
 *	Send a version negotiation message (VIO_VER_INFO) to vds,
 *	proposing the supplied protocol version.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	ver	- protocol version to propose to vds.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
{
	vio_ver_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status = -1;

	PR0("%s: Entered.\n", __func__);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	/*
	 * set the Session ID to a unique value
	 * (the lower 32 bits of the clock tick)
	 */
	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);

	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_VER_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	pkt.dev_class = VDEV_DISK;
	pkt.ver_major = ver.major;
	pkt.ver_minor = ver.minor;

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	PR0("%s: vdc_send(status = %d)\n", __func__, status);

	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
		PR0("%s[%d] vdc_send failed: id(%lx) rv(%d) size(%lu)\n",
		    __func__, vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vio_ver_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_init_attr_negotiation()
 *
 * Description:
 *	Send an attribute negotiation message (VIO_ATTR_INFO) to vds,
 *	describing this client's transfer parameters.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_attr_negotiation(vdc_t *vdc)
{
	vd_attr_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	PR0("%s[%d] entered\n", __func__, vdc->instance);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.max_xfer_sz = vdc->max_xfer_sz;
	pkt.vdisk_block_size = vdc->block_size;
	pkt.xfer_mode = VIO_DRING_MODE;
	pkt.operations = 0;	/* server will set bits of valid operations */
	pkt.vdisk_type = 0;	/* server will set to valid device type */
	pkt.vdisk_size = 0;	/* server will set to valid size */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	PR0("%s: vdc_send(status = %d)\n", __func__, status);

	if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) {
		PR0("%s[%d] ldc_write failed: id(%lx) rv(%d) size (%lu)\n",
		    __func__, vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vd_attr_msg_t))
			status = ENOMSG;
	}

	return (status);
}
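
/*
 * The three negotiation messages (version, attribute, DRing registration)
 * all share the same tag setup; the recurring pattern, condensed here for
 * reference:
 *
 *	pkt.tag.vio_msgtype	= VIO_TYPE_CTRL;
 *	pkt.tag.vio_subtype	= VIO_SUBTYPE_INFO;
 *	pkt.tag.vio_subtype_env	= VIO_VER_INFO / VIO_ATTR_INFO /
 *				  VIO_DRING_REG (one per phase);
 *	pkt.tag.vio_sid		= vdc->session_id;
 *
 * Only the payload differs between the phases.
 */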

/*
 * Function:
 *	vdc_init_dring_negotiate()
 *
 * Description:
 *	Set up the descriptor ring and send a DRing registration message
 *	(VIO_DRING_REG) to vds.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_dring_negotiate(vdc_t *vdc)
{
	vio_dring_reg_msg_t	pkt;
	size_t			msglen = sizeof (pkt);
	int			status = -1;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	status = vdc_init_descriptor_ring(vdc);
	if (status != 0) {
		cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n",
		    vdc->instance, status);
		vdc_destroy_descriptor_ring(vdc);
		vdc_reset_connection(vdc, B_FALSE);
		return (status);
	}
	PR0("%s[%d] Init of descriptor ring completed (status = %d)\n",
	    __func__, vdc->instance, status);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_DRING_REG;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.dring_ident = 0;
	pkt.num_descriptors = VD_DRING_LEN;
	pkt.descriptor_size = VD_DRING_ENTRY_SZ;
	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
	pkt.ncookies = vdc->dring_cookie_count;
	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0) {
		PR0("%s[%d] Failed to register DRing (status = %d)\n",
		    __func__, vdc->instance, status);
		vdc_reset_connection(vdc, B_FALSE);
	}

	return (status);
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write() [ up to 'vdc_retries' times ]; otherwise
 *	we return the error returned by LDC.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver;
 *		  the message is written over this instance's LDC channel.
 *	pkt	- address of LDC message to be sent
 *	msglen	- the size of the message being sent. When the function
 *		  returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0		- Success.
 *	EINVAL		- pkt or msglen were NULL
 *	ECONNRESET	- The connection was not up.
 *	EWOULDBLOCK	- LDC queue is full
 *	xxx		- other error codes returned by ldc_write
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	retries = 0;
	int	status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

	do {
		size = *msglen;
		status = ldc_write(vdc->ldc_handle, pkt, &size);
	} while (status == EWOULDBLOCK && retries++ < vdc_retries);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		vdc_reset_connection(vdc, B_TRUE);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}
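
/*
 * Typical call pattern for vdc_send(), as used by the negotiation
 * routines above (sketch; callers must hold vdc->lock):
 *
 *	size_t msglen = sizeof (pkt);
 *	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
 *	if ((status != 0) || (msglen != sizeof (pkt)))
 *		... treat the send as failed ...
 *
 * i.e. both the return status and the written length are checked, since
 * a short write indicates the message did not go out intact.
 */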

/*
 * Function:
 *	vdc_get_ldc_id()
 *
 * Description:
 *	This function gets the 'ldc-id' for this particular instance of vdc.
 *	The id returned is the guest domain channel endpoint LDC uses for
 *	communication with vds.
 *
 * Arguments:
 *	dip	- dev info pointer for this instance of the device driver.
 *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 *	ENXIO	- Unexpected error communicating with MD framework
 */
static int
vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id)
{
	int		status = ENOENT;
	char		*node_name = NULL;
	md_t		*mdp = NULL;
	int		num_nodes;
	int		num_vdevs;
	int		num_chans;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	*chanp = NULL;
	boolean_t	found_inst = B_FALSE;
	int		listsz;
	int		idx;
	uint64_t	md_inst;
	int		obp_inst;
	int		instance = ddi_get_instance(dip);

	ASSERT(ldc_id != NULL);
	*ldc_id = 0;

	/*
	 * Get the OBP instance number for comparison with the MD instance
	 *
	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris. Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance.
	 * If the "reg" property cannot be found, the device tree state is
	 * presumably so broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
		return (ENOENT);
	}
	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    OBP_REG, -1);
	PR1("%s[%d]: OBP inst=%d\n", __func__, instance, obp_inst);

	/*
	 * We now walk the MD nodes; if an instance of a vdc node matches
	 * the instance obtained from OBP, we get the ldc-id property.
	 */
	if ((mdp = md_get_handle()) == NULL) {
		cmn_err(CE_WARN, "unable to init machine description");
		return (ENXIO);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);

	/* allocate memory for nodes */
	listp = kmem_zalloc(listsz, KM_SLEEP);
	chanp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
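
	/*
	 * Shape of the MD subtree being walked below (sketch; node and
	 * property names are the VDC_MD_* / VDC_ID_PROP macros):
	 *
	 *	root
	 *	  +-- VDC_MD_VDEV_NAME node(s)
	 *	        name == VDC_MD_DISK_NAME,
	 *	        VDC_MD_CFG_HDL matching the OBP "reg" value
	 *	          +-- VDC_MD_CHAN_NAME node
	 *	                VDC_ID_PROP -> the ldc-id we want
	 */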

	/*
	 * Search for all the virtual devices; we will then check to see
	 * which ones are disk nodes.
	 */
	num_vdevs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, VDC_MD_VDEV_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdevs <= 0) {
		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;
	}

	PR1("%s[%d] num_vdevs=%d\n", __func__, instance, num_vdevs);
	for (idx = 0; idx < num_vdevs; idx++) {
		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
		if ((status != 0) || (node_name == NULL)) {
			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
			    ": err %d", VDC_MD_VDEV_NAME, status);
			continue;
		}

		PR1("%s[%d] Found node %s\n", __func__, instance, node_name);
		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
			status = md_get_prop_val(mdp, listp[idx],
			    VDC_MD_CFG_HDL, &md_inst);
			PR1("%s[%d] vdc inst# in MD=%ld\n",
			    __func__, instance, md_inst);
			if ((status == 0) && (md_inst == obp_inst)) {
				found_inst = B_TRUE;
				break;
			}
		}
	}

	if (!found_inst) {
		cmn_err(CE_NOTE, "Unable to find correct '%s' node",
		    VDC_MD_DISK_NAME);
		status = ENOENT;
		goto done;
	}
	PR0("%s[%d] MD inst=%ld\n", __func__, instance, md_inst);

	/* get the channels for this node */
	num_chans = md_scan_dag(mdp, listp[idx],
	    md_find_name(mdp, VDC_MD_CHAN_NAME),
	    md_find_name(mdp, "fwd"), chanp);

	/* expecting at least one channel */
	if (num_chans <= 0) {
		cmn_err(CE_NOTE, "No '%s' node for '%s' port",
		    VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;

	} else if (num_chans != 1) {
		PR0("%s[%d] Expected 1 '%s' node for '%s' port, found %d\n",
		    __func__, instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME,
		    num_chans);
	}

	/*
	 * We use the first channel found (index 0), irrespective of how
	 * many there are in total.
	 */
	if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) {
		cmn_err(CE_NOTE, "Channel '%s' property not found",
		    VDC_ID_PROP);
		status = ENOENT;
	}

	PR0("%s[%d] LDC id is 0x%lx\n", __func__, instance, *ldc_id);

done:
	if (chanp)
		kmem_free(chanp, listsz);
	if (listp)
		kmem_free(listp, listsz);

	(void) md_fini_handle(mdp);

	return (status);
}

static int
vdc_do_ldc_up(vdc_t *vdc)
{
	int	status;

	PR0("[%d] Bringing up channel %lx\n", vdc->instance, vdc->ldc_id);

	if ((status = ldc_up(vdc->ldc_handle)) != 0) {
		switch (status) {
		case ECONNREFUSED:	/* listener not ready at other end */
			PR0("%s: ldc_up(%ld,...) return %d\n",
			    __func__, vdc->ldc_id, status);
			status = 0;
			break;
		default:
			cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: "
			    "channel=%ld, err=%d",
			    vdc->instance, vdc->ldc_id, status);
		}
	}

	return (status);
}


/*
 * vdc_is_able_to_tx_data()
 *
 * Description:
 *	This function checks if we are able to send data to the
 *	vDisk server (vds). The LDC connection needs to be up and
 *	vdc & vds need to have completed the handshake negotiation.
 *
 * Parameters:
 *	vdc	- soft state pointer
 *	flag	- flag to indicate if we can block or not
 *		  [ if O_NONBLOCK or O_NDELAY (which are defined in
 *		    open(2)) are set then do not block ]
 *
 * Return Values
 *	B_TRUE	- can talk to vds
 *	B_FALSE	- unable to talk to vds
 */
static boolean_t
vdc_is_able_to_tx_data(vdc_t *vdc, int flag)
{
	vd_state_t	state;
	uint32_t	ldc_state;
	uint_t		retries = 0;
	int		rv = -1;

	ASSERT(vdc != NULL);

	mutex_enter(&vdc->lock);
	state = vdc->state;
	ldc_state = vdc->ldc_state;
	mutex_exit(&vdc->lock);

	if ((state == VD_STATE_DATA) && (ldc_state == LDC_UP))
		return (B_TRUE);

	if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) {
		PR0("%s[%d] Not ready to tx - state %d LDC state %d\n",
		    __func__, vdc->instance, state, ldc_state);
		return (B_FALSE);
	}

	/*
	 * We want to check and see if any negotiations triggered earlier
	 * have succeeded. We are prepared to wait a little while in case
	 * they are still in progress.
	 */
	mutex_enter(&vdc->lock);
	while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) {
		PR0("%s: Waiting for connection at state %d (LDC state %d)\n",
		    __func__, vdc->state, vdc->ldc_state);

		rv = cv_timedwait(&vdc->cv, &vdc->lock,
		    VD_GET_TIMEOUT_HZ(retries));

		/*
		 * An rv of -1 indicates that we timed out without the LDC
		 * state changing, so it looks like the other side (vds) is
		 * not yet ready/responding.
		 *
		 * Any other value of rv indicates that the LDC triggered an
		 * interrupt, so we just loop again, check the handshake state
		 * and keep waiting if necessary.
		 */
		if (rv == -1) {
			if (retries >= vdc_retries) {
				PR0("%s[%d] handshake wait timed out.\n",
				    __func__, vdc->instance);
				mutex_exit(&vdc->lock);
				return (B_FALSE);
			} else {
				PR1("%s[%d] Retry #%d for handshake timedout\n",
				    __func__, vdc->instance, retries);
				retries++;
			}
		}
	}

	ASSERT(vdc->ldc_state == LDC_UP);
	ASSERT(vdc->state == VD_STATE_DATA);

	mutex_exit(&vdc->lock);

	return (B_TRUE);
}


/*
 * Function:
 *	vdc_terminate_ldc()
 *
 * Description:
 *	Undo whatever LDC setup has been done (close the channel,
 *	unregister the callback and finalize the handle), as recorded
 *	by the VDC_LDC_* bits in vdc->initialized.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_terminate_ldc(vdc_t *vdc)
{
	int	instance = ddi_get_instance(vdc->dip);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	PR0("%s[%d] initialized=%x\n", __func__, instance, vdc->initialized);

	if (vdc->initialized & VDC_LDC_OPEN) {
		PR0("%s[%d]: ldc_close()\n", __func__, instance);
		(void) ldc_close(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC_CB) {
		PR0("%s[%d]: ldc_unreg_callback()\n", __func__, instance);
		(void) ldc_unreg_callback(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC) {
		PR0("%s[%d]: ldc_fini()\n", __func__, instance);
		(void) ldc_fini(vdc->ldc_handle);
		vdc->ldc_handle = NULL;
	}

	vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN);
}

/*
 * Function:
 *	vdc_reset_connection()
 *
 * Description:
 *	Reset the connection state back to VD_STATE_INIT so that a new
 *	handshake can be started, optionally resetting the underlying
 *	LDC channel as well.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the
 *			  device driver.
 *	reset_ldc	- Flag indicating whether to also reset the LDC
 *			  connection.
 *
 * Return Code:
 *	None
 */
static void
vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc)
{
	int	status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	PR0("%s[%d] Entered\n", __func__, vdc->instance);

	vdc->state = VD_STATE_INIT;

	if (reset_ldc) {
		status = ldc_reset(vdc->ldc_handle);
		PR0("%s[%d] ldc_reset() = %d\n",
		    __func__, vdc->instance, status);
	}

	vdc->initialized &= ~VDC_HANDSHAKE;
	PR0("%s[%d] init=%x\n", __func__, vdc->instance, vdc->initialized);
}

/* -------------------------------------------------------------------------- */

/*
 * Descriptor Ring helper routines
 */

/*
 * Function:
 *	vdc_init_descriptor_ring()
 *
 * Description:
 *	Create the descriptor ring, bind it to the LDC channel and
 *	initialize the local bookkeeping for its entries. Each setup
 *	step is recorded in vdc->initialized so that it can be undone
 *	by vdc_destroy_descriptor_ring().
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_descriptor_ring(vdc_t *vdc)
{
	vd_dring_entry_t	*dep = NULL;	/* DRing Entry pointer */
	int			status = 0;
	int			i;

	PR0("%s[%d] initialized=%x\n",
	    __func__, vdc->instance, vdc->initialized);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(vdc->ldc_handle != NULL);

	if ((vdc->initialized & VDC_DRING_INIT) == 0) {
		PR0("%s[%d] ldc_mem_dring_create\n", __func__, vdc->instance);
		status = ldc_mem_dring_create(VD_DRING_LEN, VD_DRING_ENTRY_SZ,
		    &vdc->ldc_dring_hdl);
		if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) {
			PR0("%s: Failed to create a descriptor ring", __func__);
			return (status);
		}
		vdc->dring_entry_size = VD_DRING_ENTRY_SZ;
		vdc->dring_len = VD_DRING_LEN;
		vdc->initialized |= VDC_DRING_INIT;
	}

	if ((vdc->initialized & VDC_DRING_BOUND) == 0) {
		PR0("%s[%d] ldc_mem_dring_bind\n", __func__, vdc->instance);
		vdc->dring_cookie =
		    kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP);

		status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl,
		    LDC_SHADOW_MAP, LDC_MEM_RW,
		    &vdc->dring_cookie[0],
		    &vdc->dring_cookie_count);
		if (status != 0) {
			PR0("%s: Failed to bind descriptor ring (%p) "
			    "to channel (%p)\n",
			    __func__, vdc->ldc_dring_hdl, vdc->ldc_handle);
			return (status);
		}
		ASSERT(vdc->dring_cookie_count == 1);
		vdc->initialized |= VDC_DRING_BOUND;
	}

	status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info);
	if (status != 0) {
		PR0("%s: Failed to get info for descriptor ring (%p)\n",
		    __func__, vdc->ldc_dring_hdl);
		return (status);
	}

	if ((vdc->initialized & VDC_DRING_LOCAL) == 0) {
		PR0("%s[%d] local dring\n", __func__, vdc->instance);

		/* Allocate the local copy of this dring */
		vdc->local_dring =
		    kmem_zalloc(VD_DRING_LEN * sizeof (vdc_local_desc_t),
		    KM_SLEEP);
		vdc->initialized |= VDC_DRING_LOCAL;
	}
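
	/*
	 * Descriptor state lifecycle, as managed by this file (sketch):
	 *
	 *	VIO_DESC_FREE		entry available for a new request
	 *	VIO_DESC_READY		vdc has filled the entry (see
	 *				vdc_populate_descriptor())
	 *	VIO_DESC_ACCEPTED	vds has picked the entry up
	 *	VIO_DESC_DONE		vds has completed the request; the
	 *				entry returns to FREE via
	 *				vdc_depopulate_descriptor().
	 */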
	/*
	 * Mark all DRing entries as free and initialize the private
	 * descriptors' memory handles. If any entry is initialized,
	 * we need to free it later, so we set the bit in 'initialized'
	 * at the start.
	 */
	vdc->initialized |= VDC_DRING_ENTRY;
	for (i = 0; i < VD_DRING_LEN; i++) {
		dep = VDC_GET_DRING_ENTRY_PTR(vdc, i);
		dep->hdr.dstate = VIO_DESC_FREE;

		status = ldc_mem_alloc_handle(vdc->ldc_handle,
		    &vdc->local_dring[i].desc_mhdl);
		if (status != 0) {
			cmn_err(CE_NOTE, "![%d] Failed to alloc mem handle for"
			    " descriptor %d", vdc->instance, i);
			return (status);
		}
		vdc->local_dring[i].flags = VIO_DESC_FREE;
		vdc->local_dring[i].dep = dep;

		mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&vdc->local_dring[i].cv, NULL, CV_DRIVER, NULL);
	}

	/*
	 * Initialize the index of the last DRing entry used. Since the code
	 * that picks the next available entry increments the index before
	 * selecting one, we set it to the last DRing entry so that it wraps
	 * around to zero for the first entry to be used.
	 */
	vdc->dring_curr_idx = VD_DRING_LEN - 1;

	return (status);
}

/*
 * Function:
 *	vdc_destroy_descriptor_ring()
 *
 * Description:
 *	Undo the descriptor ring setup performed by
 *	vdc_init_descriptor_ring(), freeing only the pieces recorded
 *	in vdc->initialized.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_destroy_descriptor_ring(vdc_t *vdc)
{
	vdc_local_desc_t	*ldep = NULL;	/* Local Dring Entry Pointer */
	ldc_mem_handle_t	mhdl = NULL;
	int			status = -1;
	int			i;	/* loop */

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(vdc->state == VD_STATE_INIT);

	PR0("%s: Entered\n", __func__);

	if (vdc->initialized & VDC_DRING_ENTRY) {
		PR0("[%d] Removing Local DRing entries\n", vdc->instance);
		for (i = 0; i < VD_DRING_LEN; i++) {
			ldep = &vdc->local_dring[i];
			mhdl = ldep->desc_mhdl;

			if (mhdl == NULL)
				continue;

			(void) ldc_mem_free_handle(mhdl);
			mutex_destroy(&ldep->lock);
			cv_destroy(&ldep->cv);
		}
		vdc->initialized &= ~VDC_DRING_ENTRY;
	}

	if (vdc->initialized & VDC_DRING_LOCAL) {
		PR0("[%d] Freeing Local DRing\n", vdc->instance);
		kmem_free(vdc->local_dring,
		    VD_DRING_LEN * sizeof (vdc_local_desc_t));
		vdc->initialized &= ~VDC_DRING_LOCAL;
	}

	if (vdc->initialized & VDC_DRING_BOUND) {
		PR0("[%d] Unbinding DRing\n", vdc->instance);
		status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->initialized &= ~VDC_DRING_BOUND;
		} else {
			vdc_msg("%s: Failed to unbind Descriptor Ring (%lx)\n",
			    vdc->ldc_dring_hdl);
		}
	}

	if (vdc->initialized & VDC_DRING_INIT) {
		PR0("[%d] Destroying DRing\n", vdc->instance);
		status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->ldc_dring_hdl = NULL;
			bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t));
			vdc->initialized &= ~VDC_DRING_INIT;
		} else {
			vdc_msg("%s: Failed to destroy Descriptor Ring (%lx)\n",
			    vdc->ldc_dring_hdl);
		}
	}
}

/*
 * vdc_get_next_dring_entry_idx()
 *
 * Description:
 *	This function gets the index of the next available Descriptor
 *	Ring entry.
 *
 * Return Value:
 *	0 <= rv < VD_DRING_LEN		Next available slot
 *	-1				DRing is full
 */
static int
vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t num_slots_needed)
{
	_NOTE(ARGUNUSED(num_slots_needed))

	vd_dring_entry_t	*dep = NULL;	/* Dring Entry Pointer */
	int			idx = -1;
	int			start_idx = 0;

	ASSERT(vdc != NULL);
	ASSERT(vdc->dring_len == VD_DRING_LEN);
	ASSERT(vdc->dring_curr_idx >= 0);
	ASSERT(vdc->dring_curr_idx < VD_DRING_LEN);
	ASSERT(mutex_owned(&vdc->dring_lock));

	/* Start at the last entry used */
	idx = start_idx = vdc->dring_curr_idx;

	/*
	 * Loop through the Descriptor Ring checking for a free entry until
	 * we reach the entry we started at. We should never come close to
	 * filling the Ring at any stage; instead, this is just to prevent
	 * an entry which gets into an inconsistent state (e.g. due to a
	 * request timing out) from blocking progress.
	 */
	do {
		/* Get the next entry after the last known index tried */
		idx = (idx + 1) % VD_DRING_LEN;

		dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx);
		ASSERT(dep != NULL);

		if (dep->hdr.dstate == VIO_DESC_FREE) {
			ASSERT(idx >= 0);
			ASSERT(idx < VD_DRING_LEN);
			vdc->dring_curr_idx = idx;
			return (idx);

		} else if (dep->hdr.dstate == VIO_DESC_READY) {
			PR0("%s: Entry %d waiting to be accepted\n",
			    __func__, idx);
			continue;

		} else if (dep->hdr.dstate == VIO_DESC_ACCEPTED) {
			PR0("%s: Entry %d waiting to be processed\n",
			    __func__, idx);
			continue;

		} else if (dep->hdr.dstate == VIO_DESC_DONE) {
			PR0("%s: Entry %d done but not marked free\n",
			    __func__, idx);

			/*
			 * If we are currently panicking, interrupts are
			 * disabled and we will not be getting ACKs from the
			 * vDisk server, so we mark the descriptor ring entries
			 * as FREE here instead of in the ACK handler.
			 */
			if (panicstr) {
				(void) vdc_depopulate_descriptor(vdc, idx);
				dep->hdr.dstate = VIO_DESC_FREE;
				vdc->local_dring[idx].flags = VIO_DESC_FREE;
			}
			continue;

		} else {
			vdc_msg("Public Descriptor Ring entry corrupted");
			mutex_enter(&vdc->lock);
			vdc_reset_connection(vdc, B_FALSE);
			mutex_exit(&vdc->lock);
			return (-1);
		}

	} while (idx != start_idx);

	return (-1);
}
2029 */ 2030 mutex_enter(&vdc->dring_lock); 2031 idx = vdc_get_next_dring_entry_idx(vdc, 1); 2032 if (idx == -1) { 2033 mutex_exit(&vdc->dring_lock); 2034 vdc_msg("%s[%d]: no descriptor ring entry avail, seq=%d\n", 2035 __func__, vdc->instance, vdc->seq_num); 2036 2037 /* 2038 * Since strategy should not block, we don't wait for a DRing 2039 * entry to be freed up and instead return immediately 2040 */ 2041 return (EAGAIN); 2042 } 2043 2044 ASSERT(idx < VD_DRING_LEN); 2045 local_dep = &vdc->local_dring[idx]; 2046 dep = local_dep->dep; 2047 ASSERT(dep != NULL); 2048 2049 /* 2050 * Wait for anybody still using the DRing entry to finish. 2051 * (e.g. still waiting for vds to respond to a request) 2052 */ 2053 mutex_enter(&local_dep->lock); 2054 2055 switch (operation) { 2056 case VD_OP_BREAD: 2057 case VD_OP_BWRITE: 2058 PR1("buf=%p, block=%lx, nbytes=%lx\n", addr, arg, nbytes); 2059 dep->payload.addr = (diskaddr_t)arg; 2060 rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, operation); 2061 break; 2062 2063 case VD_OP_GET_VTOC: 2064 case VD_OP_SET_VTOC: 2065 case VD_OP_GET_DISKGEOM: 2066 case VD_OP_SET_DISKGEOM: 2067 case VD_OP_SCSICMD: 2068 if (nbytes > 0) { 2069 rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, 2070 operation); 2071 } 2072 break; 2073 2074 case VD_OP_FLUSH: 2075 case VD_OP_GET_WCE: 2076 case VD_OP_SET_WCE: 2077 rv = 0; /* nothing to bind */ 2078 break; 2079 2080 default: 2081 cmn_err(CE_NOTE, "[%d] Unsupported vDisk operation [%d]\n", 2082 vdc->instance, operation); 2083 rv = EINVAL; 2084 } 2085 2086 if (rv != 0) { 2087 mutex_exit(&local_dep->lock); 2088 mutex_exit(&vdc->dring_lock); 2089 return (rv); 2090 } 2091 2092 /* 2093 * Fill in the details of the request into the DRing entry 2094 */ 2095 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdc); 2096 dep->payload.operation = operation; 2097 dep->payload.nbytes = nbytes; 2098 dep->payload.status = EINPROGRESS; /* vds will set valid value */ 2099 dep->payload.slice = slice; 2100 dep->hdr.dstate = VIO_DESC_READY; 2101 dep->hdr.ack = 1; /* request an ACK for every message */ 2102 2103 local_dep->flags = VIO_DESC_READY; 2104 local_dep->addr = addr; 2105 2106 /* 2107 * Send a msg with the DRing details to vds 2108 */ 2109 VIO_INIT_DRING_DATA_TAG(dmsg); 2110 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2111 dmsg.dring_ident = vdc->dring_ident; 2112 dmsg.start_idx = idx; 2113 dmsg.end_idx = idx; 2114 2115 PR1("ident=0x%llx, st=%d, end=%d, seq=%d req=%d dep=%p\n", 2116 vdc->dring_ident, dmsg.start_idx, dmsg.end_idx, 2117 dmsg.seq_num, dep->payload.req_id, dep); 2118 2119 mutex_enter(&vdc->lock); 2120 rv = vdc_send(vdc, (caddr_t)&dmsg, &msglen); 2121 mutex_exit(&vdc->lock); 2122 PR1("%s[%d]: ldc_write() rv=%d\n", __func__, vdc->instance, rv); 2123 if (rv != 0) { 2124 mutex_exit(&local_dep->lock); 2125 mutex_exit(&vdc->dring_lock); 2126 vdc_msg("%s: ldc_write(%d)\n", __func__, rv); 2127 return (EAGAIN); 2128 } 2129 2130 /* 2131 * If the message was successfully sent, we increment the sequence 2132 * number to be used by the next message 2133 */ 2134 vdc->seq_num++; 2135 2136 /* 2137 * XXX - potential performance enhancement (Investigate at a later date) 2138 * 2139 * for calls from strategy(9E), instead of waiting for a response from 2140 * vds, we could return at this stage and let the ACK handling code 2141 * trigger the biodone(9F) 2142 */ 2143 2144 /* 2145 * When a guest is panicking, the completion of requests needs to be 2146 * handled differently because interrupts are disabled and vdc 2147 * will not get messages. We have to poll for the messages instead.
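 * The polling loop below reads replies directly with ldc_read(), discards anything that is not a DRing ACK/NACK, and retires DONE descriptors itself, since the usual path through the LDC callback and the message processing thread cannot run while the system is panicking.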
2148 */ 2149 if (ddi_in_panic()) { 2150 int start = 0; 2151 retries = 0; 2152 for (;;) { 2153 msglen = sizeof (dmsg); 2154 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, 2155 &msglen); 2156 if (rv) { 2157 rv = EINVAL; 2158 break; 2159 } 2160 2161 /* 2162 * If there are no packets, wait and then check again 2163 */ 2164 if ((rv == 0) && (msglen == 0)) { 2165 if (retries++ > vdc_dump_retries) { 2166 PR0("[%d] Giving up waiting, idx %d\n", 2167 vdc->instance, idx); 2168 rv = EAGAIN; 2169 break; 2170 } 2171 2172 PR1("Waiting for next packet @ %d\n", idx); 2173 delay(drv_usectohz(vdc_dump_usec_timeout)); 2174 continue; 2175 } 2176 2177 /* 2178 * Ignore all messages that are not ACKs/NACKs to 2179 * DRing requests. 2180 */ 2181 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2182 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2183 PR0("discarding pkt: type=%d sub=%d env=%d\n", 2184 dmsg.tag.vio_msgtype, 2185 dmsg.tag.vio_subtype, 2186 dmsg.tag.vio_subtype_env); 2187 continue; 2188 } 2189 2190 /* 2191 * set the appropriate return value for the 2192 * current request. 2193 */ 2194 switch (dmsg.tag.vio_subtype) { 2195 case VIO_SUBTYPE_ACK: 2196 rv = 0; 2197 break; 2198 case VIO_SUBTYPE_NACK: 2199 rv = EAGAIN; 2200 break; 2201 default: 2202 continue; 2203 } 2204 2205 start = dmsg.start_idx; 2206 if (start >= VD_DRING_LEN) { 2207 PR0("[%d] Bogus ack data : start %d\n", 2208 vdc->instance, start); 2209 continue; 2210 } 2211 2212 dep = VDC_GET_DRING_ENTRY_PTR(vdc, start); 2213 2214 PR1("[%d] Dumping start=%d idx=%d state=%d\n", 2215 vdc->instance, start, idx, dep->hdr.dstate); 2216 2217 if (dep->hdr.dstate != VIO_DESC_DONE) { 2218 PR0("[%d] Entry @ %d - state !DONE %d\n", 2219 vdc->instance, start, dep->hdr.dstate); 2220 continue; 2221 } 2222 2223 (void) vdc_depopulate_descriptor(vdc, start); 2224 2225 /* 2226 * We want to process all DRing entries up to 2227 * the current one so that any error is returned 2228 * with the correct request. 2229 */ 2230 if (idx > start) { 2231 PR0("[%d] Looping: start %d, idx %d\n", 2232 vdc->instance, idx, start); 2233 continue; 2234 } 2235 2236 /* exit - all outstanding requests are completed */ 2237 break; 2238 } 2239 2240 mutex_exit(&local_dep->lock); 2241 mutex_exit(&vdc->dring_lock); 2242 2243 return (rv); 2244 } 2245 2246 /* 2247 * Now watch the DRing entries we modified to get the response 2248 * from vds. 2249 */ 2250 rv = vdc_wait_for_descriptor_update(vdc, idx, dmsg); 2251 if (rv == ETIMEDOUT) { 2252 /* debug info when dumping state on vds side */ 2253 dep->payload.status = ECANCELED; 2254 } 2255 2256 rv = vdc_depopulate_descriptor(vdc, idx); 2257 PR1("%s[%d] Status=%d\n", __func__, vdc->instance, rv); 2258 2259 mutex_exit(&local_dep->lock); 2260 mutex_exit(&vdc->dring_lock); 2261 2262 return (rv); 2263 } 2264 2265 /* 2266 * Function: 2267 * vdc_wait_for_descriptor_update() 2268 * 2269 * Description: 2270 * Waits for the Descriptor Ring entry at 'idx' to be marked DONE by vds, periodically resending the DRing message in case it was dropped. 2271 * Arguments: 2272 * vdc - soft state pointer for this instance of the device driver.
2273 * idx - Index of the Descriptor Ring entry being modified 2274 * dmsg - LDC message sent by vDisk server 2275 * 2276 * Return Code: 2277 * 0 - Success * ETIMEDOUT - vds did not mark the entry DONE within the retry limit 2278 */ 2279 static int 2280 vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg) 2281 { 2282 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2283 vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ 2284 size_t msglen = sizeof (dmsg); 2285 int retries = 0; 2286 int status = 0; 2287 int rv = 0; 2288 2289 ASSERT(vdc != NULL); 2290 ASSERT(mutex_owned(&vdc->dring_lock)); 2291 ASSERT(idx < VD_DRING_LEN); 2292 local_dep = &vdc->local_dring[idx]; 2293 ASSERT(local_dep != NULL); 2294 dep = local_dep->dep; 2295 ASSERT(dep != NULL); 2296 2297 while (dep->hdr.dstate != VIO_DESC_DONE) { 2298 rv = cv_timedwait(&local_dep->cv, &local_dep->lock, 2299 VD_GET_TIMEOUT_HZ(retries)); 2300 if (rv == -1) { 2301 /* 2302 * If they persist in ignoring us we'll storm off in a 2303 * huff and return ETIMEDOUT to the upper layers. 2304 */ 2305 if (retries >= vdc_retries) { 2306 PR0("%s: Finished waiting on entry %d\n", 2307 __func__, idx); 2308 status = ETIMEDOUT; 2309 break; 2310 } else { 2311 retries++; 2312 PR0("%s[%d]: Timeout #%d on entry %d " 2313 "[seq %d][req %d]\n", __func__, 2314 vdc->instance, 2315 retries, idx, dmsg.seq_num, 2316 dep->payload.req_id); 2317 } 2318 2319 if (dep->hdr.dstate == VIO_DESC_ACCEPTED) { 2320 PR0("%s[%d]: vds has accessed entry %d [seq %d]" 2321 "[req %d] but not ack'ed it yet\n", 2322 __func__, vdc->instance, idx, dmsg.seq_num, 2323 dep->payload.req_id); 2324 continue; 2325 } 2326 2327 /* 2328 * we resend the message as it may have been dropped 2329 * and never made it to the other side (vds). 2330 * (We reuse the original message but update seq ID) 2331 */ 2332 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2333 retries = 0; 2334 mutex_enter(&vdc->lock); 2335 status = vdc_send(vdc, (caddr_t)&dmsg, &msglen); 2336 mutex_exit(&vdc->lock); 2337 if (status != 0) { 2338 vdc_msg("%s: Error (%d) while resending after " 2339 "timeout\n", __func__, status); 2340 status = ETIMEDOUT; 2341 break; 2342 } 2343 /* 2344 * If the message was successfully sent, we increment 2345 * the sequence number to be used by the next message.
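 * (vds echoes the sequence number back in its ACKs; vdc_verify_seq_num() checks each reply against seq_num_reply plus the number of requests being acknowledged.)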
2346 */ 2347 vdc->seq_num++; 2348 } 2349 } 2350 2351 return (status); 2352 } 2353 2354 static int 2355 vdc_get_response(vdc_t *vdc, int start, int end) 2356 { 2357 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2358 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2359 int status = ENXIO; 2360 int idx = -1; 2361 2362 ASSERT(vdc != NULL); 2363 ASSERT(start >= 0); 2364 ASSERT(start <= VD_DRING_LEN); 2365 ASSERT(end >= -1); 2366 ASSERT(end <= VD_DRING_LEN); 2367 2368 idx = start; 2369 ldep = &vdc->local_dring[idx]; 2370 ASSERT(ldep != NULL); 2371 dep = ldep->dep; 2372 ASSERT(dep != NULL); 2373 2374 PR0("%s[%d] DRING entry=%d status=%d\n", __func__, vdc->instance, 2375 idx, VIO_GET_DESC_STATE(dep->hdr.dstate)); 2376 while (VIO_GET_DESC_STATE(dep->hdr.dstate) == VIO_DESC_DONE) { 2377 if ((end != -1) && (idx > end)) 2378 return (0); 2379 2380 switch (ldep->operation) { 2381 case VD_OP_BREAD: 2382 case VD_OP_BWRITE: 2383 /* call bioxxx */ 2384 break; 2385 default: 2386 /* signal waiter */ 2387 break; 2388 } 2389 2390 /* Clear the DRing entry */ 2391 status = vdc_depopulate_descriptor(vdc, idx); 2392 PR0("%s[%d] Status=%d\n", __func__, vdc->instance, status); 2393 2394 /* loop accounting to get next DRing entry, wrapping at the end of the ring */ 2395 idx = (idx + 1) % VD_DRING_LEN; 2396 ldep = &vdc->local_dring[idx]; 2397 dep = ldep->dep; 2398 } 2399 2400 return (status); 2401 } 2402 2403 /* 2404 * Function: 2405 * vdc_depopulate_descriptor() 2406 * 2407 * Description: 2408 * Marks the Descriptor Ring entry at 'idx' as FREE again, copies back and frees the aligned bounce buffer if one was used, and unbinds the entry's memory handle. 2409 * Arguments: 2410 * vdc - soft state pointer for this instance of the device driver. 2411 * idx - Index of the Descriptor Ring entry being modified 2412 * 2413 * Return Code: 2414 * 0 - Success 2415 */ 2416 static int 2417 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2418 { 2419 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2420 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2421 int status = ENXIO; 2422 int operation; 2423 int rv = 0; 2424 2425 ASSERT(vdc != NULL); 2426 ASSERT(idx < VD_DRING_LEN); 2427 ldep = &vdc->local_dring[idx]; 2428 ASSERT(ldep != NULL); 2429 dep = ldep->dep; 2430 ASSERT(dep != NULL); 2431 2432 status = dep->payload.status; 2433 operation = dep->payload.operation; 2434 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2435 ldep = &vdc->local_dring[idx]; 2436 VIO_SET_DESC_STATE(ldep->flags, VIO_DESC_FREE); 2437 2438 /* the DKIO W$ operations never bind handles so we can return now */ 2439 if ((operation == VD_OP_FLUSH) || 2440 (operation == VD_OP_GET_WCE) || 2441 (operation == VD_OP_SET_WCE)) 2442 return (status); 2443 2444 /* 2445 * If the upper layer passed in a misaligned address we copied the 2446 * data into an aligned buffer before sending it to LDC - we now 2447 * copy it back to the original buffer. 2448 */ 2449 if (ldep->align_addr) { 2450 ASSERT(ldep->addr != NULL); 2451 ASSERT(dep->payload.nbytes > 0); 2452 2453 bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes); 2454 kmem_free(ldep->align_addr, 2455 P2ROUNDUP(dep->payload.nbytes, 8)); 2456 ldep->align_addr = NULL; 2457 } 2458 2459 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 2460 if (rv != 0) { 2461 cmn_err(CE_NOTE, "[%d] unbind mem hdl 0x%lx @ idx %d failed:%d", 2462 vdc->instance, ldep->desc_mhdl, idx, rv); 2463 /* 2464 * The error returned by the vDisk server is more informative 2465 * and thus has a higher priority, but if it isn't set we ensure 2466 * that this function returns an error.
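 * (The EINVAL below is only a fallback so that an unbind failure is not silently reported as success when vds itself returned 0.)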
2467 */ 2468 if (status == 0) 2469 status = EINVAL; 2470 } 2471 2472 return (status); 2473 } 2474 2475 /* 2476 * Function: 2477 * vdc_populate_mem_hdl() 2478 * 2479 * Description: 2480 * Binds the buffer at 'addr' to the LDC memory handle of the Descriptor Ring entry at 'idx' so that vds can map it; misaligned buffers are bounced through an 8-byte aligned copy. 2481 * Arguments: 2482 * vdc - soft state pointer for this instance of the device driver. 2483 * idx - Index of the Descriptor Ring entry being modified 2484 * addr - virtual address being mapped in 2485 * nbytes - number of bytes in 'addr' 2486 * operation - the vDisk operation being performed (VD_OP_xxx) 2487 * 2488 * Return Code: 2489 * 0 - Success 2490 */ 2491 static int 2492 vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes, 2493 int operation) 2494 { 2495 vd_dring_entry_t *dep = NULL; 2496 vdc_local_desc_t *ldep = NULL; 2497 ldc_mem_handle_t mhdl; 2498 caddr_t vaddr; 2499 int perm = LDC_MEM_RW; 2500 int rv = 0; 2501 int i; 2502 2503 ASSERT(vdc != NULL); 2504 ASSERT(idx < VD_DRING_LEN); 2505 2506 dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx); 2507 ldep = &vdc->local_dring[idx]; 2508 mhdl = ldep->desc_mhdl; 2509 2510 switch (operation) { 2511 case VD_OP_BREAD: 2512 perm = LDC_MEM_W; 2513 break; 2514 2515 case VD_OP_BWRITE: 2516 perm = LDC_MEM_R; 2517 break; 2518 2519 case VD_OP_GET_VTOC: 2520 case VD_OP_SET_VTOC: 2521 case VD_OP_GET_DISKGEOM: 2522 case VD_OP_SET_DISKGEOM: 2523 case VD_OP_SCSICMD: 2524 perm = LDC_MEM_RW; 2525 break; 2526 2527 default: 2528 ASSERT(0); /* catch bad programming in vdc */ 2529 } 2530 2531 /* 2532 * LDC expects any addresses passed in to be 8-byte aligned. We need 2533 * to copy the contents of any misaligned buffers to a newly allocated 2534 * buffer and bind it instead (and copy the contents back to the 2535 * original buffer passed in when depopulating the descriptor) 2536 */ 2537 vaddr = addr; 2538 if (((uint64_t)addr & 0x7) != 0) { 2539 ldep->align_addr = 2540 kmem_zalloc(P2ROUNDUP(nbytes, 8), KM_SLEEP); 2541 PR0("%s[%d] Misaligned address %lx reallocating " 2542 "(buf=%lx entry=%d)\n", 2543 __func__, vdc->instance, addr, ldep->align_addr, idx); 2544 bcopy(addr, ldep->align_addr, nbytes); 2545 vaddr = ldep->align_addr; 2546 } 2547 2548 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2549 vdc->dring_mem_info.mtype, perm, &dep->payload.cookie[0], 2550 &dep->payload.ncookies); 2551 PR1("%s[%d] bound mem handle; ncookies=%d\n", 2552 __func__, vdc->instance, dep->payload.ncookies); 2553 if (rv != 0) { 2554 vdc_msg("%s[%d] failed to ldc_mem_bind_handle " 2555 "(mhdl=%lx, buf=%lx entry=%d err=%d)\n", 2556 __func__, vdc->instance, mhdl, addr, idx, rv); 2557 if (ldep->align_addr) { 2558 kmem_free(ldep->align_addr, 2559 P2ROUNDUP(nbytes, 8)); 2560 ldep->align_addr = NULL; 2561 } 2562 return (EAGAIN); 2563 } 2564 2565 /* 2566 * Get the other cookies (if any).
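 * ldc_mem_bind_handle() only fills in the first cookie; if the buffer spans multiple mappings (ncookies > 1), each additional cookie must be retrieved with ldc_mem_nextcookie() before the request is sent to vds.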
2567 */ 2568 for (i = 1; i < dep->payload.ncookies; i++) { 2569 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2570 if (rv != 0) { 2571 (void) ldc_mem_unbind_handle(mhdl); 2572 vdc_msg("%s: failed to get next cookie(mhdl=%lx " 2573 "cnum=%d), err=%d", __func__, mhdl, i, rv); 2574 if (ldep->align_addr) { 2575 kmem_free(ldep->align_addr, 2576 P2ROUNDUP(nbytes, 8)); 2577 ldep->align_addr = NULL; 2578 } 2579 return (EAGAIN); 2580 } 2581 } 2582 2583 return (rv); 2584 } 2585 2586 /* 2587 * Interrupt handlers for messages from LDC 2588 */ 2589 2590 /* 2591 * Function: 2592 * vdc_handle_cb() 2593 * 2594 * Description: 2595 * Callback invoked by LDC for events on the channel; handles the LDC_EVT_UP/RESET/DOWN state changes and wakes the message processing thread when data arrives (LDC_EVT_READ). 2596 * Arguments: 2597 * event - Type of event (LDC_EVT_xxx) that triggered the callback 2598 * arg - soft state pointer for this instance of the device driver. 2599 * 2600 * Return Code: 2601 * LDC_SUCCESS 2602 */ 2603 static uint_t 2604 vdc_handle_cb(uint64_t event, caddr_t arg) 2605 { 2606 ldc_status_t ldc_state; 2607 int rv = 0; 2608 2609 vdc_t *vdc = (vdc_t *)(void *)arg; 2610 2611 ASSERT(vdc != NULL); 2612 2613 PR1("%s[%d] event=%x seqID=%d\n", 2614 __func__, vdc->instance, event, vdc->seq_num); 2615 2616 /* 2617 * Depending on the type of event that triggered this callback, 2618 * we modify the handshake state or read the data. 2619 * 2620 * NOTE: not done as a switch() as event could be triggered by 2621 * a state change and a read request. Also the ordering of the 2622 * check for the event types is deliberate. 2623 */ 2624 if (event & LDC_EVT_UP) { 2625 PR0("%s[%d] Received LDC_EVT_UP\n", __func__, vdc->instance); 2626 2627 /* get LDC state */ 2628 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2629 if (rv != 0) { 2630 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2631 vdc->instance, rv); 2632 mutex_enter(&vdc->lock); 2633 vdc_reset_connection(vdc, B_TRUE); 2634 mutex_exit(&vdc->lock); 2635 return (LDC_SUCCESS); 2636 } 2637 2638 /* 2639 * Reset the transaction sequence numbers when LDC comes up. 2640 * We then kick off the handshake negotiation with the vDisk 2641 * server.
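 * (seq_num is reset to 1 because the first DRing data message sent after a handshake carries sequence number 1, and seq_num_reply is reset to 0 so that vdc_verify_seq_num() starts counting afresh for the new session.)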
2642 */ 2643 mutex_enter(&vdc->lock); 2644 vdc->seq_num = 1; 2645 vdc->seq_num_reply = 0; 2646 vdc->ldc_state = ldc_state; 2647 ASSERT(ldc_state == LDC_UP); 2648 mutex_exit(&vdc->lock); 2649 2650 vdc_init_handshake_negotiation(vdc); 2651 2652 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2653 } 2654 2655 if (event & LDC_EVT_READ) { 2656 /* 2657 * Wake up the worker thread to process the message 2658 */ 2659 mutex_enter(&vdc->msg_proc_lock); 2660 vdc->msg_pending = B_TRUE; 2661 cv_signal(&vdc->msg_proc_cv); 2662 mutex_exit(&vdc->msg_proc_lock); 2663 2664 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2665 2666 /* that's all we have to do - no need to handle DOWN/RESET */ 2667 return (LDC_SUCCESS); 2668 } 2669 2670 if (event & LDC_EVT_RESET) { 2671 PR0("%s[%d] Recvd LDC RESET event\n", __func__, vdc->instance); 2672 2673 /* get LDC state */ 2674 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2675 if (rv != 0) { 2676 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2677 vdc->instance, rv); 2678 ldc_state = LDC_OPEN; 2679 } 2680 mutex_enter(&vdc->lock); 2681 vdc->ldc_state = ldc_state; 2682 vdc_reset_connection(vdc, B_FALSE); 2683 mutex_exit(&vdc->lock); 2684 2685 vdc_init_handshake_negotiation(vdc); 2686 } 2687 2688 if (event & LDC_EVT_DOWN) { 2689 PR0("%s[%d] Recvd LDC DOWN event\n", __func__, vdc->instance); 2690 2691 /* get LDC state */ 2692 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2693 if (rv != 0) { 2694 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2695 vdc->instance, rv); 2696 ldc_state = LDC_OPEN; 2697 } 2698 mutex_enter(&vdc->lock); 2699 vdc->ldc_state = ldc_state; 2700 vdc_reset_connection(vdc, B_TRUE); 2701 mutex_exit(&vdc->lock); 2702 } 2703 2704 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 2705 cmn_err(CE_NOTE, "![%d] Unexpected LDC event (%lx) received", 2706 vdc->instance, event); 2707 2708 return (LDC_SUCCESS); 2709 } 2710 2711 /* -------------------------------------------------------------------------- */ 2712 2713 /* 2714 * The following functions process the incoming messages from vds 2715 */ 2716 2717 2718 /* 2719 * Function: 2720 * vdc_process_msg_thread() 2721 * 2722 * Description: 2723 * Kernel thread which waits to be signalled by the LDC callback and then drains the LDC queue, handing each message to vdc_process_msg(), until it is told to stop. 2724 * Arguments: 2725 * vdc - soft state pointer for this instance of the device driver. 2726 * 2727 * Return Code: 2728 * None 2729 */ 2730 static void 2731 vdc_process_msg_thread(vdc_t *vdc) 2732 { 2733 int status = 0; 2734 boolean_t q_is_empty = B_TRUE; 2735 2736 ASSERT(vdc != NULL); 2737 2738 mutex_enter(&vdc->msg_proc_lock); 2739 PR0("%s[%d]: Starting\n", __func__, vdc->instance); 2740 2741 vdc->msg_proc_thr_state = VDC_THR_RUNNING; 2742 2743 while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) { 2744 2745 PR1("%s[%d] Waiting\n", __func__, vdc->instance); 2746 while (!vdc->msg_pending) 2747 cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock); 2748 2749 PR1("%s[%d] Message Received\n", __func__, vdc->instance); 2750 2751 /* check if there is data */ 2752 status = ldc_chkq(vdc->ldc_handle, &q_is_empty); 2753 if ((status != 0) && 2754 (vdc->msg_proc_thr_state == VDC_THR_RUNNING)) { 2755 cmn_err(CE_NOTE, "[%d] Unable to communicate with vDisk" 2756 " server.
Cannot check LDC queue: %d", 2757 vdc->instance, status); 2758 mutex_enter(&vdc->lock); 2759 vdc_reset_connection(vdc, B_FALSE); 2760 mutex_exit(&vdc->lock); 2761 vdc->msg_proc_thr_state = VDC_THR_STOP; 2762 continue; 2763 } 2764 2765 if (!q_is_empty) { 2766 PR1("%s: new pkt(s) available\n", __func__); 2767 vdc_process_msg(vdc); 2768 } 2769 2770 vdc->msg_pending = B_FALSE; 2771 } 2772 2773 PR0("Message processing thread stopped\n"); 2774 vdc->msg_pending = B_FALSE; 2775 vdc->msg_proc_thr_state = VDC_THR_DONE; 2776 cv_signal(&vdc->msg_proc_cv); 2777 mutex_exit(&vdc->msg_proc_lock); 2778 thread_exit(); 2779 } 2780 2781 2782 /* 2783 * Function: 2784 * vdc_process_msg() 2785 * 2786 * Description: 2787 * This function is called by the message processing thread each time it 2788 * is triggered when LDC sends an interrupt to indicate that there are 2789 * more packets on the queue. When it is called it will continue to loop, 2790 * reading and processing messages until there are none left on the queue. 2791 * If it reads a message too small to contain a valid tag, it drops it 2792 * and gives up until the next notification. 2793 * 2794 * Arguments: 2795 * arg - soft state pointer for this instance of the device driver. 2796 * 2797 * Return Code: 2798 * None. 2799 */ 2800 static void 2801 vdc_process_msg(void *arg) 2802 { 2803 vdc_t *vdc = (vdc_t *)(void *)arg; 2804 vio_msg_t vio_msg; 2805 size_t nbytes = sizeof (vio_msg); 2806 int status; 2807 2808 ASSERT(vdc != NULL); 2809 2810 mutex_enter(&vdc->lock); 2811 2812 PR1("%s\n", __func__); 2813 2814 for (;;) { 2815 2816 /* read all messages - until no more left */ /* reset the max size before each read, since ldc_read() updates nbytes */ nbytes = sizeof (vio_msg); 2817 status = ldc_read(vdc->ldc_handle, (caddr_t)&vio_msg, &nbytes); 2818 2819 if (status) { 2820 vdc_msg("%s: ldc_read() failed = %d", __func__, status); 2821 2822 /* if status is ECONNRESET --- reset vdc state */ 2823 if (status == EIO || status == ECONNRESET) { 2824 vdc_reset_connection(vdc, B_TRUE); 2825 } 2826 2827 mutex_exit(&vdc->lock); 2828 return; 2829 } 2830 2831 if ((nbytes > 0) && (nbytes < sizeof (vio_msg_tag_t))) { 2832 cmn_err(CE_CONT, "![%d] Expect %lu bytes; recv'd %lu\n", 2833 vdc->instance, sizeof (vio_msg_tag_t), nbytes); 2834 mutex_exit(&vdc->lock); 2835 return; 2836 } 2837 2838 if (nbytes == 0) { 2839 PR2("%s[%d]: ldc_read() done..\n", 2840 __func__, vdc->instance); 2841 mutex_exit(&vdc->lock); 2842 return; 2843 } 2844 2845 PR1("%s[%d] (%x/%x/%x)\n", __func__, vdc->instance, 2846 vio_msg.tag.vio_msgtype, 2847 vio_msg.tag.vio_subtype, 2848 vio_msg.tag.vio_subtype_env); 2849 2850 /* 2851 * Verify the Session ID of the message 2852 * 2853 * Every message after the Version has been negotiated should 2854 * have the correct session ID set.
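 * A mismatch indicates either a stale message from a previous connection or a misbehaving server; rather than try to recover, vdc resets the connection.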
2855 */ 2856 if ((vio_msg.tag.vio_sid != vdc->session_id) && 2857 (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) { 2858 cmn_err(CE_NOTE, "[%d] Invalid SID 0x%x, expect 0x%lx", 2859 vdc->instance, vio_msg.tag.vio_sid, 2860 vdc->session_id); 2861 vdc_reset_connection(vdc, B_FALSE); 2862 mutex_exit(&vdc->lock); 2863 return; 2864 } 2865 2866 switch (vio_msg.tag.vio_msgtype) { 2867 case VIO_TYPE_CTRL: 2868 status = vdc_process_ctrl_msg(vdc, vio_msg); 2869 break; 2870 case VIO_TYPE_DATA: 2871 status = vdc_process_data_msg(vdc, vio_msg); 2872 break; 2873 case VIO_TYPE_ERR: 2874 status = vdc_process_err_msg(vdc, vio_msg); 2875 break; 2876 default: 2877 PR1("%s", __func__); 2878 status = EINVAL; 2879 break; 2880 } 2881 2882 if (status != 0) { 2883 PR0("%s[%d] Error (%d) occurred processing msg\n", 2884 __func__, vdc->instance, status); 2885 vdc_reset_connection(vdc, B_FALSE); 2886 } 2887 } 2888 _NOTE(NOTREACHED) 2889 } 2890 2891 /* 2892 * Function: 2893 * vdc_process_ctrl_msg() 2894 * 2895 * Description: 2896 * This function is called by the message processing thread each time 2897 * an LDC message with a msgtype of VIO_TYPE_CTRL is received. 2898 * 2899 * Arguments: 2900 * vdc - soft state pointer for this instance of the device driver. 2901 * msg - the LDC message sent by vds 2902 * 2903 * Return Codes: 2904 * 0 - Success. 2905 * EPROTO - A message was received which shouldn't have been sent at this 2906 * point in the protocol 2907 * ENOTSUP - An action which is allowed according to the protocol but which 2908 * isn't (or doesn't need to be) implemented yet. 2909 * EINVAL - An invalid value was returned as part of a message. 2910 */ 2911 static int 2912 vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg) 2913 { 2914 int status = -1; 2915 2916 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL); 2917 ASSERT(vdc != NULL); 2918 ASSERT(mutex_owned(&vdc->lock)); 2919 2920 /* Depending on which state we are in, process the message */ 2921 switch (vdc->state) { 2922 case VD_STATE_INIT: 2923 status = vdc_handle_ver_msg(vdc, (vio_ver_msg_t *)&msg); 2924 break; 2925 2926 case VD_STATE_VER: 2927 status = vdc_handle_attr_msg(vdc, (vd_attr_msg_t *)&msg); 2928 break; 2929 2930 case VD_STATE_ATTR: 2931 status = vdc_handle_dring_reg_msg(vdc, 2932 (vio_dring_reg_msg_t *)&msg); 2933 break; 2934 2935 case VD_STATE_RDX: 2936 if (msg.tag.vio_subtype_env != VIO_RDX) { 2937 status = EPROTO; 2938 break; 2939 } 2940 2941 PR0("%s: Received RDX - handshake successful\n", __func__); 2942 2943 vdc->hshake_cnt = 0; /* reset failed handshake count */ 2944 status = 0; 2945 vdc->state = VD_STATE_DATA; 2946 2947 cv_broadcast(&vdc->attach_cv); 2948 break; 2949 2950 case VD_STATE_DATA: 2951 default: 2952 cmn_err(CE_NOTE, "[%d] Unexpected handshake state %d", 2953 vdc->instance, vdc->state); 2954 status = EPROTO; 2955 break; 2956 } 2957 2958 return (status); 2959 } 2960 2961 2962 /* 2963 * Function: 2964 * vdc_process_data_msg() 2965 * 2966 * Description: 2967 * This function is called by the message processing thread each time 2968 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 2969 * be an ACK or NACK from vds[1] which vdc handles as follows. 2970 * ACK - wake up the waiting thread 2971 * NACK - resend any messages necessary 2972 * 2973 * [1] Although the message format allows it, vds should not send a 2974 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 2975 * some bizarre reason it does, vdc will reset the connection. 2976
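 * Note that a single ACK/NACK may cover a range of descriptors (start_idx through end_idx, possibly wrapping around the ring); the waiter for every entry in that range is signalled.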
2976 * 2977 * Arguments: 2978 * vdc - soft state pointer for this instance of the device driver. 2979 * msg - the LDC message sent by vds 2980 * 2981 * Return Code: 2982 * 0 - Success. 2983 * > 0 - error value returned by LDC 2984 */ 2985 static int 2986 vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg) 2987 { 2988 int status = 0; 2989 vdc_local_desc_t *local_dep = NULL; 2990 vio_dring_msg_t *dring_msg = NULL; 2991 uint_t num_msgs; 2992 uint_t start; 2993 uint_t end; 2994 uint_t i; 2995 2996 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_DATA); 2997 ASSERT(vdc != NULL); 2998 ASSERT(mutex_owned(&vdc->lock)); 2999 3000 dring_msg = (vio_dring_msg_t *)&msg; 3001 3002 /* 3003 * Check to see if the message has bogus data 3004 */ 3005 start = dring_msg->start_idx; 3006 end = dring_msg->end_idx; 3007 if ((start >= VD_DRING_LEN) || (end >= VD_DRING_LEN)) { 3008 vdc_msg("%s: Bogus ACK data : start %d, end %d\n", 3009 __func__, start, end); 3010 return (EPROTO); 3011 } 3012 3013 /* 3014 * calculate the number of messages that vds ACK'ed 3015 * 3016 * Assumes, (like the rest of vdc) that there is a 1:1 mapping 3017 * between requests and Dring entries. 3018 */ 3019 num_msgs = (end >= start) ? 3020 (end - start + 1) : 3021 (VD_DRING_LEN - start + end + 1); 3022 3023 /* 3024 * Verify that the sequence number is what vdc expects. 3025 */ 3026 if (!vdc_verify_seq_num(vdc, dring_msg, num_msgs)) { 3027 return (ENXIO); 3028 } 3029 3030 /* 3031 * Wake the thread waiting for each DRing entry ACK'ed 3032 */ 3033 for (i = 0; i < num_msgs; i++) { 3034 int idx = (start + i) % VD_DRING_LEN; 3035 3036 local_dep = &vdc->local_dring[idx]; 3037 mutex_enter(&local_dep->lock); 3038 cv_signal(&local_dep->cv); 3039 mutex_exit(&local_dep->lock); 3040 } 3041 3042 if (msg.tag.vio_subtype == VIO_SUBTYPE_NACK) { 3043 PR0("%s: DATA NACK\n", __func__); 3044 VDC_DUMP_DRING_MSG(dring_msg); 3045 vdc_reset_connection(vdc, B_FALSE); 3046 3047 /* we need to drop the lock to trigger the handshake */ 3048 mutex_exit(&vdc->lock); 3049 vdc_init_handshake_negotiation(vdc); 3050 mutex_enter(&vdc->lock); 3051 } else if (msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 3052 status = EPROTO; 3053 } 3054 3055 return (status); 3056 } 3057 3058 /* 3059 * Function: 3060 * vdc_process_err_msg() 3061 * 3062 * NOTE: No error messages are used as part of the vDisk protocol 3063 */ 3064 static int 3065 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 3066 { 3067 _NOTE(ARGUNUSED(vdc)) 3068 _NOTE(ARGUNUSED(msg)) 3069 3070 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 3071 cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance); 3072 3073 return (ENOTSUP); 3074 } 3075 3076 /* 3077 * Function: 3078 * vdc_handle_ver_msg() 3079 * 3080 * Description: 3081 * 3082 * Arguments: 3083 * vdc - soft state pointer for this instance of the device driver. 
3084 * ver_msg - LDC message sent by vDisk server 3085 * 3086 * Return Code: 3087 * 0 - Success 3088 */ 3089 static int 3090 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 3091 { 3092 int status = 0; 3093 3094 ASSERT(vdc != NULL); 3095 ASSERT(mutex_owned(&vdc->lock)); 3096 3097 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 3098 return (EPROTO); 3099 } 3100 3101 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 3102 return (EINVAL); 3103 } 3104 3105 switch (ver_msg->tag.vio_subtype) { 3106 case VIO_SUBTYPE_ACK: 3107 /* 3108 * We check to see if the version returned is indeed supported 3109 * (The server may have also adjusted the minor number downwards 3110 * and if so 'ver_msg' will contain the actual version agreed) 3111 */ 3112 if (vdc_is_supported_version(ver_msg)) { 3113 vdc->ver.major = ver_msg->ver_major; 3114 vdc->ver.minor = ver_msg->ver_minor; 3115 ASSERT(vdc->ver.major > 0); 3116 3117 vdc->state = VD_STATE_VER; 3118 status = vdc_init_attr_negotiation(vdc); 3119 } else { 3120 status = EPROTO; 3121 } 3122 break; 3123 3124 case VIO_SUBTYPE_NACK: 3125 /* 3126 * call vdc_is_supported_version() which will return the next 3127 * supported version (if any) in 'ver_msg' 3128 */ 3129 (void) vdc_is_supported_version(ver_msg); 3130 if (ver_msg->ver_major > 0) { 3131 size_t len = sizeof (*ver_msg); 3132 3133 ASSERT(vdc->ver.major > 0); 3134 3135 /* reset the necessary fields and resend */ 3136 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 3137 ver_msg->dev_class = VDEV_DISK; 3138 3139 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 3140 PR0("[%d] Resend VER info (LDC status = %d)\n", 3141 vdc->instance, status); 3142 if (len != sizeof (*ver_msg)) 3143 status = EBADMSG; 3144 } else { 3145 cmn_err(CE_NOTE, "[%d] No common version with " 3146 "vDisk server", vdc->instance); 3147 status = ENOTSUP; 3148 } 3149 3150 break; 3151 case VIO_SUBTYPE_INFO: 3152 /* 3153 * Handle the case where vds starts handshake 3154 * (for now only vdc is the instigator) 3155 */ 3156 status = ENOTSUP; 3157 break; 3158 3159 default: 3160 status = EINVAL; 3161 break; 3162 } 3163 3164 return (status); 3165 } 3166 3167 /* 3168 * Function: 3169 * vdc_handle_attr_msg() 3170 * 3171 * Description: 3172 * Handles the attribute negotiation response from vds; saves and sanity checks the disk attributes and then moves the handshake on to Descriptor Ring registration. 3173 * Arguments: 3174 * vdc - soft state pointer for this instance of the device driver. 3175 * attr_msg - LDC message sent by vDisk server 3176 * 3177 * Return Code: 3178 * 0 - Success 3179 */ 3180 static int 3181 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 3182 { 3183 int status = 0; 3184 3185 ASSERT(vdc != NULL); 3186 ASSERT(mutex_owned(&vdc->lock)); 3187 3188 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 3189 return (EPROTO); 3190 } 3191 3192 switch (attr_msg->tag.vio_subtype) { 3193 case VIO_SUBTYPE_ACK: 3194 /* 3195 * We now verify the attributes sent by vds. 3196 */ 3197 vdc->vdisk_size = attr_msg->vdisk_size; 3198 vdc->vdisk_type = attr_msg->vdisk_type; 3199 3200 if ((attr_msg->max_xfer_sz != vdc->max_xfer_sz) || 3201 (attr_msg->vdisk_block_size != vdc->block_size)) { 3202 /* 3203 * Future support: step down to the block size 3204 * and max transfer size suggested by the 3205 * server.
(If this value is less than 128K 3206 * then multiple Dring entries per request 3207 * would need to be implemented) 3208 */ 3209 cmn_err(CE_NOTE, "[%d] Couldn't process block " 3210 "attributes from vds", vdc->instance); 3211 status = EINVAL; 3212 break; 3213 } 3214 3215 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 3216 (attr_msg->vdisk_size > INT64_MAX) || 3217 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 3218 vdc_msg("%s[%d] Couldn't process attrs " 3219 "from vds", __func__, vdc->instance); 3220 status = EINVAL; 3221 break; 3222 } 3223 3224 vdc->state = VD_STATE_ATTR; 3225 status = vdc_init_dring_negotiate(vdc); 3226 break; 3227 3228 case VIO_SUBTYPE_NACK: 3229 /* 3230 * vds could not handle the attributes we sent so we 3231 * stop negotiating. 3232 */ 3233 status = EPROTO; 3234 break; 3235 3236 case VIO_SUBTYPE_INFO: 3237 /* 3238 * Handle the case where vds starts the handshake 3239 * (for now; vdc is the only supported instigator) 3240 */ 3241 status = ENOTSUP; 3242 break; 3243 3244 default: 3245 status = ENOTSUP; 3246 break; 3247 } 3248 3249 return (status); 3250 } 3251 3252 /* 3253 * Function: 3254 * vdc_handle_dring_reg_msg() 3255 * 3256 * Description: 3257 * Handles the response to the Descriptor Ring registration vdc requested; on ACK it saves the ring identifier sent by vds and sends the RDX message which completes the handshake. 3258 * Arguments: 3259 * vdc - soft state pointer for this instance of the driver. 3260 * dring_msg - LDC message sent by vDisk server 3261 * 3262 * Return Code: 3263 * 0 - Success 3264 */ 3265 static int 3266 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 3267 { 3268 int status = 0; 3269 vio_rdx_msg_t msg = {0}; 3270 size_t msglen = sizeof (msg); 3271 3272 ASSERT(vdc != NULL); 3273 ASSERT(mutex_owned(&vdc->lock)); 3274 3275 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 3276 return (EPROTO); 3277 } 3278 3279 switch (dring_msg->tag.vio_subtype) { 3280 case VIO_SUBTYPE_ACK: 3281 /* save the received dring_ident */ 3282 vdc->dring_ident = dring_msg->dring_ident; 3283 PR0("%s[%d] Received dring ident=0x%lx\n", 3284 __func__, vdc->instance, vdc->dring_ident); 3285 3286 /* 3287 * Send an RDX message to vds to indicate we are ready 3288 * to send data 3289 */ 3290 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 3291 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 3292 msg.tag.vio_subtype_env = VIO_RDX; 3293 msg.tag.vio_sid = vdc->session_id; 3294 status = vdc_send(vdc, (caddr_t)&msg, &msglen); 3295 if (status != 0) { 3296 cmn_err(CE_NOTE, "[%d] Failed to send RDX" 3297 " message (%d)", vdc->instance, status); 3298 break; 3299 } 3300 3301 vdc->state = VD_STATE_RDX; 3302 break; 3303 3304 case VIO_SUBTYPE_NACK: 3305 /* 3306 * vds could not handle the DRing info we sent so we 3307 * stop negotiating. 3308 */ 3309 cmn_err(CE_CONT, "server could not register DRing\n"); 3310 vdc_reset_connection(vdc, B_FALSE); 3311 vdc_destroy_descriptor_ring(vdc); 3312 status = EPROTO; 3313 break; 3314 3315 case VIO_SUBTYPE_INFO: 3316 /* 3317 * Handle the case where vds starts handshake 3318 * (for now only vdc is the instigator) 3319 */ 3320 status = ENOTSUP; 3321 break; 3322 default: 3323 status = ENOTSUP; 3324 } 3325 3326 return (status); 3327 } 3328 3329 /* 3330 * Function: 3331 * vdc_verify_seq_num() 3332 * 3333 * Description: 3334 * This function verifies that the sequence number sent back by vds with 3335 * the latest message correctly follows the last request processed. 3336 * 3337 * Arguments: 3338 * vdc - soft state pointer for this instance of the driver. 3339 * dring_msg - pointer to the LDC message sent by vds 3340 * num_msgs - the number of requests being acknowledged 3341 * 3342 * Return Code: 3343 * B_TRUE - Success.
3344 * B_FALSE - The seq numbers are so out of sync that vdc cannot deal with them 3345 */ 3346 static boolean_t 3347 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int num_msgs) 3348 { 3349 ASSERT(vdc != NULL); 3350 ASSERT(dring_msg != NULL); 3351 3352 /* 3353 * Check to see if the messages were responded to in the correct 3354 * order by vds. There are 3 possible scenarios: 3355 * - the seq_num we expected is returned (everything is OK) 3356 * - a seq_num earlier than the last one acknowledged is returned: 3357 * something is seriously wrong 3358 * - a seq_num greater than what we expected is returned: requests * were probably lost in transit * In either failure case vdc cannot resynchronize, so we fail the * check and let the caller reset the connection. 3359 */ 3360 if (dring_msg->seq_num != (vdc->seq_num_reply + num_msgs)) { 3361 vdc_msg("%s[%d]: Bogus seq_num %d, expected %d\n", 3362 __func__, vdc->instance, dring_msg->seq_num, 3363 vdc->seq_num_reply + num_msgs); 3364 return (B_FALSE); 3365 } 3374 vdc->seq_num_reply += num_msgs; 3375 3376 return (B_TRUE); 3377 } 3378 3379 3380 /* 3381 * Function: 3382 * vdc_is_supported_version() 3383 * 3384 * Description: 3385 * This routine checks if the major/minor version numbers specified in 3386 * 'ver_msg' are supported. If not it finds the next version that is 3387 * in the supported version list 'vdc_version[]' and sets the fields in 3388 * 'ver_msg' to those values 3389 * 3390 * Arguments: 3391 * ver_msg - LDC message sent by vDisk server 3392 * 3393 * Return Code: 3394 * B_TRUE - Success 3395 * B_FALSE - Version not supported 3396 */ 3397 static boolean_t 3398 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 3399 { 3400 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 3401 3402 for (int i = 0; i < vdc_num_versions; i++) { 3403 ASSERT(vdc_version[i].major > 0); 3404 ASSERT((i == 0) || 3405 (vdc_version[i].major < vdc_version[i-1].major)); 3406 3407 /* 3408 * If the major versions match, adjust the minor version, if 3409 * necessary, down to the highest value supported by this 3410 * client.
The server should support all minor versions lower 3411 * than the value it sent. 3412 */ 3413 if (ver_msg->ver_major == vdc_version[i].major) { 3414 if (ver_msg->ver_minor > vdc_version[i].minor) { 3415 PR0("Adjusting minor version from %u to %u", 3416 ver_msg->ver_minor, vdc_version[i].minor); 3417 ver_msg->ver_minor = vdc_version[i].minor; 3418 } 3419 return (B_TRUE); 3420 } 3421 3422 /* 3423 * If the message contains a higher major version number, set 3424 * the message's major/minor versions to the current values 3425 * and return false, so this message will get resent with 3426 * these values, and the server will potentially try again 3427 * with the same or a lower version 3428 */ 3429 if (ver_msg->ver_major > vdc_version[i].major) { 3430 ver_msg->ver_major = vdc_version[i].major; 3431 ver_msg->ver_minor = vdc_version[i].minor; 3432 PR0("Suggesting major/minor (0x%x/0x%x)\n", 3433 ver_msg->ver_major, ver_msg->ver_minor); 3434 3435 return (B_FALSE); 3436 } 3437 3438 /* 3439 * Otherwise, the message's major version is less than the 3440 * current major version, so continue the loop to the next 3441 * (lower) supported version 3442 */ 3443 } 3444 3445 /* 3446 * No common version was found; "ground" the version pair in the 3447 * message to terminate negotiation 3448 */ 3449 ver_msg->ver_major = 0; 3450 ver_msg->ver_minor = 0; 3451 3452 return (B_FALSE); 3453 } 3454 /* -------------------------------------------------------------------------- */ 3455 3456 /* 3457 * DKIO(7I) support 3458 */ 3459 3460 typedef struct vdc_dk_arg { 3461 struct dk_callback dkc; 3462 int mode; 3463 dev_t dev; 3464 vdc_t *vdc; 3465 } vdc_dk_arg_t; 3466 3467 /* 3468 * Function: 3469 * vdc_dkio_flush_cb() 3470 * 3471 * Description: 3472 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 3473 * by kernel code. 3474 * 3475 * Arguments: 3476 * arg - a pointer to a vdc_dk_arg_t structure. 3477 */ 3478 void 3479 vdc_dkio_flush_cb(void *arg) 3480 { 3481 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 3482 struct dk_callback *dkc = NULL; 3483 vdc_t *vdc = NULL; 3484 int rv; 3485 3486 if (dk_arg == NULL) { 3487 vdc_msg("%s[?] DKIOCFLUSHWRITECACHE arg is NULL\n", __func__); 3488 return; 3489 } 3490 dkc = &dk_arg->dkc; 3491 vdc = dk_arg->vdc; 3492 ASSERT(vdc != NULL); 3493 3494 rv = vdc_populate_descriptor(vdc, NULL, 0, VD_OP_FLUSH, 3495 dk_arg->mode, SDPART(getminor(dk_arg->dev))); 3496 if (rv != 0) { 3497 PR0("%s[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 3498 __func__, vdc->instance, rv, 3499 ddi_model_convert_from(dk_arg->mode & FMODELS)); 3500 } 3501 3502 /* 3503 * Trigger the callback to notify the caller that the ioctl call has 3504 * been completed. 3505 */ 3506 if ((dk_arg->mode & FKIOCTL) && 3507 (dkc != NULL) && 3508 (dkc->dkc_callback != NULL)) { 3509 ASSERT(dkc->dkc_cookie != NULL); 3510 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 3511 } 3512 3513 /* Indicate that one less DKIO write flush is outstanding */ 3514 mutex_enter(&vdc->lock); 3515 vdc->dkio_flush_pending--; 3516 ASSERT(vdc->dkio_flush_pending >= 0); 3517 mutex_exit(&vdc->lock); 3518 3519 /* free the mem that was allocated when the callback was dispatched */ 3520 kmem_free(arg, sizeof (vdc_dk_arg_t)); 3521 } 3522 3523 /* 3524 * This structure is used in the DKIO(7I) array below.
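 * Each entry pairs a Solaris ioctl command with the vDisk operation used to service it, the size of the structure exchanged with vds, and the routine converting between the Solaris and vDisk protocol formats.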
3525 */ 3526 typedef struct vdc_dk_ioctl { 3527 uint8_t op; /* VD_OP_XXX value */ 3528 int cmd; /* Solaris ioctl operation number */ 3529 size_t nbytes; /* size of structure to be copied */ 3530 3531 /* function to convert between vDisk and Solaris structure formats */ 3532 int (*convert)(void *vd_buf, void *ioctl_arg, int mode, int dir); 3533 } vdc_dk_ioctl_t; 3534 3535 /* 3536 * Subset of DKIO(7I) operations currently supported 3537 */ 3538 static vdc_dk_ioctl_t dk_ioctl[] = { 3539 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, sizeof (int), 3540 vdc_null_copy_func}, 3541 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 3542 vdc_null_copy_func}, 3543 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 3544 vdc_null_copy_func}, 3545 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 3546 vdc_get_vtoc_convert}, 3547 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 3548 vdc_set_vtoc_convert}, 3551 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 3552 vdc_get_geom_convert}, 3553 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 3554 vdc_get_geom_convert}, 3555 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 3556 vdc_get_geom_convert}, 3557 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 3558 vdc_set_geom_convert}, 3559 3560 /* 3561 * These particular ioctls are not sent to the server - vdc fakes up 3562 * the necessary info. 3563 */ 3564 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 3565 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 3566 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 3567 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 3568 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 3569 }; 3570 3571 /* 3572 * Function: 3573 * vd_process_ioctl() 3574 * 3575 * Description: 3576 * This routine processes disk-specific ioctl calls 3577 * 3578 * Arguments: 3579 * dev - the device number 3580 * cmd - the operation [dkio(7I)] to be processed 3581 * arg - pointer to user provided structure 3582 * (contains data to be set or reference parameter for get) 3583 * mode - bit flag, indicating open settings, 32/64 bit type, etc 3584 * 3585 * Return Code: 3586 * 0 3587 * EFAULT 3588 * ENXIO 3589 * EIO 3590 * ENOTSUP 3591 */ 3592 static int 3593 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 3594 { 3595 int instance = SDUNIT(getminor(dev)); 3596 vdc_t *vdc = NULL; 3597 int rv = -1; 3598 int idx = 0; /* index into dk_ioctl[] */ 3599 size_t len = 0; /* #bytes to send to vds */ 3600 size_t alloc_len = 0; /* #bytes to allocate mem for */ 3601 caddr_t mem_p = NULL; 3602 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 3603 3604 PR0("%s: Processing ioctl(%x) for dev %x : model %x\n", 3605 __func__, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 3606 3607 vdc = ddi_get_soft_state(vdc_state, instance); 3608 if (vdc == NULL) { 3609 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 3610 instance); 3611 return (ENXIO); 3612 } 3613 3614 /* 3615 * Check to see if we can communicate with the vDisk server 3616 */ 3617 if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) { 3618 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 3619 return (ENOLINK); 3620 } 3621 3622 /* 3623 * Validate the ioctl operation to be performed. 3624 * 3625 * If we have looped through the array without finding a match then we 3626 * don't support this ioctl.
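 * (A simple linear scan of dk_ioctl[] is sufficient here: the table is small and ioctl dispatch is not on the I/O fast path.)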
3627 */ 3628 for (idx = 0; idx < nioctls; idx++) { 3629 if (cmd == dk_ioctl[idx].cmd) 3630 break; 3631 } 3632 3633 if (idx >= nioctls) { 3634 PR0("%s[%d] Unsupported ioctl(%x)\n", 3635 __func__, vdc->instance, cmd); 3636 return (ENOTSUP); 3637 } 3638 3639 len = dk_ioctl[idx].nbytes; 3640 3641 /* 3642 * Deal with the ioctls which the server does not provide. vdc can 3643 * fake these up and return immediately 3644 */ 3645 switch (cmd) { 3646 case CDROMREADOFFSET: 3647 case DKIOCREMOVABLE: 3648 case USCSICMD: 3649 return (ENOTTY); 3650 3651 case DKIOCINFO: 3652 { 3653 struct dk_cinfo cinfo; 3654 if (vdc->cinfo == NULL) 3655 return (ENXIO); 3656 3657 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 3658 cinfo.dki_partition = SDPART(getminor(dev)); 3659 3660 rv = ddi_copyout(&cinfo, (void *)arg, 3661 sizeof (struct dk_cinfo), mode); 3662 if (rv != 0) 3663 return (EFAULT); 3664 3665 return (0); 3666 } 3667 3668 case DKIOCGMEDIAINFO: 3669 { 3670 if (vdc->minfo == NULL) 3671 return (ENXIO); 3672 3673 rv = ddi_copyout(vdc->minfo, (void *)arg, 3674 sizeof (struct dk_minfo), mode); 3675 if (rv != 0) 3676 return (EFAULT); 3677 3678 return (0); 3679 } 3680 3681 case DKIOCFLUSHWRITECACHE: 3682 { 3683 struct dk_callback *dkc = (struct dk_callback *)arg; 3684 vdc_dk_arg_t *dkarg = NULL; 3685 3686 PR1("[%d] Flush W$: mode %x\n", instance, mode); 3687 3688 /* 3689 * If the backing device is not a 'real' disk then the 3690 * W$ operation request to the vDisk server will fail 3691 * so we might as well save the cycles and return now. 3692 */ 3693 if (vdc->vdisk_type != VD_DISK_TYPE_DISK) 3694 return (ENOTTY); 3695 3696 /* 3697 * If arg is NULL, then there is no callback function 3698 * registered and the call operates synchronously; we 3699 * break and continue with the rest of the function and 3700 * wait for vds to return (i.e. after the request to 3701 * vds returns successfully, all writes completed prior 3702 * to the ioctl will have been flushed from the disk 3703 * write cache to persistent media). 3704 * 3705 * If a callback function is registered, we dispatch 3706 * the request on a task queue and return immediately. 3707 * The callback will deal with informing the calling 3708 * thread that the flush request is completed. 3709 */ 3710 if (dkc == NULL) 3711 break; 3712 3713 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 3714 3715 dkarg->mode = mode; 3716 dkarg->dev = dev; 3717 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 3718 3719 mutex_enter(&vdc->lock); 3720 vdc->dkio_flush_pending++; 3721 dkarg->vdc = vdc; 3722 mutex_exit(&vdc->lock); 3723 3724 /* put the request on a task queue */ 3725 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 3726 (void *)dkarg, DDI_SLEEP); 3727 3728 return (rv == NULL ?
ENOMEM : 0); 3729 } 3730 } 3731 3732 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 3733 ASSERT(dk_ioctl[idx].op != 0); 3734 3735 /* LDC requires that the memory being mapped is 8-byte aligned */ 3736 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 3737 PR1("%s[%d]: struct size %d alloc %d\n", 3738 __func__, instance, len, alloc_len); 3739 3740 ASSERT(alloc_len != 0); /* sanity check */ 3741 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 3742 3743 /* 3744 * Call the conversion function for this ioctl which, if necessary, 3745 * converts from the Solaris format to the format ARC'ed 3746 * as part of the vDisk protocol (FWARC 2006/195) 3747 */ 3748 ASSERT(dk_ioctl[idx].convert != NULL); 3749 rv = (dk_ioctl[idx].convert)(arg, mem_p, mode, VD_COPYIN); 3750 if (rv != 0) { 3751 PR0("%s[%d]: convert returned %d for ioctl 0x%x\n", 3752 __func__, instance, rv, cmd); 3753 if (mem_p != NULL) 3754 kmem_free(mem_p, alloc_len); 3755 return (rv); 3756 } 3757 3758 /* 3759 * send request to vds to service the ioctl. 3760 */ 3761 rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, dk_ioctl[idx].op, 3762 mode, SDPART((getminor(dev)))); 3763 if (rv != 0) { 3764 /* 3765 * This is not necessarily an error. The ioctl could 3766 * be returning a value such as ENOTTY to indicate 3767 * that the ioctl is not applicable. 3768 */ 3769 PR0("%s[%d]: vds returned %d for ioctl 0x%x\n", 3770 __func__, instance, rv, cmd); 3771 if (mem_p != NULL) 3772 kmem_free(mem_p, alloc_len); 3773 return (rv); 3774 } 3775 3776 /* 3777 * If the VTOC has been changed, then vdc needs to update the copy 3778 * it saved in the soft state structure and try to update the device 3779 * node properties. Failing to set the properties should not cause 3780 * an error to be returned to the caller though. 3781 */ 3782 if (cmd == DKIOCSVTOC) { 3783 VD_VTOC2VTOC((vd_vtoc_t *)mem_p, vdc->vtoc); 3784 if (vdc_create_device_nodes_props(vdc)) { 3785 cmn_err(CE_NOTE, "![%d] Failed to update device nodes" 3786 " properties", instance); 3787 } 3788 } 3789 3790 /* 3791 * Call the conversion function (if it exists) for this ioctl 3792 * which converts from the format ARC'ed as part of the vDisk 3793 * protocol (FWARC 2006/195) back to a format understood by 3794 * the rest of Solaris. 3795 */ 3796 rv = (dk_ioctl[idx].convert)(mem_p, arg, mode, VD_COPYOUT); 3797 if (rv != 0) { 3798 PR0("%s[%d]: convert returned %d for ioctl 0x%x\n", 3799 __func__, instance, rv, cmd); 3800 if (mem_p != NULL) 3801 kmem_free(mem_p, alloc_len); 3802 return (rv); 3803 } 3804 3805 if (mem_p != NULL) 3806 kmem_free(mem_p, alloc_len); 3807 3808 return (rv); 3809 } 3810 3811 /* 3812 * Function: 3813 * vdc_null_copy_func() 3814 * Description: 3815 * This is an empty conversion function used by ioctl calls which 3816 * do not need to convert the data being passed in/out to userland 3817 */ 3818 static int 3819 vdc_null_copy_func(void *from, void *to, int mode, int dir) 3820 { 3821 _NOTE(ARGUNUSED(from)) 3822 _NOTE(ARGUNUSED(to)) 3823 _NOTE(ARGUNUSED(mode)) 3824 _NOTE(ARGUNUSED(dir)) 3825 3826 return (0); 3827 } 3828 3829 /* 3830 * Function: 3831 * vdc_get_vtoc_convert() 3832 * 3833 * Description: 3834 * This routine converts the vd_vtoc_t structure sent back by the vDisk server into the Solaris vtoc (or vtoc32) structure and copies it out to the caller. 3835 * 3836 * Arguments: 3837 * from - the buffer containing the data to be copied from 3838 * to - the buffer to be copied to 3839 * mode - flags passed to ioctl() call 3840 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 3841 * 3842 * Return Code: 3843 * 0 - Success 3844 * ENXIO - incorrect buffer passed in.
3845 * EFAULT - ddi_copyxxx routine encountered an error. 3846 */ 3847 static int 3848 vdc_get_vtoc_convert(void *from, void *to, int mode, int dir) 3849 { 3851 void *tmp_memp; 3852 struct vtoc vt; 3853 struct vtoc32 vt32; 3854 int copy_len = 0; 3855 int rv = 0; 3856 3857 if (dir != VD_COPYOUT) 3858 return (0); /* nothing to do */ 3859 3860 if ((from == NULL) || (to == NULL)) 3861 return (ENXIO); 3862 3863 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 3864 copy_len = sizeof (struct vtoc32); 3865 else 3866 copy_len = sizeof (struct vtoc); 3867 3870 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 3871 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 3872 vtoctovtoc32(vt, vt32); 3873 tmp_memp = &vt32; 3874 } else { 3875 tmp_memp = &vt; 3876 } 3877 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 3878 if (rv != 0) 3879 rv = EFAULT; 3880 3882 return (rv); 3883 } 3884 3885 /* 3886 * Function: 3887 * vdc_set_vtoc_convert() 3888 * 3889 * Description: 3890 * Converts the Solaris vtoc structure passed in with DKIOCSVTOC into the vd_vtoc_t format defined by the vDisk protocol (FWARC 2006/195). 3891 * Arguments: 3892 * from - Buffer with data 3893 * to - Buffer where data is to be copied to 3894 * mode - flags passed to ioctl 3895 * dir - direction of copy (in or out) 3896 * 3897 * Return Code: 3898 * 0 - Success 3899 * ENXIO - Invalid buffer passed in 3900 * EFAULT - ddi_copyin of data failed 3901 */ 3902 static int 3903 vdc_set_vtoc_convert(void *from, void *to, int mode, int dir) 3904 { 3905 void *tmp_mem = NULL; 3906 struct vtoc vt; 3907 struct vtoc *vtp = &vt; 3908 vd_vtoc_t vtvd; 3909 int copy_len = 0; 3910 int rv = 0; 3911 3912 if (dir != VD_COPYIN) 3913 return (0); /* nothing to do */ 3914 3915 if ((from == NULL) || (to == NULL)) 3916 return (ENXIO); 3917 3918 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 3919 copy_len = sizeof (struct vtoc32); 3920 else 3921 copy_len = sizeof (struct vtoc); 3922 3923 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 3924 3925 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 3926 if (rv != 0) { 3927 kmem_free(tmp_mem, copy_len); 3928 return (EFAULT); 3929 } 3930 3931 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 3932 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 3933 } else { 3934 vtp = tmp_mem; 3935 } 3936 3937 VTOC2VD_VTOC(vtp, &vtvd); 3938 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 3939 kmem_free(tmp_mem, copy_len); 3940 3941 return (0); 3942 } 3943 3944 /* 3945 * Function: 3946 * vdc_get_geom_convert() 3947 * 3948 * Description: 3949 * Converts the vd_geom_t structure returned by the vDisk server into the Solaris dk_geom structure and copies it out to the caller. 3950 * Arguments: 3951 * from - Buffer with data 3952 * to - Buffer where data is to be copied to 3953 * mode - flags passed to ioctl 3954 * dir - direction of copy (in or out) 3955 * 3956 * Return Code: 3957 * 0 - Success 3958 * ENXIO - Invalid buffer passed in 3959 * EFAULT - ddi_copyout of data failed 3960 */ 3961 static int 3962 vdc_get_geom_convert(void *from, void *to, int mode, int dir) 3963 { 3964 struct dk_geom geom; 3965 int copy_len = sizeof (struct dk_geom); 3966 int rv = 0; 3967 3968 if (dir != VD_COPYOUT) 3969 return (0); /* nothing to do */ 3970 3971 if ((from == NULL) || (to == NULL)) 3972 return (ENXIO); 3973 3974 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 3975 rv = ddi_copyout(&geom, to, copy_len, mode); 3976 if (rv != 0) 3977 rv = EFAULT; 3978 3979 return (rv); 3980 } 3981 3982 /* 3983 * Function: 3984 * vdc_set_geom_convert() 3985 * 3986 * Description: 3987 * This routine performs the necessary conversions from the DKIOCSGEOM 3988 * Solaris structure to the format defined in FWARC
2006/195 3989 * 3990 * Arguments: 3991 * from - Buffer with data 3992 * to - Buffer where data is to be copied to 3993 * mode - flags passed to ioctl 3994 * dir - direction of copy (in or out) 3995 * 3996 * Return Code: 3997 * 0 - Success 3998 * ENXIO - Invalid buffer passed in 3999 * EFAULT - ddi_copyin of data failed 4000 */ 4001 static int 4002 vdc_set_geom_convert(void *from, void *to, int mode, int dir) 4003 { 4004 vd_geom_t vdgeom; 4005 void *tmp_mem = NULL; 4006 int copy_len = sizeof (struct dk_geom); 4007 int rv = 0; 4008 4009 if (dir != VD_COPYIN) 4010 return (0); /* nothing to do */ 4011 4012 if ((from == NULL) || (to == NULL)) 4013 return (ENXIO); 4014 4015 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4016 4017 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4018 if (rv != 0) { 4019 kmem_free(tmp_mem, copy_len); 4020 return (EFAULT); 4021 } 4022 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 4023 bcopy(&vdgeom, to, sizeof (vdgeom)); 4024 kmem_free(tmp_mem, copy_len); 4025 4026 return (0); 4027 } 4028 4029 /* 4030 * Function: 4031 * vdc_create_fake_geometry() 4032 * 4033 * Description: 4034 * This routine fakes up the disk info needed for some DKIO ioctls. 4035 * - DKIOCINFO 4036 * - DKIOCGMEDIAINFO 4037 * 4038 * [ just like lofi(7D) and ramdisk(7D) ] 4039 * 4040 * Arguments: 4041 * vdc - soft state pointer for this instance of the device driver. 4042 * 4043 * Return Code: 4044 * 0 - Success 4045 */ 4046 static int 4047 vdc_create_fake_geometry(vdc_t *vdc) 4048 { 4049 int rv = 0; 4050 4051 ASSERT(vdc != NULL); 4052 4053 /* 4054 * DKIOCINFO support 4055 */ 4056 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 4057 4058 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 4059 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 4060 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 4061 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 4062 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 4063 vdc->cinfo->dki_flags = DKI_FMTVOL; 4064 vdc->cinfo->dki_cnum = 0; 4065 vdc->cinfo->dki_addr = 0; 4066 vdc->cinfo->dki_space = 0; 4067 vdc->cinfo->dki_prio = 0; 4068 vdc->cinfo->dki_vec = 0; 4069 vdc->cinfo->dki_unit = vdc->instance; 4070 vdc->cinfo->dki_slave = 0; 4071 /* 4072 * The partition number will be created on the fly depending on the 4073 * actual slice (i.e. minor node) that is used to request the data. 4074 */ 4075 vdc->cinfo->dki_partition = 0; 4076 4077 /* 4078 * DKIOCGMEDIAINFO support 4079 */ 4080 if (vdc->minfo == NULL) 4081 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 4082 vdc->minfo->dki_media_type = DK_FIXED_DISK; 4083 vdc->minfo->dki_capacity = 1; 4084 vdc->minfo->dki_lbsize = DEV_BSIZE; 4085 4086 return (rv); 4087 } 4088 4089 /* 4090 * Function: 4091 * vdc_setup_disk_layout() 4092 * 4093 * Description: 4094 * This routine discovers all the necessary details about the "disk" 4095 * by requesting the data that is available from the vDisk server and by 4096 * faking up the rest of the data. 4097 * 4098 * Arguments: 4099 * vdc - soft state pointer for this instance of the device driver. 
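 * The VTOC is fetched by issuing a DKIOCGVTOC ioctl internally (with FKIOCTL set) through vd_process_ioctl(), exactly as a userland caller would; the controller and media info are faked up locally by vdc_create_fake_geometry().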
4100 * 4101 * Return Code: 4102 * 0 - Success 4103 */ 4104 static int 4105 vdc_setup_disk_layout(vdc_t *vdc) 4106 { 4107 dev_t dev; 4108 int slice = 0; 4109 int rv; 4110 4111 ASSERT(vdc != NULL); 4112 4113 rv = vdc_create_fake_geometry(vdc); 4114 if (rv != 0) { 4115 cmn_err(CE_NOTE, "[%d] Failed to create disk geometry (err=%d)", 4116 vdc->instance, rv); 4117 } 4118 4119 if (vdc->vtoc == NULL) 4120 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 4121 4122 dev = makedevice(ddi_driver_major(vdc->dip), 4123 VD_MAKE_DEV(vdc->instance, 0)); 4124 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); 4125 if (rv) { 4126 cmn_err(CE_NOTE, "[%d] Failed to get VTOC (err=%d)", 4127 vdc->instance, rv); 4128 return (rv); 4129 } 4130 4131 /* 4132 * Read disk label from start of disk 4133 */ 4134 vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); 4135 4136 /* 4137 * Find the slice that represents the entire "disk" and use that to 4138 * read the disk label. The convention in Solaris is that slice 2 4139 * represents the whole disk, so we check for that; otherwise we 4140 * default to slice 0 4141 */ 4142 if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && 4143 (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { 4144 slice = 2; 4145 } else { 4146 slice = 0; 4147 } 4148 rv = vdc_populate_descriptor(vdc, (caddr_t)vdc->label, DK_LABEL_SIZE, 4149 VD_OP_BREAD, 0, slice); 4150 4151 return (rv); 4152 }4153