1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. vdc will copy the data to be written to the descriptor 49 * ring or maps the buffer to store the data read by the vDisk 50 * server into the descriptor ring. It then sends a message to the 51 * vDisk server requesting it to complete the operation. 52 * 53 * 4) Handling responses from vDisk server. 54 * The vDisk server will ACK some or all of the messages vdc sends to it 55 * (this is configured during the handshake). Upon receipt of an ACK 56 * vdc will check the descriptor ring and signal to the upper layer 57 * code waiting on the IO. 58 */ 59 60 #include <sys/conf.h> 61 #include <sys/disp.h> 62 #include <sys/ddi.h> 63 #include <sys/dkio.h> 64 #include <sys/efi_partition.h> 65 #include <sys/fcntl.h> 66 #include <sys/file.h> 67 #include <sys/mach_descrip.h> 68 #include <sys/modctl.h> 69 #include <sys/mdeg.h> 70 #include <sys/note.h> 71 #include <sys/open.h> 72 #include <sys/stat.h> 73 #include <sys/sunddi.h> 74 #include <sys/types.h> 75 #include <sys/promif.h> 76 #include <sys/vtoc.h> 77 #include <sys/archsystm.h> 78 #include <sys/sysmacros.h> 79 80 #include <sys/cdio.h> 81 #include <sys/dktp/cm.h> 82 #include <sys/dktp/fdisk.h> 83 #include <sys/scsi/generic/sense.h> 84 #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ 85 #include <sys/scsi/targets/sddef.h> 86 87 #include <sys/ldoms.h> 88 #include <sys/ldc.h> 89 #include <sys/vio_common.h> 90 #include <sys/vio_mailbox.h> 91 #include <sys/vdsk_common.h> 92 #include <sys/vdsk_mailbox.h> 93 #include <sys/vdc.h> 94 95 /* 96 * function prototypes 97 */ 98 99 /* standard driver functions */ 100 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 101 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 102 static int vdc_strategy(struct buf *buf); 103 static int vdc_print(dev_t dev, char *str); 104 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 105 static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 106 static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 107 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 108 cred_t *credp, int *rvalp); 109 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 110 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 111 112 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 113 void *arg, void **resultp); 114 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 115 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 116 117 /* setup */ 118 static int vdc_send(ldc_handle_t ldc_handle, caddr_t pkt, size_t *msglen); 119 static int vdc_do_ldc_init(vdc_t *vdc); 120 static int vdc_start_ldc_connection(vdc_t *vdc); 121 static int vdc_create_device_nodes(vdc_t *vdc); 122 static int vdc_create_device_nodes_props(vdc_t *vdc); 123 static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id); 124 static void vdc_terminate_ldc(vdc_t *vdc); 125 static int vdc_init_descriptor_ring(vdc_t *vdc); 126 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 127 128 /* handshake with vds */ 129 static void vdc_init_handshake_negotiation(void *arg); 130 static int vdc_init_ver_negotiation(vdc_t *vdc); 131 static int vdc_init_attr_negotiation(vdc_t *vdc); 132 static int vdc_init_dring_negotiate(vdc_t *vdc); 133 static int vdc_handle_ver_negotiate(); 134 static int vdc_handle_attr_negotiate(); 135 static void vdc_reset_connection(vdc_t *vdc, boolean_t resetldc); 136 static boolean_t vdc_is_able_to_tx_data(vdc_t *vdc, int flag); 137 138 /* processing */ 139 static void vdc_process_msg_thread(vdc_t *vdc); 140 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 141 static void vdc_process_msg(void *arg); 142 static int vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg); 143 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg); 144 static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg); 145 static void vdc_do_process_msg(vdc_t *vdc); 146 static int vdc_get_next_dring_entry_id(vdc_t *vdc, uint_t needed); 147 static int vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, 148 size_t nbytes, int op, uint64_t arg, uint64_t slice); 149 static int vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, 150 vio_dring_msg_t dmsg); 151 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 152 static int vdc_get_response(vdc_t *vdc, int start, int end); 153 static int vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, 154 caddr_t addr, size_t nbytes, int operation); 155 static boolean_t vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int 156 num_msgs); 157 158 /* dkio */ 159 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); 160 static int vdc_create_fake_geometry(vdc_t *vdc); 161 162 /* 163 * Module variables 164 */ 165 uint64_t vdc_hz_timeout; 166 uint64_t vdc_usec_timeout = VDC_USEC_TIMEOUT_MIN; 167 uint64_t vdc_dump_usec_timeout = VDC_USEC_TIMEOUT_MIN / 300; 168 static int vdc_retries = VDC_RETRIES; 169 static int vdc_dump_retries = VDC_RETRIES * 10; 170 171 /* Soft state pointer */ 172 static void *vdc_state; 173 174 /* variable level controlling the verbosity of the error/debug messages */ 175 int vdc_msglevel = 0; 176 177 178 static void 179 vdc_msg(const char *format, ...) 180 { 181 va_list args; 182 183 va_start(args, format); 184 vcmn_err(CE_CONT, format, args); 185 va_end(args); 186 } 187 188 static struct cb_ops vdc_cb_ops = { 189 vdc_open, /* cb_open */ 190 vdc_close, /* cb_close */ 191 vdc_strategy, /* cb_strategy */ 192 vdc_print, /* cb_print */ 193 vdc_dump, /* cb_dump */ 194 vdc_read, /* cb_read */ 195 vdc_write, /* cb_write */ 196 vdc_ioctl, /* cb_ioctl */ 197 nodev, /* cb_devmap */ 198 nodev, /* cb_mmap */ 199 nodev, /* cb_segmap */ 200 nochpoll, /* cb_chpoll */ 201 ddi_prop_op, /* cb_prop_op */ 202 NULL, /* cb_str */ 203 D_MP | D_64BIT, /* cb_flag */ 204 CB_REV, /* cb_rev */ 205 vdc_aread, /* cb_aread */ 206 vdc_awrite /* cb_awrite */ 207 }; 208 209 static struct dev_ops vdc_ops = { 210 DEVO_REV, /* devo_rev */ 211 0, /* devo_refcnt */ 212 vdc_getinfo, /* devo_getinfo */ 213 nulldev, /* devo_identify */ 214 nulldev, /* devo_probe */ 215 vdc_attach, /* devo_attach */ 216 vdc_detach, /* devo_detach */ 217 nodev, /* devo_reset */ 218 &vdc_cb_ops, /* devo_cb_ops */ 219 NULL, /* devo_bus_ops */ 220 nulldev /* devo_power */ 221 }; 222 223 static struct modldrv modldrv = { 224 &mod_driverops, 225 "virtual disk client %I%", 226 &vdc_ops, 227 }; 228 229 static struct modlinkage modlinkage = { 230 MODREV_1, 231 &modldrv, 232 NULL 233 }; 234 235 /* -------------------------------------------------------------------------- */ 236 237 /* 238 * Device Driver housekeeping and setup 239 */ 240 241 int 242 _init(void) 243 { 244 int status; 245 246 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 247 return (status); 248 if ((status = mod_install(&modlinkage)) != 0) 249 ddi_soft_state_fini(&vdc_state); 250 return (status); 251 } 252 253 int 254 _info(struct modinfo *modinfop) 255 { 256 return (mod_info(&modlinkage, modinfop)); 257 } 258 259 int 260 _fini(void) 261 { 262 int status; 263 264 if ((status = mod_remove(&modlinkage)) != 0) 265 return (status); 266 ddi_soft_state_fini(&vdc_state); 267 return (0); 268 } 269 270 static int 271 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 272 { 273 _NOTE(ARGUNUSED(dip)) 274 275 int instance = SDUNIT(getminor((dev_t)arg)); 276 vdc_t *vdc = NULL; 277 278 switch (cmd) { 279 case DDI_INFO_DEVT2DEVINFO: 280 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 281 *resultp = NULL; 282 return (DDI_FAILURE); 283 } 284 *resultp = vdc->dip; 285 return (DDI_SUCCESS); 286 case DDI_INFO_DEVT2INSTANCE: 287 *resultp = (void *)(uintptr_t)instance; 288 return (DDI_SUCCESS); 289 default: 290 *resultp = NULL; 291 return (DDI_FAILURE); 292 } 293 } 294 295 static int 296 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 297 { 298 int instance; 299 int rv; 300 uint_t retries = 0; 301 vdc_t *vdc = NULL; 302 303 switch (cmd) { 304 case DDI_DETACH: 305 /* the real work happens below */ 306 break; 307 case DDI_SUSPEND: 308 /* nothing to do for this non-device */ 309 return (DDI_SUCCESS); 310 default: 311 return (DDI_FAILURE); 312 } 313 314 ASSERT(cmd == DDI_DETACH); 315 instance = ddi_get_instance(dip); 316 PR1("%s[%d] Entered\n", __func__, instance); 317 318 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 319 vdc_msg("%s[%d]: Could not get state structure.", 320 __func__, instance); 321 return (DDI_FAILURE); 322 } 323 324 if (vdc->open) { 325 PR0("%s[%d]: Cannot detach: device is open", 326 __func__, instance); 327 return (DDI_FAILURE); 328 } 329 330 PR0("%s[%d] proceeding...\n", __func__, instance); 331 332 /* 333 * try and disable callbacks to prevent another handshake 334 */ 335 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 336 PR0("%s[%d] callback disabled (rv=%d)\n", __func__, instance, rv); 337 338 /* 339 * Prevent any more attempts to start a handshake with the vdisk 340 * server and tear down the existing connection. 341 */ 342 mutex_enter(&vdc->lock); 343 vdc->initialized |= VDC_HANDSHAKE_STOP; 344 vdc_reset_connection(vdc, B_TRUE); 345 mutex_exit(&vdc->lock); 346 347 if (vdc->initialized & VDC_THREAD) { 348 mutex_enter(&vdc->msg_proc_lock); 349 vdc->msg_proc_thr_state = VDC_THR_STOP; 350 vdc->msg_pending = B_TRUE; 351 cv_signal(&vdc->msg_proc_cv); 352 353 while (vdc->msg_proc_thr_state != VDC_THR_DONE) { 354 PR0("%s[%d]: Waiting for thread to exit\n", 355 __func__, instance); 356 rv = cv_timedwait(&vdc->msg_proc_cv, 357 &vdc->msg_proc_lock, VD_GET_TIMEOUT_HZ(1)); 358 if ((rv == -1) && (retries++ > vdc_retries)) 359 break; 360 } 361 mutex_exit(&vdc->msg_proc_lock); 362 } 363 364 mutex_enter(&vdc->lock); 365 366 if (vdc->initialized & VDC_DRING) 367 vdc_destroy_descriptor_ring(vdc); 368 369 if (vdc->initialized & VDC_LDC) 370 vdc_terminate_ldc(vdc); 371 372 mutex_exit(&vdc->lock); 373 374 if (vdc->initialized & VDC_MINOR) { 375 ddi_prop_remove_all(dip); 376 ddi_remove_minor_node(dip, NULL); 377 } 378 379 if (vdc->initialized & VDC_LOCKS) { 380 mutex_destroy(&vdc->lock); 381 mutex_destroy(&vdc->attach_lock); 382 mutex_destroy(&vdc->msg_proc_lock); 383 mutex_destroy(&vdc->dring_lock); 384 cv_destroy(&vdc->cv); 385 cv_destroy(&vdc->attach_cv); 386 cv_destroy(&vdc->msg_proc_cv); 387 } 388 389 if (vdc->minfo) 390 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 391 392 if (vdc->cinfo) 393 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 394 395 if (vdc->vtoc) 396 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 397 398 if (vdc->initialized & VDC_SOFT_STATE) 399 ddi_soft_state_free(vdc_state, instance); 400 401 PR0("%s[%d] End %p\n", __func__, instance, vdc); 402 403 return (DDI_SUCCESS); 404 } 405 406 407 static int 408 vdc_do_attach(dev_info_t *dip) 409 { 410 int instance; 411 vdc_t *vdc = NULL; 412 int status; 413 uint_t retries = 0; 414 415 ASSERT(dip != NULL); 416 417 instance = ddi_get_instance(dip); 418 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 419 vdc_msg("%s:(%d): Couldn't alloc state structure", 420 __func__, instance); 421 return (DDI_FAILURE); 422 } 423 424 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 425 vdc_msg("%s:(%d): Could not get state structure.", 426 __func__, instance); 427 return (DDI_FAILURE); 428 } 429 430 /* 431 * We assign the value to initialized in this case to zero out the 432 * variable and then set bits in it to indicate what has been done 433 */ 434 vdc->initialized = VDC_SOFT_STATE; 435 436 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 437 438 vdc->dip = dip; 439 vdc->instance = instance; 440 vdc->open = 0; 441 vdc->vdisk_type = VD_DISK_TYPE_UNK; 442 vdc->state = VD_STATE_INIT; 443 vdc->ldc_state = 0; 444 vdc->session_id = 0; 445 vdc->block_size = DEV_BSIZE; 446 vdc->max_xfer_sz = VD_MAX_BLOCK_SIZE / DEV_BSIZE; 447 448 vdc->vtoc = NULL; 449 vdc->cinfo = NULL; 450 vdc->minfo = NULL; 451 452 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 453 mutex_init(&vdc->attach_lock, NULL, MUTEX_DRIVER, NULL); 454 mutex_init(&vdc->msg_proc_lock, NULL, MUTEX_DRIVER, NULL); 455 mutex_init(&vdc->dring_lock, NULL, MUTEX_DRIVER, NULL); 456 cv_init(&vdc->cv, NULL, CV_DRIVER, NULL); 457 cv_init(&vdc->attach_cv, NULL, CV_DRIVER, NULL); 458 cv_init(&vdc->msg_proc_cv, NULL, CV_DRIVER, NULL); 459 vdc->initialized |= VDC_LOCKS; 460 461 vdc->msg_pending = B_FALSE; 462 vdc->msg_proc_thr_id = thread_create(NULL, 0, vdc_process_msg_thread, 463 vdc, 0, &p0, TS_RUN, minclsyspri); 464 if (vdc->msg_proc_thr_id == NULL) { 465 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 466 instance); 467 return (DDI_FAILURE); 468 } 469 vdc->initialized |= VDC_THREAD; 470 471 /* initialise LDC channel which will be used to communicate with vds */ 472 if (vdc_do_ldc_init(vdc) != 0) { 473 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 474 return (DDI_FAILURE); 475 } 476 477 /* Bring up connection with vds via LDC */ 478 status = vdc_start_ldc_connection(vdc); 479 if (status != 0) { 480 vdc_msg("%s[%d] Could not start LDC", __func__, instance); 481 return (DDI_FAILURE); 482 } 483 484 /* 485 * We need to wait until the handshake has completed before leaving 486 * the attach(). This is to allow the device node(s) to be created 487 * and the first usage of the filesystem to succeed. 488 */ 489 mutex_enter(&vdc->attach_lock); 490 while ((vdc->ldc_state != LDC_UP) || 491 (vdc->state != VD_STATE_DATA)) { 492 493 PR0("%s[%d] handshake in progress [VD %d (LDC %d)]\n", 494 __func__, instance, vdc->state, vdc->ldc_state); 495 496 status = cv_timedwait(&vdc->attach_cv, &vdc->attach_lock, 497 VD_GET_TIMEOUT_HZ(1)); 498 if (status == -1) { 499 if (retries >= vdc_retries) { 500 PR0("%s[%d] Give up handshake wait.\n", 501 __func__, instance); 502 mutex_exit(&vdc->attach_lock); 503 return (DDI_FAILURE); 504 } else { 505 PR0("%s[%d] Retry #%d for handshake.\n", 506 __func__, instance, retries); 507 retries++; 508 } 509 } 510 } 511 mutex_exit(&vdc->attach_lock); 512 513 if (vdc->vtoc == NULL) 514 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 515 516 status = vdc_populate_descriptor(vdc, (caddr_t)vdc->vtoc, 517 P2ROUNDUP(sizeof (struct vtoc), sizeof (uint64_t)), 518 VD_OP_GET_VTOC, FKIOCTL, 0); 519 if (status) { 520 cmn_err(CE_NOTE, "[%d] Failed to get VTOC", instance); 521 return (status); 522 } 523 524 /* 525 * Now that we have the device info we can create the 526 * device nodes and properties 527 */ 528 status = vdc_create_device_nodes(vdc); 529 if (status) { 530 cmn_err(CE_NOTE, "[%d] Failed to create device nodes", 531 instance); 532 return (status); 533 } 534 status = vdc_create_device_nodes_props(vdc); 535 if (status) { 536 cmn_err(CE_NOTE, "[%d] Failed to create device nodes" 537 " properties", instance); 538 return (status); 539 } 540 541 ddi_report_dev(dip); 542 543 PR0("%s[%d] Attach completed\n", __func__, instance); 544 return (status); 545 } 546 547 static int 548 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 549 { 550 int status; 551 552 PR0("%s[%d] Entered. Built %s %s\n", __func__, ddi_get_instance(dip), 553 __DATE__, __TIME__); 554 555 switch (cmd) { 556 case DDI_ATTACH: 557 if ((status = vdc_do_attach(dip)) != 0) 558 (void) vdc_detach(dip, DDI_DETACH); 559 return (status); 560 case DDI_RESUME: 561 /* nothing to do for this non-device */ 562 return (DDI_SUCCESS); 563 default: 564 return (DDI_FAILURE); 565 } 566 } 567 568 static int 569 vdc_do_ldc_init(vdc_t *vdc) 570 { 571 int status = 0; 572 ldc_status_t ldc_state; 573 ldc_attr_t ldc_attr; 574 uint64_t ldc_id = 0; 575 dev_info_t *dip = NULL; 576 577 ASSERT(vdc != NULL); 578 579 dip = vdc->dip; 580 vdc->initialized |= VDC_LDC; 581 582 if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) { 583 vdc_msg("%s: Failed to get <ldc_id> property\n", __func__); 584 return (EIO); 585 } 586 vdc->ldc_id = ldc_id; 587 588 ldc_attr.devclass = LDC_DEV_BLK; 589 ldc_attr.instance = vdc->instance; 590 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 591 ldc_attr.qlen = VD_LDC_QLEN; 592 593 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 594 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 595 if (status != 0) { 596 cmn_err(CE_NOTE, "[%d] ldc_init(chan %ld) returned %d", 597 vdc->instance, ldc_id, status); 598 return (status); 599 } 600 vdc->initialized |= VDC_LDC_INIT; 601 } 602 status = ldc_status(vdc->ldc_handle, &ldc_state); 603 if (status != 0) { 604 vdc_msg("Cannot discover LDC status [err=%d].", status); 605 return (status); 606 } 607 vdc->ldc_state = ldc_state; 608 609 if ((vdc->initialized & VDC_LDC_CB) == 0) { 610 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 611 (caddr_t)vdc); 612 if (status != 0) { 613 vdc_msg("%s: ldc_reg_callback()=%d", __func__, status); 614 return (status); 615 } 616 vdc->initialized |= VDC_LDC_CB; 617 } 618 619 vdc->initialized |= VDC_LDC; 620 621 /* 622 * At this stage we have initialised LDC, we will now try and open 623 * the connection. 624 */ 625 if (vdc->ldc_state == LDC_INIT) { 626 status = ldc_open(vdc->ldc_handle); 627 if (status != 0) { 628 cmn_err(CE_NOTE, "[%d] ldc_open(chan %ld) returned %d", 629 vdc->instance, vdc->ldc_id, status); 630 return (status); 631 } 632 vdc->initialized |= VDC_LDC_OPEN; 633 } 634 635 return (status); 636 } 637 638 static int 639 vdc_start_ldc_connection(vdc_t *vdc) 640 { 641 int status = 0; 642 643 ASSERT(vdc != NULL); 644 645 mutex_enter(&vdc->lock); 646 647 if (vdc->ldc_state == LDC_UP) { 648 PR0("%s: LDC is already UP ..\n", __func__); 649 mutex_exit(&vdc->lock); 650 return (0); 651 } 652 653 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 654 switch (status) { 655 case ECONNREFUSED: /* listener not ready at other end */ 656 PR0("%s: ldc_up(%d,...) return %d\n", 657 __func__, vdc->ldc_id, status); 658 status = 0; 659 break; 660 default: 661 cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: " 662 "channel=%ld, err=%d", 663 vdc->instance, vdc->ldc_id, status); 664 } 665 } 666 667 PR0("%s[%d] Finished bringing up LDC\n", __func__, vdc->instance); 668 669 mutex_exit(&vdc->lock); 670 671 return (status); 672 } 673 674 675 /* 676 * Function: 677 * vdc_create_device_nodes 678 * 679 * Description: 680 * This function creates the block and character device nodes under 681 * /devices along with the node properties. It is called as part of 682 * the attach(9E) of the instance during the handshake with vds after 683 * vds has sent the attributes to vdc. 684 * 685 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 686 * of 2 is used in keeping with the Solaris convention that slice 2 687 * refers to a whole disk. Slices start at 'a' 688 * 689 * Parameters: 690 * vdc - soft state pointer 691 * 692 * Return Values 693 * 0 - Success 694 * EIO - Failed to create node 695 * EINVAL - Unknown type of disk exported 696 */ 697 static int 698 vdc_create_device_nodes(vdc_t *vdc) 699 { 700 /* uses NNNN which is OK as long as # of disks <= 10000 */ 701 char name[sizeof ("disk@NNNN:s,raw")]; 702 dev_info_t *dip = NULL; 703 int instance; 704 int num_slices = 1; 705 int i; 706 707 ASSERT(vdc != NULL); 708 709 instance = vdc->instance; 710 dip = vdc->dip; 711 712 switch (vdc->vdisk_type) { 713 case VD_DISK_TYPE_DISK: 714 num_slices = V_NUMPAR; 715 break; 716 case VD_DISK_TYPE_SLICE: 717 num_slices = 1; 718 break; 719 case VD_DISK_TYPE_UNK: 720 default: 721 return (EINVAL); 722 } 723 724 for (i = 0; i < num_slices; i++) { 725 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 726 if (ddi_create_minor_node(dip, name, S_IFBLK, 727 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 728 vdc_msg("%s[%d]: Couldn't add block node %s.", 729 __func__, instance, name); 730 return (EIO); 731 } 732 733 /* if any device node is created we set this flag */ 734 vdc->initialized |= VDC_MINOR; 735 736 (void) snprintf(name, sizeof (name), "%c%s", 737 'a' + i, ",raw"); 738 if (ddi_create_minor_node(dip, name, S_IFCHR, 739 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 740 vdc_msg("%s[%d]: Could not add raw node %s.", 741 __func__, instance, name); 742 return (EIO); 743 } 744 } 745 746 return (0); 747 } 748 749 /* 750 * Function: 751 * vdc_create_device_nodes_props 752 * 753 * Description: 754 * This function creates the block and character device nodes under 755 * /devices along with the node properties. It is called as part of 756 * the attach(9E) of the instance during the handshake with vds after 757 * vds has sent the attributes to vdc. 758 * 759 * Parameters: 760 * vdc - soft state pointer 761 * 762 * Return Values 763 * 0 - Success 764 * EIO - Failed to create device node property 765 * EINVAL - Unknown type of disk exported 766 */ 767 static int 768 vdc_create_device_nodes_props(vdc_t *vdc) 769 { 770 dev_info_t *dip = NULL; 771 int instance; 772 int num_slices = 1; 773 int64_t size = 0; 774 dev_t dev; 775 int rv; 776 int i; 777 778 ASSERT(vdc != NULL); 779 780 instance = vdc->instance; 781 dip = vdc->dip; 782 783 if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { 784 cmn_err(CE_NOTE, "![%d] Could not create device node property." 785 " No VTOC available", instance); 786 return (ENXIO); 787 } 788 789 switch (vdc->vdisk_type) { 790 case VD_DISK_TYPE_DISK: 791 num_slices = V_NUMPAR; 792 break; 793 case VD_DISK_TYPE_SLICE: 794 num_slices = 1; 795 break; 796 case VD_DISK_TYPE_UNK: 797 default: 798 return (EINVAL); 799 } 800 801 for (i = 0; i < num_slices; i++) { 802 dev = makedevice(ddi_driver_major(dip), 803 VD_MAKE_DEV(instance, i)); 804 805 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 806 PR0("%s[%d] sz %ld (%ld Mb) p_size %lx\n", 807 __func__, instance, size, size / (1024 * 1024), 808 vdc->vtoc->v_part[i].p_size); 809 810 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 811 if (rv != DDI_PROP_SUCCESS) { 812 vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", 813 __func__, instance, VDC_SIZE_PROP_NAME, size); 814 return (EIO); 815 } 816 817 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 818 lbtodb(size)); 819 if (rv != DDI_PROP_SUCCESS) { 820 vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", __func__, 821 instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 822 return (EIO); 823 } 824 } 825 826 return (0); 827 } 828 829 static int 830 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 831 { 832 _NOTE(ARGUNUSED(cred)) 833 834 int instance; 835 int status = 0; 836 vdc_t *vdc; 837 838 ASSERT(dev != NULL); 839 instance = SDUNIT(getminor(*dev)); 840 841 PR0("%s[%d] minor = %d flag = %x, otyp = %x\n", __func__, instance, 842 getminor(*dev), flag, otyp); 843 844 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 845 return (EINVAL); 846 847 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 848 vdc_msg("%s[%d] Could not get state.", __func__, instance); 849 return (ENXIO); 850 } 851 852 /* 853 * Check to see if we can communicate with vds 854 */ 855 status = vdc_is_able_to_tx_data(vdc, flag); 856 if (status == B_FALSE) { 857 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 858 return (ENOLINK); 859 } 860 861 mutex_enter(&vdc->lock); 862 vdc->open++; 863 mutex_exit(&vdc->lock); 864 865 return (0); 866 } 867 868 static int 869 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 870 { 871 _NOTE(ARGUNUSED(cred)) 872 873 int instance; 874 vdc_t *vdc; 875 876 instance = SDUNIT(getminor(dev)); 877 878 PR0("%s[%d] flag = %x, otyp = %x\n", __func__, instance, flag, otyp); 879 880 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 881 return (EINVAL); 882 883 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 884 vdc_msg("%s[%d] Could not get state.", __func__, instance); 885 return (ENXIO); 886 } 887 888 /* 889 * Check to see if we can communicate with vds 890 */ 891 if (vdc_is_able_to_tx_data(vdc, 0) == B_FALSE) { 892 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 893 return (ETIMEDOUT); 894 } 895 896 if (vdc->dkio_flush_pending) { 897 PR0("%s[%d]: Cannot detach: %d outstanding DKIO flushes", 898 __func__, instance, vdc->dkio_flush_pending); 899 return (EBUSY); 900 } 901 902 /* 903 * Should not need the mutex here, since the framework should protect 904 * against more opens on this device, but just in case. 905 */ 906 mutex_enter(&vdc->lock); 907 vdc->open--; 908 mutex_exit(&vdc->lock); 909 910 return (0); 911 } 912 913 static int 914 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 915 { 916 _NOTE(ARGUNUSED(credp)) 917 _NOTE(ARGUNUSED(rvalp)) 918 919 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode)); 920 } 921 922 static int 923 vdc_print(dev_t dev, char *str) 924 { 925 cmn_err(CE_NOTE, "vdc%d: %s", SDUNIT(getminor(dev)), str); 926 return (0); 927 } 928 929 static int 930 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 931 { 932 int rv = 0; 933 size_t nbytes = (nblk * DEV_BSIZE); 934 int instance = SDUNIT(getminor(dev)); 935 vdc_t *vdc; 936 937 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 938 vdc_msg("%s (%d): Could not get state.", __func__, instance); 939 return (ENXIO); 940 } 941 942 rv = vdc_populate_descriptor(vdc, addr, nbytes, VD_OP_BWRITE, 943 blkno, SDPART(getminor(dev))); 944 945 PR1("%s: status=%d\n", __func__, rv); 946 947 return (rv); 948 } 949 950 /* -------------------------------------------------------------------------- */ 951 952 /* 953 * Disk access routines 954 * 955 */ 956 957 /* 958 * vdc_strategy() 959 * 960 * Return Value: 961 * 0: As per strategy(9E), the strategy() function must return 0 962 * [ bioerror(9f) sets b_flags to the proper error code ] 963 */ 964 static int 965 vdc_strategy(struct buf *buf) 966 { 967 int rv = -1; 968 vdc_t *vdc = NULL; 969 int instance = SDUNIT(getminor(buf->b_edev)); 970 int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; 971 972 PR1("%s: %s %ld bytes at block %ld : b_addr=0x%p", 973 __func__, (buf->b_flags & B_READ) ? "Read" : "Write", 974 buf->b_bcount, buf->b_lblkno, buf->b_un.b_addr); 975 976 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 977 vdc_msg("%s[%d]: Could not get state.", __func__, instance); 978 bioerror(buf, ENXIO); 979 biodone(buf); 980 return (0); 981 } 982 983 ASSERT(buf->b_bcount <= (vdc->max_xfer_sz * vdc->block_size)); 984 985 if (vdc_is_able_to_tx_data(vdc, O_NONBLOCK) == B_FALSE) { 986 vdc_msg("%s: Not ready to transmit data", __func__); 987 bioerror(buf, ENXIO); 988 biodone(buf); 989 return (0); 990 } 991 bp_mapin(buf); 992 993 rv = vdc_populate_descriptor(vdc, buf->b_un.b_addr, buf->b_bcount, op, 994 buf->b_lblkno, SDPART(getminor(buf->b_edev))); 995 996 PR1("%s: status=%d", __func__, rv); 997 bioerror(buf, rv); 998 biodone(buf); 999 return (0); 1000 } 1001 1002 1003 static int 1004 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1005 { 1006 _NOTE(ARGUNUSED(cred)) 1007 1008 PR1("vdc_read(): Entered"); 1009 return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio)); 1010 } 1011 1012 static int 1013 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1014 { 1015 _NOTE(ARGUNUSED(cred)) 1016 1017 PR1("vdc_write(): Entered"); 1018 return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio)); 1019 } 1020 1021 static int 1022 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1023 { 1024 _NOTE(ARGUNUSED(cred)) 1025 1026 PR1("vdc_aread(): Entered"); 1027 return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio)); 1028 } 1029 1030 static int 1031 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1032 { 1033 _NOTE(ARGUNUSED(cred)) 1034 1035 PR1("vdc_awrite(): Entered"); 1036 return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio)); 1037 } 1038 1039 1040 /* -------------------------------------------------------------------------- */ 1041 1042 /* 1043 * Handshake support 1044 */ 1045 1046 /* 1047 * vdc_init_handshake_negotiation 1048 * 1049 * Description: 1050 * This function is called to trigger the handshake negotiations between 1051 * the client (vdc) and the server (vds). It may be called multiple times. 1052 * 1053 * Parameters: 1054 * vdc - soft state pointer 1055 */ 1056 static void 1057 vdc_init_handshake_negotiation(void *arg) 1058 { 1059 vdc_t *vdc = (vdc_t *)(void *)arg; 1060 vd_state_t state; 1061 1062 ASSERT(vdc != NULL); 1063 ASSERT(vdc->ldc_state == LDC_UP); 1064 1065 mutex_enter(&vdc->lock); 1066 1067 /* 1068 * Do not continue if another thread has triggered a handshake which 1069 * is in progress or detach() has stopped further handshakes. 1070 */ 1071 if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) { 1072 PR0("%s[%d] Negotiation not triggered. [init=%x]\n", 1073 __func__, vdc->instance, vdc->initialized); 1074 mutex_exit(&vdc->lock); 1075 return; 1076 } 1077 1078 PR0("Initializing vdc<->vds handshake\n"); 1079 1080 vdc->initialized |= VDC_HANDSHAKE; 1081 1082 state = vdc->state; 1083 1084 if (state == VD_STATE_INIT) { 1085 (void) vdc_init_ver_negotiation(vdc); 1086 } else if (state == VD_STATE_VER) { 1087 (void) vdc_init_attr_negotiation(vdc); 1088 } else if (state == VD_STATE_ATTR) { 1089 (void) vdc_init_dring_negotiate(vdc); 1090 } else if (state == VD_STATE_DATA) { 1091 /* 1092 * nothing to do - we have already completed the negotiation 1093 * and we can transmit data when ready. 1094 */ 1095 PR0("%s[%d] Negotiation triggered after handshake completed", 1096 __func__, vdc->instance); 1097 } 1098 1099 mutex_exit(&vdc->lock); 1100 } 1101 1102 static int 1103 vdc_init_ver_negotiation(vdc_t *vdc) 1104 { 1105 vio_ver_msg_t pkt; 1106 size_t msglen = sizeof (pkt); 1107 int status = -1; 1108 1109 PR0("%s: Entered.\n", __func__); 1110 1111 ASSERT(vdc != NULL); 1112 ASSERT(mutex_owned(&vdc->lock)); 1113 1114 /* 1115 * set the Session ID to a unique value 1116 * (the lower 32 bits of the clock tick) 1117 */ 1118 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1119 1120 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1121 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1122 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1123 pkt.tag.vio_sid = vdc->session_id; 1124 pkt.dev_class = VDEV_DISK; 1125 pkt.ver_major = VD_VER_MAJOR; 1126 pkt.ver_minor = VD_VER_MINOR; 1127 1128 status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen); 1129 PR0("%s: vdc_send(status = %d)\n", __func__, status); 1130 1131 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1132 PR0("%s[%d] vdc_send failed: id(%lx) rv(%d) size(%d)\n", 1133 __func__, vdc->instance, vdc->ldc_handle, 1134 status, msglen); 1135 if (msglen != sizeof (vio_ver_msg_t)) 1136 status = ENOMSG; 1137 } 1138 1139 return (status); 1140 } 1141 1142 static int 1143 vdc_init_attr_negotiation(vdc_t *vdc) 1144 { 1145 vd_attr_msg_t pkt; 1146 size_t msglen = sizeof (pkt); 1147 int status; 1148 1149 ASSERT(vdc != NULL); 1150 ASSERT(mutex_owned(&vdc->lock)); 1151 1152 PR0("%s[%d] entered\n", __func__, vdc->instance); 1153 1154 /* fill in tag */ 1155 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1156 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1157 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1158 pkt.tag.vio_sid = vdc->session_id; 1159 /* fill in payload */ 1160 pkt.max_xfer_sz = vdc->max_xfer_sz; 1161 pkt.vdisk_block_size = vdc->block_size; 1162 pkt.xfer_mode = VIO_DRING_MODE; 1163 pkt.operations = 0; /* server will set bits of valid operations */ 1164 pkt.vdisk_type = 0; /* server will set to valid device type */ 1165 pkt.vdisk_size = 0; /* server will set to valid size */ 1166 1167 status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen); 1168 PR0("%s: vdc_send(status = %d)\n", __func__, status); 1169 1170 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1171 PR0("%s[%d] ldc_write failed: id(%lx) rv(%d) size (%d)\n", 1172 __func__, vdc->instance, vdc->ldc_handle, 1173 status, msglen); 1174 if (msglen != sizeof (vio_ver_msg_t)) 1175 status = ENOMSG; 1176 } 1177 1178 return (status); 1179 } 1180 1181 static int 1182 vdc_init_dring_negotiate(vdc_t *vdc) 1183 { 1184 vio_dring_reg_msg_t pkt; 1185 size_t msglen = sizeof (pkt); 1186 int status = -1; 1187 1188 ASSERT(vdc != NULL); 1189 ASSERT(mutex_owned(&vdc->lock)); 1190 1191 status = vdc_init_descriptor_ring(vdc); 1192 PR0("%s[%d] Init of descriptor ring completed (status = %d)\n", 1193 __func__, vdc->instance, status); 1194 if (status != 0) { 1195 cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n", 1196 vdc->instance, status); 1197 vdc_reset_connection(vdc, B_FALSE); 1198 return (status); 1199 } 1200 1201 /* fill in tag */ 1202 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1203 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1204 pkt.tag.vio_subtype_env = VIO_DRING_REG; 1205 pkt.tag.vio_sid = vdc->session_id; 1206 /* fill in payload */ 1207 pkt.dring_ident = 0; 1208 pkt.num_descriptors = VD_DRING_LEN; 1209 pkt.descriptor_size = VD_DRING_ENTRY_SZ; 1210 pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 1211 pkt.ncookies = vdc->dring_cookie_count; 1212 pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 1213 1214 status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen); 1215 if (status != 0) { 1216 PR0("%s[%d] Failed to register DRing (status = %d)\n", 1217 __func__, vdc->instance, status); 1218 vdc_reset_connection(vdc, B_FALSE); 1219 } 1220 1221 return (status); 1222 } 1223 1224 1225 /* -------------------------------------------------------------------------- */ 1226 1227 /* 1228 * LDC helper routines 1229 */ 1230 1231 /* 1232 * Function: 1233 * vdc_send() 1234 * 1235 * Description: 1236 * The function encapsulates the call to write a message using LDC. 1237 * If LDC indicates that the call failed due to the queue being full, 1238 * we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise 1239 * we return the error returned by LDC. 1240 * 1241 * Arguments: 1242 * ldc_handle - LDC handle for the channel this instance of vdc uses 1243 * pkt - address of LDC message to be sent 1244 * msglen - the size of the message being sent. When the function 1245 * returns, this contains the number of bytes written. 1246 * 1247 * Return Code: 1248 * 0 - Success. 1249 * EINVAL - pkt or msglen were NULL 1250 * ECONNRESET - The connection was not up. 1251 * EWOULDBLOCK - LDC queue is full 1252 * xxx - other error codes returned by ldc_write 1253 */ 1254 static int 1255 vdc_send(ldc_handle_t ldc_handle, caddr_t pkt, size_t *msglen) 1256 { 1257 size_t size = 0; 1258 int retries = 0; 1259 int status = 0; 1260 1261 ASSERT(msglen != NULL); 1262 ASSERT(*msglen != 0); 1263 1264 do { 1265 size = *msglen; 1266 status = ldc_write(ldc_handle, pkt, &size); 1267 } while (status == EWOULDBLOCK && retries++ < vdc_retries); 1268 1269 /* return the last size written */ 1270 *msglen = size; 1271 1272 return (status); 1273 } 1274 1275 /* 1276 * Function: 1277 * vdc_get_ldc_id() 1278 * 1279 * Description: 1280 * This function gets the 'ldc-id' for this particular instance of vdc. 1281 * The id returned is the guest domain channel endpoint LDC uses for 1282 * communication with vds. 1283 * 1284 * Arguments: 1285 * dip - dev info pointer for this instance of the device driver. 1286 * ldc_id - pointer to variable used to return the 'ldc-id' found. 1287 * 1288 * Return Code: 1289 * 0 - Success. 1290 * ENOENT - Expected node or property did not exist. 1291 * ENXIO - Unexpected error communicating with MD framework 1292 */ 1293 static int 1294 vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id) 1295 { 1296 int status = ENOENT; 1297 char *node_name = NULL; 1298 md_t *mdp = NULL; 1299 int num_nodes; 1300 int num_vdevs; 1301 int num_chans; 1302 mde_cookie_t rootnode; 1303 mde_cookie_t *listp = NULL; 1304 mde_cookie_t *chanp = NULL; 1305 boolean_t found_inst = B_FALSE; 1306 int listsz; 1307 int idx; 1308 uint64_t md_inst; 1309 int obp_inst; 1310 int instance = ddi_get_instance(dip); 1311 1312 ASSERT(ldc_id != NULL); 1313 *ldc_id = 0; 1314 1315 /* 1316 * Get the OBP instance number for comparison with the MD instance 1317 * 1318 * The "cfg-handle" property of a vdc node in an MD contains the MD's 1319 * notion of "instance", or unique identifier, for that node; OBP 1320 * stores the value of the "cfg-handle" MD property as the value of 1321 * the "reg" property on the node in the device tree it builds from 1322 * the MD and passes to Solaris. Thus, we look up the devinfo node's 1323 * "reg" property value to uniquely identify this device instance. 1324 * If the "reg" property cannot be found, the device tree state is 1325 * presumably so broken that there is no point in continuing. 1326 */ 1327 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 1328 cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 1329 return (ENOENT); 1330 } 1331 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1332 OBP_REG, -1); 1333 PR1("%s[%d]: OBP inst=%d\n", __func__, instance, obp_inst); 1334 1335 /* 1336 * We now walk the MD nodes and if an instance of a vdc node matches 1337 * the instance got from OBP we get the ldc-id property. 1338 */ 1339 if ((mdp = md_get_handle()) == NULL) { 1340 cmn_err(CE_WARN, "unable to init machine description"); 1341 return (ENXIO); 1342 } 1343 1344 num_nodes = md_node_count(mdp); 1345 ASSERT(num_nodes > 0); 1346 1347 listsz = num_nodes * sizeof (mde_cookie_t); 1348 1349 /* allocate memory for nodes */ 1350 listp = kmem_zalloc(listsz, KM_SLEEP); 1351 chanp = kmem_zalloc(listsz, KM_SLEEP); 1352 1353 rootnode = md_root_node(mdp); 1354 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 1355 1356 /* 1357 * Search for all the virtual devices, we will then check to see which 1358 * ones are disk nodes. 1359 */ 1360 num_vdevs = md_scan_dag(mdp, rootnode, 1361 md_find_name(mdp, VDC_MD_VDEV_NAME), 1362 md_find_name(mdp, "fwd"), listp); 1363 1364 if (num_vdevs <= 0) { 1365 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 1366 status = ENOENT; 1367 goto done; 1368 } 1369 1370 PR1("%s[%d] num_vdevs=%d\n", __func__, instance, num_vdevs); 1371 for (idx = 0; idx < num_vdevs; idx++) { 1372 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 1373 if ((status != 0) || (node_name == NULL)) { 1374 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 1375 ": err %d", VDC_MD_VDEV_NAME, status); 1376 continue; 1377 } 1378 1379 PR1("%s[%d] Found node %s\n", __func__, instance, node_name); 1380 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 1381 status = md_get_prop_val(mdp, listp[idx], 1382 VDC_MD_CFG_HDL, &md_inst); 1383 PR1("%s[%d] vdc inst# in MD=%d\n", 1384 __func__, instance, md_inst); 1385 if ((status == 0) && (md_inst == obp_inst)) { 1386 found_inst = B_TRUE; 1387 break; 1388 } 1389 } 1390 } 1391 1392 if (found_inst == B_FALSE) { 1393 cmn_err(CE_NOTE, "Unable to find correct '%s' node", 1394 VDC_MD_DISK_NAME); 1395 status = ENOENT; 1396 goto done; 1397 } 1398 PR0("%s[%d] MD inst=%d\n", __func__, instance, md_inst); 1399 1400 /* get the channels for this node */ 1401 num_chans = md_scan_dag(mdp, listp[idx], 1402 md_find_name(mdp, VDC_MD_CHAN_NAME), 1403 md_find_name(mdp, "fwd"), chanp); 1404 1405 /* expecting at least one channel */ 1406 if (num_chans <= 0) { 1407 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 1408 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 1409 status = ENOENT; 1410 goto done; 1411 1412 } else if (num_chans != 1) { 1413 PR0("%s[%d] Expected 1 '%s' node for '%s' port, found %d\n", 1414 __func__, instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 1415 num_chans); 1416 } 1417 1418 /* 1419 * We use the first channel found (index 0), irrespective of how 1420 * many are there in total. 1421 */ 1422 if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) { 1423 cmn_err(CE_NOTE, "Channel '%s' property not found", 1424 VDC_ID_PROP); 1425 status = ENOENT; 1426 } 1427 1428 PR0("%s[%d] LDC id is 0x%lx\n", __func__, instance, *ldc_id); 1429 1430 done: 1431 if (chanp) 1432 kmem_free(chanp, listsz); 1433 if (listp) 1434 kmem_free(listp, listsz); 1435 1436 (void) md_fini_handle(mdp); 1437 1438 return (status); 1439 } 1440 1441 1442 /* 1443 * vdc_is_able_to_tx_data() 1444 * 1445 * Description: 1446 * This function checks if we are able to send data to the 1447 * vDisk server (vds). The LDC connection needs to be up and 1448 * vdc & vds need to have completed the handshake negotiation. 1449 * 1450 * Parameters: 1451 * vdc - soft state pointer 1452 * flag - flag to indicate if we can block or not 1453 * [ If O_NONBLOCK or O_NDELAY (which are defined in 1454 * open(2)) are set then do not block) 1455 * 1456 * Return Values 1457 * B_TRUE - can talk to vds 1458 * B_FALSE - unable to talk to vds 1459 */ 1460 static boolean_t 1461 vdc_is_able_to_tx_data(vdc_t *vdc, int flag) 1462 { 1463 vd_state_t state; 1464 uint32_t ldc_state; 1465 uint_t retries = 0; 1466 int rv = -1; 1467 1468 ASSERT(vdc != NULL); 1469 1470 mutex_enter(&vdc->lock); 1471 state = vdc->state; 1472 ldc_state = vdc->ldc_state; 1473 mutex_exit(&vdc->lock); 1474 1475 if ((state == VD_STATE_DATA) && (ldc_state == LDC_UP)) 1476 return (B_TRUE); 1477 1478 if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) { 1479 PR0("%s[%d] Not ready to tx - state %d LDC state %d\n", 1480 __func__, vdc->instance, state, ldc_state); 1481 return (B_FALSE); 1482 } 1483 1484 /* 1485 * We want to check and see if any negotiations triggered earlier 1486 * have succeeded. We are prepared to wait a little while in case 1487 * they are still in progress. 1488 */ 1489 mutex_enter(&vdc->lock); 1490 while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) { 1491 PR0("%s: Waiting for connection at state %d (LDC state %d)\n", 1492 __func__, vdc->state, vdc->ldc_state); 1493 1494 rv = cv_timedwait(&vdc->cv, &vdc->lock, 1495 VD_GET_TIMEOUT_HZ(retries)); 1496 1497 /* 1498 * An rv of -1 indicates that we timed out without the LDC 1499 * state changing so it looks like the other side (vdc) is 1500 * not yet ready/responding. 1501 * 1502 * Any other value of rv indicates that the LDC triggered an 1503 * interrupt so we just loop again, check the handshake state 1504 * and keep waiting if necessary. 1505 */ 1506 if (rv == -1) { 1507 if (retries >= vdc_retries) { 1508 PR0("%s[%d] handshake wait timed out.\n", 1509 __func__, vdc->instance); 1510 mutex_exit(&vdc->lock); 1511 return (B_FALSE); 1512 } else { 1513 PR1("%s[%d] Retry #%d for handshake timedout\n", 1514 __func__, vdc->instance, retries); 1515 retries++; 1516 } 1517 } 1518 } 1519 1520 ASSERT(vdc->ldc_state == LDC_UP); 1521 ASSERT(vdc->state == VD_STATE_DATA); 1522 1523 mutex_exit(&vdc->lock); 1524 1525 return (B_TRUE); 1526 } 1527 1528 1529 static void 1530 vdc_terminate_ldc(vdc_t *vdc) 1531 { 1532 int instance = ddi_get_instance(vdc->dip); 1533 1534 ASSERT(vdc != NULL); 1535 ASSERT(mutex_owned(&vdc->lock)); 1536 1537 PR0("%s[%d] initialized=%x\n", __func__, instance, vdc->initialized); 1538 1539 if (vdc->initialized & VDC_LDC_OPEN) { 1540 PR0("%s[%d]: ldc_close()\n", __func__, instance); 1541 (void) ldc_close(vdc->ldc_handle); 1542 } 1543 if (vdc->initialized & VDC_LDC_CB) { 1544 PR0("%s[%d]: ldc_unreg_callback()\n", __func__, instance); 1545 (void) ldc_unreg_callback(vdc->ldc_handle); 1546 } 1547 if (vdc->initialized & VDC_LDC) { 1548 PR0("%s[%d]: ldc_fini()\n", __func__, instance); 1549 (void) ldc_fini(vdc->ldc_handle); 1550 vdc->ldc_handle = NULL; 1551 } 1552 1553 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 1554 } 1555 1556 static void 1557 vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc) 1558 { 1559 int status; 1560 1561 ASSERT(vdc != NULL); 1562 ASSERT(mutex_owned(&vdc->lock)); 1563 1564 PR0("%s[%d] Entered\n", __func__, vdc->instance); 1565 1566 vdc->state = VD_STATE_INIT; 1567 1568 if (reset_ldc == B_TRUE) { 1569 status = ldc_reset(vdc->ldc_handle); 1570 PR0("%s[%d] ldc_reset() = %d\n", 1571 __func__, vdc->instance, status); 1572 } 1573 1574 vdc->initialized &= ~VDC_HANDSHAKE; 1575 PR0("%s[%d] init=%x\n", __func__, vdc->instance, vdc->initialized); 1576 } 1577 1578 /* -------------------------------------------------------------------------- */ 1579 1580 /* 1581 * Descriptor Ring helper routines 1582 */ 1583 1584 static int 1585 vdc_init_descriptor_ring(vdc_t *vdc) 1586 { 1587 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 1588 int status = -1; 1589 int i; 1590 1591 PR0("%s\n", __func__); 1592 1593 ASSERT(vdc != NULL); 1594 ASSERT(mutex_owned(&vdc->lock)); 1595 ASSERT(vdc->ldc_handle != NULL); 1596 1597 status = ldc_mem_dring_create(VD_DRING_LEN, VD_DRING_ENTRY_SZ, 1598 &vdc->ldc_dring_hdl); 1599 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 1600 PR0("%s: Failed to create a descriptor ring", __func__); 1601 return (status); 1602 } 1603 vdc->initialized |= VDC_DRING; 1604 vdc->dring_entry_size = VD_DRING_ENTRY_SZ; 1605 vdc->dring_len = VD_DRING_LEN; 1606 1607 vdc->dring_cookie = kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 1608 1609 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 1610 LDC_SHADOW_MAP, LDC_MEM_RW, &vdc->dring_cookie[0], 1611 &vdc->dring_cookie_count); 1612 if (status != 0) { 1613 PR0("%s: Failed to bind descriptor ring (%p) to channel (%p)\n", 1614 __func__, vdc->ldc_dring_hdl, vdc->ldc_handle); 1615 return (status); 1616 } 1617 ASSERT(vdc->dring_cookie_count == 1); 1618 vdc->initialized |= VDC_DRING_BOUND; 1619 1620 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 1621 if (status != 0) { 1622 PR0("%s: Failed to get info for descriptor ring (%p)\n", 1623 __func__, vdc->ldc_dring_hdl); 1624 return (status); 1625 } 1626 1627 /* Allocate the local copy of this dring */ 1628 vdc->local_dring = kmem_zalloc(VD_DRING_LEN * sizeof (vdc_local_desc_t), 1629 KM_SLEEP); 1630 vdc->initialized |= VDC_DRING_LOCAL; 1631 1632 /* 1633 * Mark all DRing entries as free and init priv desc memory handles 1634 * If any entry is initialized, we need to free it later so we set 1635 * the bit in 'initialized' at the start. 1636 */ 1637 vdc->initialized |= VDC_DRING_ENTRY; 1638 for (i = 0; i < VD_DRING_LEN; i++) { 1639 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 1640 dep->hdr.dstate = VIO_DESC_FREE; 1641 1642 status = ldc_mem_alloc_handle(vdc->ldc_handle, 1643 &vdc->local_dring[i].desc_mhdl); 1644 if (status != 0) { 1645 cmn_err(CE_NOTE, "![%d] Failed to alloc mem handle for" 1646 " descriptor %d", vdc->instance, i); 1647 return (status); 1648 } 1649 vdc->local_dring[i].flags = VIO_DESC_FREE; 1650 vdc->local_dring[i].flags |= VDC_ALLOC_HANDLE; 1651 vdc->local_dring[i].dep = dep; 1652 1653 mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL); 1654 cv_init(&vdc->local_dring[i].cv, NULL, CV_DRIVER, NULL); 1655 } 1656 1657 /* 1658 * We init the index of the last DRing entry used. Since the code to 1659 * get the next available entry increments it before selecting one, 1660 * we set it to the last DRing entry so that it wraps around to zero 1661 * for the 1st entry to be used. 1662 */ 1663 vdc->dring_curr_idx = VD_DRING_LEN - 1; 1664 1665 return (status); 1666 } 1667 1668 static void 1669 vdc_destroy_descriptor_ring(vdc_t *vdc) 1670 { 1671 ldc_mem_handle_t mhdl = NULL; 1672 int status = -1; 1673 int i; /* loop */ 1674 1675 ASSERT(vdc != NULL); 1676 ASSERT(mutex_owned(&vdc->lock)); 1677 ASSERT(vdc->state == VD_STATE_INIT); 1678 1679 PR0("%s: Entered\n", __func__); 1680 1681 if (vdc->initialized & VDC_DRING_ENTRY) { 1682 for (i = 0; i < VD_DRING_LEN; i++) { 1683 mhdl = vdc->local_dring[i].desc_mhdl; 1684 1685 if (vdc->local_dring[i].flags | VDC_ALLOC_HANDLE) 1686 (void) ldc_mem_free_handle(mhdl); 1687 1688 mutex_destroy(&vdc->local_dring[i].lock); 1689 cv_destroy(&vdc->local_dring[i].cv); 1690 1691 bzero(&vdc->local_dring[i].desc_mhdl, 1692 sizeof (ldc_mem_handle_t)); 1693 } 1694 vdc->initialized &= ~VDC_DRING_ENTRY; 1695 } 1696 1697 if (vdc->initialized & VDC_DRING_LOCAL) { 1698 kmem_free(vdc->local_dring, 1699 VD_DRING_LEN * sizeof (vdc_local_desc_t)); 1700 vdc->initialized &= ~VDC_DRING_LOCAL; 1701 } 1702 1703 if (vdc->initialized & VDC_DRING_BOUND) { 1704 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 1705 if (status == 0) { 1706 vdc->initialized &= ~VDC_DRING_BOUND; 1707 } else { 1708 vdc_msg("%s: Failed to unbind Descriptor Ring (%lx)\n", 1709 vdc->ldc_dring_hdl); 1710 } 1711 } 1712 1713 if (vdc->initialized & VDC_DRING_INIT) { 1714 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 1715 if (status == 0) { 1716 vdc->ldc_dring_hdl = NULL; 1717 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 1718 vdc->initialized &= ~VDC_DRING_INIT; 1719 } else { 1720 vdc_msg("%s: Failed to destroy Descriptor Ring (%lx)\n", 1721 vdc->ldc_dring_hdl); 1722 } 1723 } 1724 } 1725 1726 /* 1727 * vdc_get_next_dring_entry_idx() 1728 * 1729 * Description: 1730 * This function gets the index of the next Descriptor Ring entry available 1731 * 1732 * Return Value: 1733 * 0 <= rv < VD_DRING_LEN Next available slot 1734 * -1 DRing is full 1735 */ 1736 static int 1737 vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t num_slots_needed) 1738 { 1739 _NOTE(ARGUNUSED(num_slots_needed)) 1740 1741 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 1742 int idx = -1; 1743 int start_idx = 0; 1744 1745 ASSERT(vdc != NULL); 1746 ASSERT(vdc->dring_len == VD_DRING_LEN); 1747 ASSERT(vdc->dring_curr_idx >= 0); 1748 ASSERT(vdc->dring_curr_idx < VD_DRING_LEN); 1749 ASSERT(mutex_owned(&vdc->dring_lock)); 1750 1751 /* Start at the last entry used */ 1752 idx = start_idx = vdc->dring_curr_idx; 1753 1754 /* 1755 * Loop through Descriptor Ring checking for a free entry until we reach 1756 * the entry we started at. We should never come close to filling the 1757 * Ring at any stage, instead this is just to prevent an entry which 1758 * gets into an inconsistent state (e.g. due to a request timing out) 1759 * from blocking progress. 1760 */ 1761 do { 1762 /* Get the next entry after the last known index tried */ 1763 idx = (idx + 1) % VD_DRING_LEN; 1764 1765 dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx); 1766 ASSERT(dep != NULL); 1767 1768 if (dep->hdr.dstate == VIO_DESC_FREE) { 1769 ASSERT(idx >= 0); 1770 ASSERT(idx < VD_DRING_LEN); 1771 vdc->dring_curr_idx = idx; 1772 return (idx); 1773 1774 } else if (dep->hdr.dstate == VIO_DESC_READY) { 1775 PR0("%s: Entry %d waiting to be accepted\n", 1776 __func__, idx); 1777 continue; 1778 1779 } else if (dep->hdr.dstate == VIO_DESC_ACCEPTED) { 1780 PR0("%s: Entry %d waiting to be processed\n", 1781 __func__, idx); 1782 continue; 1783 1784 } else if (dep->hdr.dstate == VIO_DESC_DONE) { 1785 PR0("%s: Entry %d done but not marked free\n", 1786 __func__, idx); 1787 1788 /* 1789 * If we are currently panicking, interrupts are 1790 * disabled and we will not be getting ACKs from the 1791 * vDisk server so we mark the descriptor ring entries 1792 * as FREE here instead of in the ACK handler. 1793 */ 1794 if (panicstr) { 1795 (void) vdc_depopulate_descriptor(vdc, idx); 1796 dep->hdr.dstate = VIO_DESC_FREE; 1797 vdc->local_dring[idx].flags = VIO_DESC_FREE; 1798 } 1799 continue; 1800 1801 } else { 1802 vdc_msg("Public Descriptor Ring entry corrupted"); 1803 mutex_enter(&vdc->lock); 1804 vdc_reset_connection(vdc, B_TRUE); 1805 mutex_exit(&vdc->lock); 1806 return (-1); 1807 } 1808 1809 } while (idx != start_idx); 1810 1811 return (-1); 1812 } 1813 1814 /* 1815 * Function: 1816 * vdc_populate_descriptor 1817 * 1818 * Description: 1819 * This routine writes the data to be transmitted to vds into the 1820 * descriptor, notifies vds that the ring has been updated and 1821 * then waits for the request to be processed. 1822 * 1823 * Arguments: 1824 * vdc - the soft state pointer 1825 * addr - start address of memory region. 1826 * nbytes - number of bytes to read/write 1827 * operation - operation we want vds to perform (VD_OP_XXX) 1828 * arg - parameter to be sent to server (depends on VD_OP_XXX type) 1829 * . mode for ioctl(9e) 1830 * . LP64 diskaddr_t (block I/O) 1831 * slice - the disk slice this request is for 1832 * 1833 * Return Codes: 1834 * 0 1835 * EAGAIN 1836 * EFAULT 1837 * ENXIO 1838 * EIO 1839 */ 1840 static int 1841 vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int operation, 1842 uint64_t arg, uint64_t slice) 1843 { 1844 vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ 1845 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 1846 int idx = 0; /* Index of DRing entry used */ 1847 vio_dring_msg_t dmsg; 1848 size_t msglen = sizeof (dmsg); 1849 int status = 0; 1850 int rv; 1851 int retries = 0; 1852 1853 ASSERT(vdc != NULL); 1854 ASSERT(slice < V_NUMPAR); 1855 1856 /* 1857 * Get next available DRing entry. 1858 */ 1859 mutex_enter(&vdc->dring_lock); 1860 idx = vdc_get_next_dring_entry_idx(vdc, 1); 1861 if (idx == -1) { 1862 mutex_exit(&vdc->dring_lock); 1863 vdc_msg("%s[%d]: no descriptor ring entry avail, seq=%d\n", 1864 __func__, vdc->instance, vdc->seq_num); 1865 1866 /* 1867 * Since strategy should not block we don't wait for the DRing 1868 * to empty and instead return 1869 */ 1870 return (EAGAIN); 1871 } 1872 1873 ASSERT(idx < VD_DRING_LEN); 1874 local_dep = &vdc->local_dring[idx]; 1875 dep = local_dep->dep; 1876 ASSERT(dep != NULL); 1877 1878 /* 1879 * Wait for anybody still using the DRing entry to finish. 1880 * (e.g. still waiting for vds to respond to a request) 1881 */ 1882 mutex_enter(&local_dep->lock); 1883 1884 switch (operation) { 1885 case VD_OP_BREAD: 1886 case VD_OP_BWRITE: 1887 PR1("buf=%p, block=%lx, nbytes=%lx\n", addr, arg, nbytes); 1888 dep->payload.addr = (diskaddr_t)arg; 1889 rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, operation); 1890 break; 1891 1892 case VD_OP_FLUSH: 1893 case VD_OP_GET_VTOC: 1894 case VD_OP_SET_VTOC: 1895 case VD_OP_GET_DISKGEOM: 1896 case VD_OP_SET_DISKGEOM: 1897 case VD_OP_SCSICMD: 1898 if (nbytes > 0) { 1899 rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, 1900 operation); 1901 } 1902 break; 1903 default: 1904 cmn_err(CE_NOTE, "[%d] Unsupported vDisk operation [%d]\n", 1905 vdc->instance, operation); 1906 rv = EINVAL; 1907 } 1908 1909 if (rv != 0) { 1910 mutex_exit(&local_dep->lock); 1911 mutex_exit(&vdc->dring_lock); 1912 return (rv); 1913 } 1914 1915 /* 1916 * fill in the data details into the DRing 1917 */ 1918 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdc); 1919 dep->payload.operation = operation; 1920 dep->payload.nbytes = nbytes; 1921 dep->payload.status = EINPROGRESS; /* vds will set valid value */ 1922 dep->payload.slice = slice; 1923 dep->hdr.dstate = VIO_DESC_READY; 1924 dep->hdr.ack = 1; /* request an ACK for every message */ 1925 1926 local_dep->flags = VIO_DESC_READY; 1927 local_dep->addr = addr; 1928 1929 /* 1930 * Send a msg with the DRing details to vds 1931 */ 1932 VIO_INIT_DRING_DATA_TAG(dmsg); 1933 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 1934 dmsg.dring_ident = vdc->dring_ident; 1935 dmsg.start_idx = idx; 1936 dmsg.end_idx = idx; 1937 1938 PR1("ident=0x%llx, st=%d, end=%d, seq=%d req=%d dep=%p\n", 1939 vdc->dring_ident, dmsg.start_idx, dmsg.end_idx, 1940 dmsg.seq_num, dep->payload.req_id, dep); 1941 1942 status = vdc_send(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 1943 PR1("%s[%d]: ldc_write() status=%d\n", __func__, vdc->instance, status); 1944 if (status != 0) { 1945 mutex_exit(&local_dep->lock); 1946 mutex_exit(&vdc->dring_lock); 1947 vdc_msg("%s: ldc_write(%d)\n", __func__, status); 1948 return (EAGAIN); 1949 } 1950 1951 /* 1952 * XXX - potential performance enhancement (Investigate at a later date) 1953 * 1954 * for calls from strategy(9E), instead of waiting for a response from 1955 * vds, we could return at this stage and let the ACK handling code 1956 * trigger the biodone(9F) 1957 */ 1958 1959 /* 1960 * When a guest is panicking, the completion of requests needs to be 1961 * handled differently because interrupts are disabled and vdc 1962 * will not get messages. We have to poll for the messages instead. 1963 */ 1964 if (ddi_in_panic()) { 1965 int start = 0; 1966 retries = 0; 1967 for (;;) { 1968 msglen = sizeof (dmsg); 1969 status = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, 1970 &msglen); 1971 if (status) { 1972 status = EINVAL; 1973 break; 1974 } 1975 1976 /* 1977 * if there are no packets wait and check again 1978 */ 1979 if ((status == 0) && (msglen == 0)) { 1980 if (retries++ > vdc_dump_retries) { 1981 PR0("[%d] Giving up waiting, idx %d\n", 1982 vdc->instance, idx); 1983 status = EAGAIN; 1984 break; 1985 } 1986 1987 PR1("Waiting for next packet @ %d\n", idx); 1988 delay(drv_usectohz(vdc_dump_usec_timeout)); 1989 continue; 1990 } 1991 1992 /* 1993 * Ignore all messages that are not ACKs/NACKs to 1994 * DRing requests. 1995 */ 1996 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 1997 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 1998 PR0("discarding pkt: type=%d sub=%d env=%d\n", 1999 dmsg.tag.vio_msgtype, 2000 dmsg.tag.vio_subtype, 2001 dmsg.tag.vio_subtype_env); 2002 continue; 2003 } 2004 2005 /* 2006 * set the appropriate return value for the 2007 * current request. 2008 */ 2009 switch (dmsg.tag.vio_subtype) { 2010 case VIO_SUBTYPE_ACK: 2011 status = 0; 2012 break; 2013 case VIO_SUBTYPE_NACK: 2014 status = EAGAIN; 2015 break; 2016 default: 2017 continue; 2018 } 2019 2020 start = dmsg.start_idx; 2021 if (start >= VD_DRING_LEN) { 2022 PR0("[%d] Bogus ack data : start %d\n", 2023 vdc->instance, start); 2024 continue; 2025 } 2026 2027 dep = VDC_GET_DRING_ENTRY_PTR(vdc, start); 2028 2029 PR1("[%d] Dumping start=%d idx=%d state=%d\n", 2030 vdc->instance, start, idx, dep->hdr.dstate); 2031 2032 if (dep->hdr.dstate != VIO_DESC_DONE) { 2033 PR0("[%d] Entry @ %d - state !DONE %d\n", 2034 vdc->instance, start, dep->hdr.dstate); 2035 continue; 2036 } 2037 2038 (void) vdc_depopulate_descriptor(vdc, start); 2039 2040 /* 2041 * We want to process all Dring entries up to 2042 * the current one so that we can return an 2043 * error with the correct request. 2044 */ 2045 if (idx > start) { 2046 PR0("[%d] Looping: start %d, idx %d\n", 2047 vdc->instance, idx, start); 2048 continue; 2049 } 2050 2051 /* exit - all outstanding requests are completed */ 2052 break; 2053 } 2054 2055 mutex_exit(&local_dep->lock); 2056 mutex_exit(&vdc->dring_lock); 2057 2058 return (status); 2059 } 2060 2061 /* 2062 * Now watch the DRing entries we modified to get the response 2063 * from vds. 2064 */ 2065 status = vdc_wait_for_descriptor_update(vdc, idx, dmsg); 2066 if (status == ETIMEDOUT) { 2067 /* debug info when dumping state on vds side */ 2068 dep->payload.status = ECANCELED; 2069 } 2070 2071 status = vdc_depopulate_descriptor(vdc, idx); 2072 PR1("%s[%d] Status=%d\n", __func__, vdc->instance, status); 2073 2074 mutex_exit(&local_dep->lock); 2075 mutex_exit(&vdc->dring_lock); 2076 2077 return (status); 2078 } 2079 2080 static int 2081 vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg) 2082 { 2083 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2084 vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ 2085 size_t msglen = sizeof (dmsg); 2086 int retries = 0; 2087 int status = ENXIO; 2088 int rv = 0; 2089 2090 ASSERT(vdc != NULL); 2091 ASSERT(idx < VD_DRING_LEN); 2092 local_dep = &vdc->local_dring[idx]; 2093 ASSERT(local_dep != NULL); 2094 dep = local_dep->dep; 2095 ASSERT(dep != NULL); 2096 2097 while (dep->hdr.dstate != VIO_DESC_DONE) { 2098 rv = cv_timedwait(&local_dep->cv, &local_dep->lock, 2099 VD_GET_TIMEOUT_HZ(retries)); 2100 if (rv == -1) { 2101 /* 2102 * If they persist in ignoring us we'll storm off in a 2103 * huff and return ETIMEDOUT to the upper layers. 2104 */ 2105 if (retries >= vdc_retries) { 2106 PR0("%s: Finished waiting on entry %d\n", 2107 __func__, idx); 2108 status = ETIMEDOUT; 2109 break; 2110 } else { 2111 retries++; 2112 PR0("%s[%d]: Timeout #%d on entry %d " 2113 "[seq %d][req %d]\n", __func__, 2114 vdc->instance, 2115 retries, idx, dmsg.seq_num, 2116 dep->payload.req_id); 2117 } 2118 2119 if (dep->hdr.dstate & VIO_DESC_ACCEPTED) { 2120 PR0("%s[%d]: vds has accessed entry %d [seq %d]" 2121 "[req %d] but not ack'ed it yet\n", 2122 __func__, vdc->instance, idx, dmsg.seq_num, 2123 dep->payload.req_id); 2124 continue; 2125 } 2126 2127 /* 2128 * we resend the message as it may have been dropped 2129 * and have never made it to the other side (vds). 2130 * (We reuse the original message but update seq ID) 2131 */ 2132 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2133 retries = 0; 2134 status = vdc_send(vdc->ldc_handle, (caddr_t)&dmsg, 2135 &msglen); 2136 if (status != 0) { 2137 vdc_msg("%s: Error (%d) while resending after " 2138 "timeout\n", __func__, status); 2139 status = ETIMEDOUT; 2140 break; 2141 } 2142 } 2143 } 2144 2145 return (status); 2146 } 2147 2148 static int 2149 vdc_get_response(vdc_t *vdc, int start, int end) 2150 { 2151 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2152 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2153 int status = ENXIO; 2154 int idx = -1; 2155 2156 ASSERT(vdc != NULL); 2157 ASSERT(start >= 0); 2158 ASSERT(start <= VD_DRING_LEN); 2159 ASSERT(start >= -1); 2160 ASSERT(start <= VD_DRING_LEN); 2161 2162 idx = start; 2163 ldep = &vdc->local_dring[idx]; 2164 ASSERT(ldep != NULL); 2165 dep = ldep->dep; 2166 ASSERT(dep != NULL); 2167 2168 PR0("%s[%d] DRING entry=%d status=%d\n", __func__, vdc->instance, 2169 idx, VIO_GET_DESC_STATE(dep->hdr.dstate)); 2170 while (VIO_GET_DESC_STATE(dep->hdr.dstate) == VIO_DESC_DONE) { 2171 if ((end != -1) && (idx > end)) 2172 return (0); 2173 2174 switch (ldep->operation) { 2175 case VD_OP_BREAD: 2176 case VD_OP_BWRITE: 2177 /* call bioxxx */ 2178 break; 2179 default: 2180 /* signal waiter */ 2181 break; 2182 } 2183 2184 /* Clear the DRing entry */ 2185 status = vdc_depopulate_descriptor(vdc, idx); 2186 PR0("%s[%d] Status=%d\n", __func__, vdc->instance, status); 2187 2188 /* loop accounting to get next DRing entry */ 2189 idx++; 2190 ldep = &vdc->local_dring[idx]; 2191 dep = ldep->dep; 2192 } 2193 2194 return (status); 2195 } 2196 2197 static int 2198 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2199 { 2200 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2201 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2202 int status = ENXIO; 2203 2204 ASSERT(vdc != NULL); 2205 ASSERT(idx < VD_DRING_LEN); 2206 ldep = &vdc->local_dring[idx]; 2207 ASSERT(ldep != NULL); 2208 dep = ldep->dep; 2209 ASSERT(dep != NULL); 2210 2211 status = dep->payload.status; 2212 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2213 ldep = &vdc->local_dring[idx]; 2214 VIO_SET_DESC_STATE(ldep->flags, VIO_DESC_FREE); 2215 2216 /* 2217 * If the upper layer passed in a misaligned address we copied the 2218 * data into an aligned buffer before sending it to LDC - we now 2219 * copy it back to the original buffer. 2220 */ 2221 if (ldep->align_addr) { 2222 ASSERT(ldep->addr != NULL); 2223 ASSERT(dep->payload.nbytes > 0); 2224 2225 bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes); 2226 kmem_free(ldep->align_addr, 2227 sizeof (caddr_t) * dep->payload.nbytes); 2228 ldep->align_addr = NULL; 2229 } 2230 2231 status = ldc_mem_unbind_handle(ldep->desc_mhdl); 2232 if (status != 0) { 2233 cmn_err(CE_NOTE, "[%d] unbind mem hdl 0x%lx @ idx %d failed:%d", 2234 vdc->instance, ldep->desc_mhdl, idx, status); 2235 } 2236 2237 return (status); 2238 } 2239 2240 static int 2241 vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes, 2242 int operation) 2243 { 2244 vd_dring_entry_t *dep = NULL; 2245 vdc_local_desc_t *ldep = NULL; 2246 ldc_mem_handle_t mhdl; 2247 caddr_t vaddr; 2248 int perm = LDC_MEM_RW; 2249 int rv = 0; 2250 int i; 2251 2252 ASSERT(vdc != NULL); 2253 ASSERT(idx < VD_DRING_LEN); 2254 2255 dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx); 2256 ldep = &vdc->local_dring[idx]; 2257 mhdl = ldep->desc_mhdl; 2258 2259 switch (operation) { 2260 case VD_OP_BREAD: 2261 perm = LDC_MEM_W; 2262 break; 2263 2264 case VD_OP_BWRITE: 2265 perm = LDC_MEM_R; 2266 break; 2267 2268 case VD_OP_FLUSH: 2269 case VD_OP_GET_VTOC: 2270 case VD_OP_SET_VTOC: 2271 case VD_OP_GET_DISKGEOM: 2272 case VD_OP_SET_DISKGEOM: 2273 case VD_OP_SCSICMD: 2274 perm = LDC_MEM_RW; 2275 break; 2276 2277 default: 2278 ASSERT(0); /* catch bad programming in vdc */ 2279 } 2280 2281 /* 2282 * LDC expects any addresses passed in to be 8-byte aligned. We need 2283 * to copy the contents of any misaligned buffers to a newly allocated 2284 * buffer and bind it instead (and copy the the contents back to the 2285 * original buffer passed in when depopulating the descriptor) 2286 */ 2287 vaddr = addr; 2288 if (((uint64_t)addr & 0x7) != 0) { 2289 ldep->align_addr = 2290 kmem_zalloc(sizeof (caddr_t) * nbytes, KM_SLEEP); 2291 PR0("%s[%d] Misaligned address %lx reallocating " 2292 "(buf=%lx entry=%d)\n", 2293 __func__, vdc->instance, addr, ldep->align_addr, idx); 2294 bcopy(addr, ldep->align_addr, nbytes); 2295 vaddr = ldep->align_addr; 2296 } 2297 2298 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2299 vdc->dring_mem_info.mtype, perm, &dep->payload.cookie[0], 2300 &dep->payload.ncookies); 2301 PR1("%s[%d] bound mem handle; ncookies=%d\n", 2302 __func__, vdc->instance, dep->payload.ncookies); 2303 if (rv != 0) { 2304 vdc_msg("%s[%d] failed to ldc_mem_bind_handle " 2305 "(mhdl=%lx, buf=%lx entry=%d err=%d)\n", 2306 __func__, vdc->instance, mhdl, addr, idx, rv); 2307 if (ldep->align_addr) { 2308 kmem_free(ldep->align_addr, 2309 sizeof (caddr_t) * dep->payload.nbytes); 2310 ldep->align_addr = NULL; 2311 } 2312 return (EAGAIN); 2313 } 2314 2315 /* 2316 * Get the other cookies (if any). 2317 */ 2318 for (i = 1; i < dep->payload.ncookies; i++) { 2319 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2320 if (rv != 0) { 2321 (void) ldc_mem_unbind_handle(mhdl); 2322 vdc_msg("%s: failed to get next cookie(mhdl=%lx " 2323 "cnum=%d), err=%d", __func__, mhdl, i, rv); 2324 if (ldep->align_addr) { 2325 kmem_free(ldep->align_addr, 2326 sizeof (caddr_t) * dep->payload.nbytes); 2327 ldep->align_addr = NULL; 2328 } 2329 return (EAGAIN); 2330 } 2331 } 2332 2333 return (rv); 2334 } 2335 2336 /* 2337 * Interrupt handlers for messages from LDC 2338 */ 2339 2340 static uint_t 2341 vdc_handle_cb(uint64_t event, caddr_t arg) 2342 { 2343 ldc_status_t ldc_state; 2344 int rv = 0; 2345 2346 vdc_t *vdc = (vdc_t *)(void *)arg; 2347 2348 ASSERT(vdc != NULL); 2349 2350 PR1("%s[%d] event=%x seqID=%d\n", 2351 __func__, vdc->instance, event, vdc->seq_num); 2352 2353 /* 2354 * Depending on the type of event that triggered this callback, 2355 * we modify the handhske state or read the data. 2356 * 2357 * NOTE: not done as a switch() as event could be triggered by 2358 * a state change and a read request. Also the ordering of the 2359 * check for the event types is deliberate. 2360 */ 2361 if (event & LDC_EVT_UP) { 2362 PR0("%s[%d] Received LDC_EVT_UP\n", __func__, vdc->instance); 2363 2364 /* get LDC state */ 2365 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2366 if (rv != 0) { 2367 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2368 vdc->instance, rv); 2369 vdc_reset_connection(vdc, B_TRUE); 2370 return (LDC_SUCCESS); 2371 } 2372 2373 /* 2374 * Reset the transaction sequence numbers when LDC comes up. 2375 * We then kick off the handshake negotiation with the vDisk 2376 * server. 2377 */ 2378 mutex_enter(&vdc->lock); 2379 vdc->seq_num = 0; 2380 vdc->seq_num_reply = 0; 2381 vdc->ldc_state = ldc_state; 2382 ASSERT(ldc_state == LDC_UP); 2383 mutex_exit(&vdc->lock); 2384 2385 vdc_init_handshake_negotiation(vdc); 2386 2387 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2388 } 2389 2390 if (event & LDC_EVT_READ) { 2391 /* 2392 * Wake up the worker thread to process the message 2393 */ 2394 mutex_enter(&vdc->msg_proc_lock); 2395 vdc->msg_pending = B_TRUE; 2396 cv_signal(&vdc->msg_proc_cv); 2397 mutex_exit(&vdc->msg_proc_lock); 2398 2399 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2400 2401 /* that's all we have to do - no need to handle DOWN/RESET */ 2402 return (LDC_SUCCESS); 2403 } 2404 2405 if (event & LDC_EVT_RESET) { 2406 PR0("%s[%d] Recvd LDC RESET event\n", __func__, vdc->instance); 2407 } 2408 2409 if (event & LDC_EVT_DOWN) { 2410 PR0("%s[%d] Recvd LDC DOWN event\n", __func__, vdc->instance); 2411 2412 /* get LDC state */ 2413 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2414 if (rv != 0) { 2415 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2416 vdc->instance, rv); 2417 ldc_state = LDC_OPEN; 2418 } 2419 mutex_enter(&vdc->lock); 2420 vdc->ldc_state = ldc_state; 2421 mutex_exit(&vdc->lock); 2422 2423 vdc_reset_connection(vdc, B_TRUE); 2424 } 2425 2426 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 2427 cmn_err(CE_NOTE, "![%d] Unexpected LDC event (%lx) received", 2428 vdc->instance, event); 2429 2430 return (LDC_SUCCESS); 2431 } 2432 2433 /* -------------------------------------------------------------------------- */ 2434 2435 /* 2436 * The following functions process the incoming messages from vds 2437 */ 2438 2439 2440 static void 2441 vdc_process_msg_thread(vdc_t *vdc) 2442 { 2443 int status = 0; 2444 boolean_t q_is_empty = B_TRUE; 2445 2446 ASSERT(vdc != NULL); 2447 2448 mutex_enter(&vdc->msg_proc_lock); 2449 PR0("%s[%d]: Starting\n", __func__, vdc->instance); 2450 2451 vdc->msg_proc_thr_state = VDC_THR_RUNNING; 2452 2453 while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) { 2454 2455 PR1("%s[%d] Waiting\n", __func__, vdc->instance); 2456 while (vdc->msg_pending == B_FALSE) 2457 cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock); 2458 2459 PR1("%s[%d] Message Received\n", __func__, vdc->instance); 2460 2461 /* check if there is data */ 2462 status = ldc_chkq(vdc->ldc_handle, &q_is_empty); 2463 if ((status != 0) && 2464 (vdc->msg_proc_thr_state == VDC_THR_RUNNING)) { 2465 cmn_err(CE_NOTE, "[%d] Unable to communicate with vDisk" 2466 " server. Cannot check LDC queue: %d", 2467 vdc->instance, status); 2468 mutex_enter(&vdc->lock); 2469 vdc_reset_connection(vdc, B_TRUE); 2470 mutex_exit(&vdc->lock); 2471 vdc->msg_proc_thr_state = VDC_THR_STOP; 2472 continue; 2473 } 2474 2475 if (q_is_empty == B_FALSE) { 2476 PR1("%s: new pkt(s) available\n", __func__); 2477 vdc_process_msg(vdc); 2478 } 2479 2480 vdc->msg_pending = B_FALSE; 2481 } 2482 2483 PR0("Message processing thread stopped\n"); 2484 vdc->msg_pending = B_FALSE; 2485 vdc->msg_proc_thr_state = VDC_THR_DONE; 2486 cv_signal(&vdc->msg_proc_cv); 2487 mutex_exit(&vdc->msg_proc_lock); 2488 thread_exit(); 2489 } 2490 2491 2492 /* 2493 * Function: 2494 * vdc_process_msg() 2495 * 2496 * Description: 2497 * This function is called by the message processing thread each time it 2498 * is triggered when LDC sends an interrupt to indicate that there are 2499 * more packets on the queue. When it is called it will continue to loop 2500 * and read the messages until there are no more left of the queue. If it 2501 * encounters an invalid sized message it will drop it and check the next 2502 * message. 2503 * 2504 * Arguments: 2505 * arg - soft state pointer for this instance of the device driver. 2506 * 2507 * Return Code: 2508 * None. 2509 */ 2510 static void 2511 vdc_process_msg(void *arg) 2512 { 2513 vdc_t *vdc = (vdc_t *)(void *)arg; 2514 vio_msg_t vio_msg; 2515 size_t nbytes = sizeof (vio_msg); 2516 int status; 2517 2518 ASSERT(vdc != NULL); 2519 2520 mutex_enter(&vdc->lock); 2521 2522 PR1("%s\n", __func__); 2523 2524 for (;;) { 2525 2526 /* read all messages - until no more left */ 2527 status = ldc_read(vdc->ldc_handle, (caddr_t)&vio_msg, &nbytes); 2528 2529 if (status) { 2530 vdc_msg("%s: ldc_read() failed = %d", __func__, status); 2531 2532 /* if status is ECONNRESET --- reset vdc state */ 2533 if (status == EIO || status == ECONNRESET) { 2534 vdc_reset_connection(vdc, B_FALSE); 2535 } 2536 2537 mutex_exit(&vdc->lock); 2538 return; 2539 } 2540 2541 if ((nbytes > 0) && (nbytes < sizeof (vio_msg_tag_t))) { 2542 cmn_err(CE_CONT, "![%d] Expect %lu bytes; recv'd %lu\n", 2543 vdc->instance, sizeof (vio_msg_tag_t), nbytes); 2544 mutex_exit(&vdc->lock); 2545 return; 2546 } 2547 2548 if (nbytes == 0) { 2549 PR2("%s[%d]: ldc_read() done..\n", 2550 __func__, vdc->instance); 2551 mutex_exit(&vdc->lock); 2552 return; 2553 } 2554 2555 PR1("%s[%d] (%x/%x/%x)\n", __func__, vdc->instance, 2556 vio_msg.tag.vio_msgtype, 2557 vio_msg.tag.vio_subtype, 2558 vio_msg.tag.vio_subtype_env); 2559 2560 /* 2561 * Verify the Session ID of the message 2562 * 2563 * Every message after the Version has been negotiated should 2564 * have the correct session ID set. 2565 */ 2566 if ((vio_msg.tag.vio_sid != vdc->session_id) && 2567 (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) { 2568 PR0("%s: Incorrect SID 0x%x msg 0x%lx, expected 0x%x\n", 2569 __func__, vio_msg.tag.vio_sid, &vio_msg, 2570 vdc->session_id); 2571 vdc_reset_connection(vdc, B_FALSE); 2572 mutex_exit(&vdc->lock); 2573 return; 2574 } 2575 2576 switch (vio_msg.tag.vio_msgtype) { 2577 case VIO_TYPE_CTRL: 2578 status = vdc_process_ctrl_msg(vdc, vio_msg); 2579 break; 2580 case VIO_TYPE_DATA: 2581 status = vdc_process_data_msg(vdc, vio_msg); 2582 break; 2583 case VIO_TYPE_ERR: 2584 status = vdc_process_err_msg(vdc, vio_msg); 2585 break; 2586 default: 2587 PR1("%s", __func__); 2588 status = EINVAL; 2589 break; 2590 } 2591 2592 if (status != 0) { 2593 PR0("%s[%d] Error (%d) occcurred processing msg\n", 2594 __func__, vdc->instance, status); 2595 vdc_reset_connection(vdc, B_FALSE); 2596 } 2597 } 2598 _NOTE(NOTREACHED) 2599 } 2600 2601 /* 2602 * Function: 2603 * vdc_process_ctrl_msg() 2604 * 2605 * Description: 2606 * This function is called by the message processing thread each time 2607 * an LDC message with a msgtype of VIO_TYPE_CTRL is received. 2608 * 2609 * Arguments: 2610 * vdc - soft state pointer for this instance of the device driver. 2611 * msg - the LDC message sent by vds 2612 * 2613 * Return Codes: 2614 * 0 - Success. 2615 * EPROTO - A message was received which shouldn't have happened according 2616 * to the protocol 2617 * ENOTSUP - An action which is allowed according to the protocol but which 2618 * isn't (or doesn't need to be) implemented yet. 2619 * EINVAL - An invalid value was returned as part of a message. 2620 */ 2621 static int 2622 vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg) 2623 { 2624 size_t msglen = sizeof (msg); 2625 vd_attr_msg_t *attr_msg = NULL; 2626 vio_dring_reg_msg_t *dring_msg = NULL; 2627 int status = -1; 2628 2629 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL); 2630 ASSERT(vdc != NULL); 2631 ASSERT(mutex_owned(&vdc->lock)); 2632 2633 /* Depending on which state we are in; process the message */ 2634 switch (vdc->state) { 2635 case VD_STATE_INIT: 2636 if (msg.tag.vio_subtype_env != VIO_VER_INFO) { 2637 status = EPROTO; 2638 break; 2639 } 2640 2641 switch (msg.tag.vio_subtype) { 2642 case VIO_SUBTYPE_ACK: 2643 vdc->state = VD_STATE_VER; 2644 status = vdc_init_attr_negotiation(vdc); 2645 break; 2646 case VIO_SUBTYPE_NACK: 2647 /* 2648 * For now there is only one version number so we 2649 * cannot step back to an earlier version but in the 2650 * future we may need to add further logic here 2651 * to try negotiating an earlier version as the VIO 2652 * design allow for it. 2653 */ 2654 2655 /* 2656 * vds could not handle the version we sent so we just 2657 * stop negotiating. 2658 */ 2659 status = EPROTO; 2660 break; 2661 2662 case VIO_SUBTYPE_INFO: 2663 /* 2664 * Handle the case where vds starts handshake 2665 * (for now only vdc is the instigatior) 2666 */ 2667 status = ENOTSUP; 2668 break; 2669 2670 default: 2671 status = ENOTSUP; 2672 break; 2673 } 2674 break; 2675 2676 case VD_STATE_VER: 2677 if (msg.tag.vio_subtype_env != VIO_ATTR_INFO) { 2678 status = EPROTO; 2679 break; 2680 } 2681 2682 switch (msg.tag.vio_subtype) { 2683 case VIO_SUBTYPE_ACK: 2684 /* 2685 * We now verify the attributes sent by vds. 2686 */ 2687 attr_msg = (vd_attr_msg_t *)&msg; 2688 vdc->vdisk_size = attr_msg->vdisk_size; 2689 vdc->vdisk_type = attr_msg->vdisk_type; 2690 2691 if ((attr_msg->max_xfer_sz != vdc->max_xfer_sz) || 2692 (attr_msg->vdisk_block_size != vdc->block_size)) { 2693 /* 2694 * Future support: step down to the block size 2695 * and max transfer size suggested by the 2696 * server. (If this value is less than 128K 2697 * then multiple Dring entries per request 2698 * would need to be implemented) 2699 */ 2700 cmn_err(CE_NOTE, "[%d] Couldn't process block " 2701 "attrs from vds", vdc->instance); 2702 status = EINVAL; 2703 break; 2704 } 2705 2706 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 2707 (attr_msg->vdisk_size > INT64_MAX) || 2708 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 2709 vdc_msg("%s[%d] Couldn't process attrs " 2710 "from vds", __func__, vdc->instance); 2711 status = EINVAL; 2712 break; 2713 } 2714 2715 vdc->state = VD_STATE_ATTR; 2716 status = vdc_init_dring_negotiate(vdc); 2717 break; 2718 2719 case VIO_SUBTYPE_NACK: 2720 /* 2721 * vds could not handle the attributes we sent so we 2722 * stop negotiating. 2723 */ 2724 status = EPROTO; 2725 break; 2726 2727 case VIO_SUBTYPE_INFO: 2728 /* 2729 * Handle the case where vds starts the handshake 2730 * (for now; vdc is the only supported instigatior) 2731 */ 2732 status = ENOTSUP; 2733 break; 2734 2735 default: 2736 status = ENOTSUP; 2737 break; 2738 } 2739 break; 2740 2741 2742 case VD_STATE_ATTR: 2743 if (msg.tag.vio_subtype_env != VIO_DRING_REG) { 2744 status = EPROTO; 2745 break; 2746 } 2747 2748 switch (msg.tag.vio_subtype) { 2749 case VIO_SUBTYPE_ACK: 2750 /* Verify that we have sent all the descr. ring info */ 2751 /* nop for now as we have just 1 dring */ 2752 dring_msg = (vio_dring_reg_msg_t *)&msg; 2753 2754 /* save the received dring_ident */ 2755 vdc->dring_ident = dring_msg->dring_ident; 2756 PR0("%s[%d] Received dring ident=0x%lx\n", 2757 __func__, vdc->instance, vdc->dring_ident); 2758 2759 /* 2760 * Send an RDX message to vds to indicate we are ready 2761 * to send data 2762 */ 2763 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 2764 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 2765 msg.tag.vio_subtype_env = VIO_RDX; 2766 msg.tag.vio_sid = vdc->session_id; 2767 status = vdc_send(vdc->ldc_handle, (caddr_t)&msg, 2768 &msglen); 2769 if (status != 0) { 2770 cmn_err(CE_NOTE, "[%d] Failed to send RDX" 2771 " message (%d)", vdc->instance, status); 2772 break; 2773 } 2774 2775 status = vdc_create_fake_geometry(vdc); 2776 if (status != 0) { 2777 cmn_err(CE_NOTE, "[%d] Failed to create disk " 2778 "geometery(%d)", vdc->instance, status); 2779 break; 2780 } 2781 2782 vdc->state = VD_STATE_RDX; 2783 break; 2784 2785 case VIO_SUBTYPE_NACK: 2786 /* 2787 * vds could not handle the DRing info we sent so we 2788 * stop negotiating. 2789 */ 2790 cmn_err(CE_CONT, "server could not register DRing\n"); 2791 vdc_reset_connection(vdc, B_FALSE); 2792 vdc_destroy_descriptor_ring(vdc); 2793 status = EPROTO; 2794 break; 2795 2796 case VIO_SUBTYPE_INFO: 2797 /* 2798 * Handle the case where vds starts handshake 2799 * (for now only vdc is the instigatior) 2800 */ 2801 status = ENOTSUP; 2802 break; 2803 default: 2804 status = ENOTSUP; 2805 } 2806 break; 2807 2808 case VD_STATE_RDX: 2809 if (msg.tag.vio_subtype_env != VIO_RDX) { 2810 status = EPROTO; 2811 break; 2812 } 2813 2814 PR0("%s: Received RDX - handshake successful\n", __func__); 2815 2816 status = 0; 2817 vdc->state = VD_STATE_DATA; 2818 2819 cv_broadcast(&vdc->attach_cv); 2820 break; 2821 2822 default: 2823 cmn_err(CE_NOTE, "[%d] unknown handshake negotiation state %d", 2824 vdc->instance, vdc->state); 2825 break; 2826 } 2827 2828 return (status); 2829 } 2830 2831 2832 /* 2833 * Function: 2834 * vdc_process_data_msg() 2835 * 2836 * Description: 2837 * This function is called by the message processing thread each time it 2838 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 2839 * be an ACK or NACK from vds[1] which vdc handles as follows. 2840 * ACK - wake up the waiting thread 2841 * NACK - resend any messages necessary 2842 * 2843 * [1] Although the message format allows it, vds should not send a 2844 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 2845 * some bizarre reason it does, vdc will reset the connection. 2846 * 2847 * Arguments: 2848 * vdc - soft state pointer for this instance of the device driver. 2849 * msg - the LDC message sent by vds 2850 * 2851 * Return Code: 2852 * 0 - Success. 2853 * > 0 - error value returned by LDC 2854 */ 2855 static int 2856 vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg) 2857 { 2858 int status = 0; 2859 vdc_local_desc_t *local_dep = NULL; 2860 vio_dring_msg_t *dring_msg = NULL; 2861 size_t msglen = sizeof (*dring_msg); 2862 uint_t num_msgs; 2863 uint_t start; 2864 uint_t end; 2865 uint_t i; 2866 2867 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_DATA); 2868 ASSERT(vdc != NULL); 2869 ASSERT(mutex_owned(&vdc->lock)); 2870 2871 dring_msg = (vio_dring_msg_t *)&msg; 2872 2873 /* 2874 * Check to see if the message has bogus data 2875 */ 2876 start = dring_msg->start_idx; 2877 end = dring_msg->end_idx; 2878 if ((start >= VD_DRING_LEN) || (end >= VD_DRING_LEN)) { 2879 vdc_msg("%s: Bogus ACK data : start %d, end %d\n", 2880 __func__, start, end); 2881 return (EPROTO); 2882 } 2883 2884 /* 2885 * calculate the number of messages that vds ACK'ed 2886 * 2887 * Assumes, (like the rest of vdc) that there is a 1:1 mapping 2888 * between requests and Dring entries. 2889 */ 2890 num_msgs = (end >= start) ? 2891 (end - start + 1) : 2892 (VD_DRING_LEN - start + end + 1); 2893 2894 /* 2895 * Verify that the sequence number is what vdc expects. 2896 */ 2897 if (vdc_verify_seq_num(vdc, dring_msg, num_msgs) == B_FALSE) { 2898 return (ENXIO); 2899 } 2900 2901 switch (msg.tag.vio_subtype) { 2902 case VIO_SUBTYPE_ACK: 2903 PR2("%s: DATA ACK\n", __func__); 2904 2905 /* 2906 * Wake the thread waiting for each DRing entry ACK'ed 2907 */ 2908 for (i = 0; i < num_msgs; i++) { 2909 int idx = (start + i) % VD_DRING_LEN; 2910 2911 local_dep = &vdc->local_dring[idx]; 2912 mutex_enter(&local_dep->lock); 2913 cv_signal(&local_dep->cv); 2914 mutex_exit(&local_dep->lock); 2915 } 2916 break; 2917 2918 case VIO_SUBTYPE_NACK: 2919 PR0("%s: DATA NACK\n", __func__); 2920 dring_msg = (vio_dring_msg_t *)&msg; 2921 VDC_DUMP_DRING_MSG(dring_msg); 2922 2923 /* Resend necessary messages */ 2924 for (i = 0; i < num_msgs; i++) { 2925 int idx = (start + i) % VD_DRING_LEN; 2926 2927 local_dep = &vdc->local_dring[idx]; 2928 ASSERT(local_dep != NULL); 2929 mutex_enter(&local_dep->lock); 2930 2931 if (local_dep->dep->hdr.dstate != VIO_DESC_READY) { 2932 PR0("%s[%d]: Won't resend entry %d [flag=%d]\n", 2933 __func__, vdc->instance, idx, 2934 local_dep->dep->hdr.dstate); 2935 mutex_exit(&local_dep->lock); 2936 break; 2937 } 2938 2939 /* we'll reuse the message passed in */ 2940 VIO_INIT_DRING_DATA_TAG(msg); 2941 dring_msg->tag.vio_sid = vdc->session_id; 2942 dring_msg->seq_num = ++(vdc->seq_num); 2943 VDC_DUMP_DRING_MSG(dring_msg); 2944 2945 status = vdc_send(vdc->ldc_handle, (caddr_t)&dring_msg, 2946 &msglen); 2947 PR1("%s: ldc_write() status=%d\n", __func__, status); 2948 if (status != 0) { 2949 vdc_msg("%s ldc_write(%d)\n", __func__, status); 2950 mutex_exit(&local_dep->lock); 2951 break; 2952 } 2953 2954 mutex_exit(&local_dep->lock); 2955 } 2956 break; 2957 2958 case VIO_SUBTYPE_INFO: 2959 default: 2960 cmn_err(CE_NOTE, "[%d] Got an unexpected DATA msg [subtype %d]", 2961 vdc->instance, msg.tag.vio_subtype); 2962 break; 2963 } 2964 2965 return (status); 2966 } 2967 2968 /* 2969 * Function: 2970 * vdc_process_err_msg() 2971 * 2972 * NOTE: No error messages are used as part of the vDisk protocol 2973 */ 2974 static int 2975 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 2976 { 2977 _NOTE(ARGUNUSED(vdc)) 2978 _NOTE(ARGUNUSED(msg)) 2979 2980 int status = ENOTSUP; 2981 2982 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 2983 cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance); 2984 2985 return (status); 2986 } 2987 2988 /* 2989 * Function: 2990 * vdc_verify_seq_num() 2991 * 2992 * Description: 2993 * This functions verifies that the sequence number sent back by vds with 2994 * the latest message correctly follows the last request processed. 2995 * 2996 * Arguments: 2997 * vdc - soft state pointer for this instance of the driver. 2998 * dring_msg - pointer to the LDC message sent by vds 2999 * num_msgs - the number of requests being acknowledged 3000 * 3001 * Return Code: 3002 * B_TRUE - Success. 3003 * B_FALSE - The seq numbers are so out of sync, vdc cannot deal with them 3004 */ 3005 static boolean_t 3006 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int num_msgs) 3007 { 3008 ASSERT(vdc != NULL); 3009 ASSERT(dring_msg != NULL); 3010 3011 /* 3012 * Check to see if the messages were responded to in the correct 3013 * order by vds. There are 3 possible scenarios: 3014 * - the seq_num we expected is returned (everything is OK) 3015 * - a seq_num earlier than the last one acknowledged is returned, 3016 * if so something is seriously wrong so we reset the connection 3017 * - a seq_num greater than what we expected is returned. 3018 */ 3019 if (dring_msg->seq_num != (vdc->seq_num_reply + num_msgs)) { 3020 vdc_msg("%s[%d]: Bogus seq_num %d, expected %d\n", 3021 __func__, vdc->instance, dring_msg->seq_num, 3022 vdc->seq_num_reply + num_msgs); 3023 if (dring_msg->seq_num < (vdc->seq_num_reply + num_msgs)) { 3024 return (B_FALSE); 3025 } else { 3026 /* 3027 * vds has responded with a seq_num greater than what we 3028 * expected 3029 */ 3030 return (B_FALSE); 3031 } 3032 } 3033 vdc->seq_num_reply += num_msgs; 3034 3035 return (B_TRUE); 3036 } 3037 3038 /* -------------------------------------------------------------------------- */ 3039 3040 /* 3041 * DKIO(7) support 3042 * 3043 * XXX FIXME - needs to be converted to use the structures defined in the 3044 * latest VIO spec to communicate with the vDisk server. 3045 */ 3046 3047 typedef struct vdc_dk_arg { 3048 struct dk_callback dkc; 3049 int mode; 3050 dev_t dev; 3051 vdc_t *vdc; 3052 } vdc_dk_arg_t; 3053 3054 /* 3055 * Function: 3056 * vdc_dkio_flush_cb() 3057 * 3058 * Description: 3059 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 3060 * by kernel code. 3061 * 3062 * Arguments: 3063 * arg - a pointer to a vdc_dk_arg_t structure. 3064 */ 3065 void 3066 vdc_dkio_flush_cb(void *arg) 3067 { 3068 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 3069 struct dk_callback *dkc = NULL; 3070 vdc_t *vdc = NULL; 3071 int rv; 3072 3073 if (dk_arg == NULL) { 3074 vdc_msg("%s[?] DKIOCFLUSHWRITECACHE arg is NULL\n", __func__); 3075 return; 3076 } 3077 dkc = &dk_arg->dkc; 3078 vdc = dk_arg->vdc; 3079 ASSERT(vdc != NULL); 3080 3081 rv = vdc_populate_descriptor(vdc, NULL, 0, VD_OP_FLUSH, 3082 dk_arg->mode, SDPART(getminor(dk_arg->dev))); 3083 if (rv != 0) { 3084 PR0("%s[%d] DKIOCFLUSHWRITECACHE failed : model %x\n", 3085 __func__, vdc->instance, 3086 ddi_model_convert_from(dk_arg->mode & FMODELS)); 3087 return; 3088 } 3089 3090 /* 3091 * Trigger the call back to notify the caller the the ioctl call has 3092 * been completed. 3093 */ 3094 if ((dk_arg->mode & FKIOCTL) && 3095 (dkc != NULL) && 3096 (dkc->dkc_callback != NULL)) { 3097 ASSERT(dkc->dkc_cookie != NULL); 3098 (*dkc->dkc_callback)(dkc->dkc_cookie, ENOTSUP); 3099 } 3100 3101 /* Indicate that one less DKIO write flush is outstanding */ 3102 mutex_enter(&vdc->lock); 3103 vdc->dkio_flush_pending--; 3104 ASSERT(vdc->dkio_flush_pending >= 0); 3105 mutex_exit(&vdc->lock); 3106 } 3107 3108 3109 /* 3110 * This structure is used in the DKIO(7I) array below. 3111 */ 3112 typedef struct vdc_dk_ioctl { 3113 uint8_t op; /* VD_OP_XXX value */ 3114 int cmd; /* Solaris ioctl operation number */ 3115 uint8_t copy; /* copyin and/or copyout needed ? */ 3116 size_t nbytes; /* size of structure to be copied */ 3117 size_t nbytes32; /* size of 32bit struct if different */ 3118 /* to 64bit struct (zero otherwise) */ 3119 } vdc_dk_ioctl_t; 3120 3121 /* 3122 * Subset of DKIO(7I) operations currently supported 3123 */ 3124 static vdc_dk_ioctl_t dk_ioctl[] = { 3125 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 3126 0, 0}, 3127 {VD_OP_GET_WCE, DKIOCGETWCE, 0, 3128 0, 0}, 3129 {VD_OP_SET_WCE, DKIOCSETWCE, 0, 3130 0, 0}, 3131 {VD_OP_GET_VTOC, DKIOCGVTOC, VD_COPYOUT, 3132 sizeof (struct vtoc), sizeof (struct vtoc32)}, 3133 {VD_OP_SET_VTOC, DKIOCSVTOC, VD_COPYIN, 3134 sizeof (struct vtoc), sizeof (struct vtoc32)}, 3135 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, VD_COPYIN, 3136 sizeof (struct dk_geom), 0}, 3137 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, VD_COPYOUT, 3138 sizeof (struct dk_geom), 0}, 3139 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, VD_COPYOUT, 3140 sizeof (struct dk_geom), 0}, 3141 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, VD_COPYOUT, 3142 sizeof (struct dk_geom), 0}, 3143 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, VD_COPYOUT, 3144 sizeof (struct dk_geom), 0}, 3145 {VD_OP_SCSICMD, USCSICMD, VD_COPYIN|VD_COPYOUT, 3146 sizeof (struct uscsi_cmd), sizeof (struct uscsi_cmd32)}, 3147 {0, DKIOCINFO, VD_COPYOUT, 3148 sizeof (struct dk_cinfo), 0}, 3149 {0, DKIOCGMEDIAINFO, VD_COPYOUT, 3150 sizeof (struct dk_minfo), 0}, 3151 {0, DKIOCREMOVABLE, 0, 3152 0, 0}, 3153 {0, CDROMREADOFFSET, 0, 3154 0, 0} 3155 }; 3156 3157 /* 3158 * Function: 3159 * vd_process_ioctl() 3160 * 3161 * Description: 3162 * This routine is the driver entry point for handling user 3163 * requests to get the device geometry. 3164 * 3165 * Arguments: 3166 * dev - the device number 3167 * cmd - the operation [dkio(7I)] to be processed 3168 * arg - pointer to user provided structure 3169 * (contains data to be set or reference parameter for get) 3170 * mode - bit flag, indicating open settings, 32/64 bit type, etc 3171 * rvalp - calling process return value, used in some ioctl calls 3172 * (passed throught to vds who fills in the value) 3173 * 3174 * Assumptions: 3175 * vds will make the ioctl calls in the 64 bit address space so vdc 3176 * will convert the data to/from 32 bit as necessary before doing 3177 * the copyin or copyout. 3178 * 3179 * Return Code: 3180 * 0 3181 * EFAULT 3182 * ENXIO 3183 * EIO 3184 * ENOTSUP 3185 */ 3186 static int 3187 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 3188 { 3189 int instance = SDUNIT(getminor(dev)); 3190 vdc_t *vdc = NULL; 3191 int op = -1; /* VD_OP_XXX value */ 3192 int rv = -1; 3193 int idx = 0; /* index into dk_ioctl[] */ 3194 size_t len = 0; /* #bytes to send to vds */ 3195 size_t alloc_len = 0; /* #bytes to allocate mem for */ 3196 size_t copy_len = 0; /* #bytes to copy in/out */ 3197 caddr_t mem_p = NULL; 3198 boolean_t do_convert_32to64 = B_FALSE; 3199 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 3200 3201 PR0("%s: Processing ioctl(%x) for dev %x : model %x\n", 3202 __func__, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 3203 3204 vdc = ddi_get_soft_state(vdc_state, instance); 3205 if (vdc == NULL) { 3206 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 3207 instance); 3208 return (ENXIO); 3209 } 3210 3211 /* 3212 * Check to see if we can communicate with the vDisk server 3213 */ 3214 rv = vdc_is_able_to_tx_data(vdc, O_NONBLOCK); 3215 if (rv == B_FALSE) { 3216 PR0("%s[%d] Not ready to transmit data\n", __func__, instance); 3217 return (ENOLINK); 3218 } 3219 3220 /* 3221 * Validate the ioctl operation to be performed. 3222 * 3223 * If we have looped through the array without finding a match then we 3224 * don't support this ioctl. 3225 */ 3226 for (idx = 0; idx < nioctls; idx++) { 3227 if (cmd == dk_ioctl[idx].cmd) 3228 break; 3229 } 3230 3231 if (idx >= nioctls) { 3232 PR0("%s[%d] Unsupported ioctl(%x)\n", 3233 __func__, vdc->instance, cmd); 3234 return (ENOTSUP); 3235 } 3236 3237 copy_len = len = dk_ioctl[idx].nbytes; 3238 op = dk_ioctl[idx].op; 3239 3240 /* 3241 * Some ioctl operations have different sized structures for 32 bit 3242 * and 64 bit. If the userland caller is 32 bit, we need to check 3243 * to see if the operation is one of those special cases and 3244 * flag that we need to convert to and/or from 32 bit since vds 3245 * will make the call as 64 bit. 3246 */ 3247 if ((ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) && 3248 (dk_ioctl[idx].nbytes != 0) && 3249 (dk_ioctl[idx].nbytes32 != 0)) { 3250 do_convert_32to64 = B_TRUE; 3251 copy_len = dk_ioctl[idx].nbytes32; 3252 } 3253 3254 /* 3255 * Deal with the ioctls which the server does not provide. 3256 */ 3257 switch (cmd) { 3258 case CDROMREADOFFSET: 3259 case DKIOCREMOVABLE: 3260 return (ENOTTY); 3261 3262 case DKIOCINFO: 3263 { 3264 struct dk_cinfo cinfo; 3265 if (vdc->cinfo == NULL) 3266 return (ENXIO); 3267 3268 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 3269 cinfo.dki_partition = SDPART(getminor(dev)); 3270 3271 rv = ddi_copyout(&cinfo, (void *)arg, 3272 sizeof (struct dk_cinfo), mode); 3273 if (rv != 0) 3274 return (EFAULT); 3275 3276 return (0); 3277 } 3278 3279 case DKIOCGMEDIAINFO: 3280 if (vdc->minfo == NULL) 3281 return (ENXIO); 3282 3283 rv = ddi_copyout(vdc->minfo, (void *)arg, 3284 sizeof (struct dk_minfo), mode); 3285 if (rv != 0) 3286 return (EFAULT); 3287 3288 return (0); 3289 } 3290 3291 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 3292 ASSERT(op != 0); 3293 3294 /* LDC requires that the memory being mapped is 8-byte aligned */ 3295 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 3296 PR1("%s[%d]: struct size %d alloc %d\n", 3297 __func__, instance, len, alloc_len); 3298 3299 if (alloc_len != 0) 3300 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 3301 3302 if (dk_ioctl[idx].copy & VD_COPYIN) { 3303 if (arg == NULL) { 3304 if (mem_p != NULL) 3305 kmem_free(mem_p, alloc_len); 3306 return (EINVAL); 3307 } 3308 3309 ASSERT(copy_len != 0); 3310 3311 rv = ddi_copyin((void *)arg, mem_p, copy_len, mode); 3312 if (rv != 0) { 3313 if (mem_p != NULL) 3314 kmem_free(mem_p, alloc_len); 3315 return (EFAULT); 3316 } 3317 3318 /* 3319 * some operations need the data to be converted from 32 bit 3320 * to 64 bit structures so that vds can process them on the 3321 * other side. 3322 */ 3323 if (do_convert_32to64) { 3324 switch (cmd) { 3325 case DKIOCSVTOC: 3326 { 3327 struct vtoc vt; 3328 struct vtoc32 vt32; 3329 3330 ASSERT(mem_p != NULL); 3331 vt32 = *((struct vtoc32 *)(mem_p)); 3332 3333 vtoc32tovtoc(vt32, vt); 3334 bcopy(&vt, mem_p, len); 3335 break; 3336 } 3337 3338 case USCSICMD: 3339 { 3340 struct uscsi_cmd scmd; 3341 struct uscsi_cmd *uscmd = &scmd; 3342 struct uscsi_cmd32 *uscmd32; 3343 3344 ASSERT(mem_p != NULL); 3345 uscmd32 = (struct uscsi_cmd32 *)mem_p; 3346 3347 /* 3348 * Convert the ILP32 uscsi data from the 3349 * application to LP64 for internal use. 3350 */ 3351 uscsi_cmd32touscsi_cmd(uscmd32, uscmd); 3352 bcopy(uscmd, mem_p, len); 3353 break; 3354 } 3355 default: 3356 break; 3357 } 3358 } 3359 } 3360 3361 /* 3362 * handle the special case of DKIOCFLUSHWRITECACHE 3363 */ 3364 if (cmd == DKIOCFLUSHWRITECACHE) { 3365 struct dk_callback *dkc = (struct dk_callback *)arg; 3366 3367 PR0("%s[%d]: DKIOCFLUSHWRITECACHE\n", __func__, instance); 3368 3369 /* no mem should have been allocated hence no need to free it */ 3370 ASSERT(mem_p == NULL); 3371 3372 /* 3373 * If arg is NULL, we break here and the call operates 3374 * synchronously; waiting for vds to return. 3375 * 3376 * i.e. after the request to vds returns successfully, 3377 * all writes completed prior to the ioctl will have been 3378 * flushed from the disk write cache to persistent media. 3379 */ 3380 if (dkc != NULL) { 3381 vdc_dk_arg_t arg; 3382 arg.mode = mode; 3383 arg.dev = dev; 3384 bcopy(dkc, &arg.dkc, sizeof (*dkc)); 3385 3386 mutex_enter(&vdc->lock); 3387 vdc->dkio_flush_pending++; 3388 arg.vdc = vdc; 3389 mutex_exit(&vdc->lock); 3390 3391 /* put the request on a task queue */ 3392 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 3393 (void *)&arg, DDI_SLEEP); 3394 3395 return (rv == NULL ? ENOMEM : 0); 3396 } 3397 } 3398 3399 /* 3400 * send request to vds to service the ioctl. 3401 */ 3402 rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, op, mode, 3403 SDPART((getminor(dev)))); 3404 if (rv != 0) { 3405 /* 3406 * This is not necessarily an error. The ioctl could 3407 * be returning a value such as ENOTTY to indicate 3408 * that the ioctl is not applicable. 3409 */ 3410 PR0("%s[%d]: vds returned %d for ioctl 0x%x\n", 3411 __func__, instance, rv, cmd); 3412 if (mem_p != NULL) 3413 kmem_free(mem_p, alloc_len); 3414 return (rv); 3415 } 3416 3417 /* 3418 * If the VTOC has been changed, then vdc needs to update the copy 3419 * it saved in the soft state structure and try and update the device 3420 * node properties. Failing to set the properties should not cause 3421 * an error to be return the caller though. 3422 */ 3423 if (cmd == DKIOCSVTOC) { 3424 bcopy(mem_p, vdc->vtoc, sizeof (struct vtoc)); 3425 if (vdc_create_device_nodes_props(vdc)) { 3426 cmn_err(CE_NOTE, "![%d] Failed to update device nodes" 3427 " properties", instance); 3428 } 3429 } 3430 3431 /* 3432 * if we don't have to do a copyout, we have nothing left to do 3433 * so we just return. 3434 */ 3435 if ((dk_ioctl[idx].copy & VD_COPYOUT) == 0) { 3436 if (mem_p != NULL) 3437 kmem_free(mem_p, alloc_len); 3438 return (0); 3439 } 3440 3441 /* sanity check */ 3442 if (mem_p == NULL) 3443 return (EFAULT); 3444 3445 3446 /* 3447 * some operations need the data to be converted from 64 bit 3448 * back to 32 bit structures after vds has processed them. 3449 */ 3450 if (do_convert_32to64) { 3451 switch (cmd) { 3452 case DKIOCGVTOC: 3453 { 3454 struct vtoc vt; 3455 struct vtoc32 vt32; 3456 3457 ASSERT(mem_p != NULL); 3458 vt = *((struct vtoc *)(mem_p)); 3459 3460 vtoctovtoc32(vt, vt32); 3461 bcopy(&vt32, mem_p, copy_len); 3462 break; 3463 } 3464 3465 case USCSICMD: 3466 { 3467 struct uscsi_cmd32 *uc32; 3468 struct uscsi_cmd *uc; 3469 3470 len = sizeof (struct uscsi_cmd32); 3471 3472 ASSERT(mem_p != NULL); 3473 uc = (struct uscsi_cmd *)mem_p; 3474 uc32 = kmem_zalloc(len, KM_SLEEP); 3475 3476 uscsi_cmdtouscsi_cmd32(uc, uc32); 3477 bcopy(uc32, mem_p, copy_len); 3478 PR0("%s[%d]: uscsi_cmd32:%x\n", __func__, instance, 3479 ((struct uscsi_cmd32 *)mem_p)->uscsi_cdblen); 3480 kmem_free(uc32, len); 3481 break; 3482 } 3483 default: 3484 PR1("%s[%d]: This mode (%x) should just work for(%x)\n", 3485 __func__, instance, mode, cmd); 3486 break; 3487 } 3488 } 3489 3490 ASSERT(len != 0); 3491 ASSERT(mem_p != NULL); 3492 3493 rv = ddi_copyout(mem_p, (void *)arg, copy_len, mode); 3494 if (rv != 0) { 3495 vdc_msg("%s[%d]: Could not do copy out for ioctl (%x)\n", 3496 __func__, instance, cmd); 3497 rv = EFAULT; 3498 } 3499 3500 if (mem_p != NULL) 3501 kmem_free(mem_p, alloc_len); 3502 3503 return (rv); 3504 } 3505 3506 /* 3507 * Function: 3508 * vdc_create_fake_geometry() 3509 * 3510 * Description: 3511 * This routine fakes up the disk info needed for some DKIO ioctls. 3512 * - DKIOCINFO 3513 * - DKIOCGMEDIAINFO 3514 * 3515 * [ just like lofi(7D) and ramdisk(7D) ] 3516 * 3517 * Arguments: 3518 * vdc - soft state pointer for this instance of the device driver. 3519 * 3520 * Return Code: 3521 * 0 - Success 3522 */ 3523 static int 3524 vdc_create_fake_geometry(vdc_t *vdc) 3525 { 3526 ASSERT(vdc != NULL); 3527 3528 /* 3529 * DKIOCINFO support 3530 */ 3531 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 3532 3533 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 3534 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 3535 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz / vdc->block_size; 3536 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 3537 vdc->cinfo->dki_flags = DKI_FMTVOL; 3538 vdc->cinfo->dki_cnum = 0; 3539 vdc->cinfo->dki_addr = 0; 3540 vdc->cinfo->dki_space = 0; 3541 vdc->cinfo->dki_prio = 0; 3542 vdc->cinfo->dki_vec = 0; 3543 vdc->cinfo->dki_unit = vdc->instance; 3544 vdc->cinfo->dki_slave = 0; 3545 /* 3546 * The partition number will be created on the fly depending on the 3547 * actual slice (i.e. minor node) that is used to request the data. 3548 */ 3549 vdc->cinfo->dki_partition = 0; 3550 3551 /* 3552 * DKIOCGMEDIAINFO support 3553 */ 3554 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 3555 vdc->minfo->dki_media_type = DK_FIXED_DISK; 3556 vdc->minfo->dki_capacity = 1; 3557 vdc->minfo->dki_lbsize = DEV_BSIZE; 3558 3559 return (0); 3560 } 3561