1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. 
vdc will copy the data to be written to the descriptor 49 * ring or maps the buffer to store the data read by the vDisk 50 * server into the descriptor ring. It then sends a message to the 51 * vDisk server requesting it to complete the operation. 52 * 53 * 4) Handling responses from vDisk server. 54 * The vDisk server will ACK some or all of the messages vdc sends to it 55 * (this is configured during the handshake). Upon receipt of an ACK 56 * vdc will check the descriptor ring and signal to the upper layer 57 * code waiting on the IO. 58 */ 59 60 #include <sys/atomic.h> 61 #include <sys/conf.h> 62 #include <sys/disp.h> 63 #include <sys/ddi.h> 64 #include <sys/dkio.h> 65 #include <sys/efi_partition.h> 66 #include <sys/fcntl.h> 67 #include <sys/file.h> 68 #include <sys/mach_descrip.h> 69 #include <sys/modctl.h> 70 #include <sys/mdeg.h> 71 #include <sys/note.h> 72 #include <sys/open.h> 73 #include <sys/sdt.h> 74 #include <sys/stat.h> 75 #include <sys/sunddi.h> 76 #include <sys/types.h> 77 #include <sys/promif.h> 78 #include <sys/vtoc.h> 79 #include <sys/archsystm.h> 80 #include <sys/sysmacros.h> 81 82 #include <sys/cdio.h> 83 #include <sys/dktp/fdisk.h> 84 #include <sys/scsi/generic/sense.h> 85 #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ 86 87 #include <sys/ldoms.h> 88 #include <sys/ldc.h> 89 #include <sys/vio_common.h> 90 #include <sys/vio_mailbox.h> 91 #include <sys/vdsk_common.h> 92 #include <sys/vdsk_mailbox.h> 93 #include <sys/vdc.h> 94 95 /* 96 * function prototypes 97 */ 98 99 /* standard driver functions */ 100 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 101 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 102 static int vdc_strategy(struct buf *buf); 103 static int vdc_print(dev_t dev, char *str); 104 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 105 static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 106 static int vdc_write(dev_t dev, struct uio 
*uio, cred_t *cred); 107 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 108 cred_t *credp, int *rvalp); 109 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 110 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 111 112 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 113 void *arg, void **resultp); 114 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 115 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 116 117 /* setup */ 118 static void vdc_min(struct buf *bufp); 119 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 120 static int vdc_do_ldc_init(vdc_t *vdc); 121 static int vdc_start_ldc_connection(vdc_t *vdc); 122 static int vdc_create_device_nodes(vdc_t *vdc); 123 static int vdc_create_device_nodes_efi(vdc_t *vdc); 124 static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 125 static int vdc_create_device_nodes_props(vdc_t *vdc); 126 static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id); 127 static int vdc_do_ldc_up(vdc_t *vdc); 128 static void vdc_terminate_ldc(vdc_t *vdc); 129 static int vdc_init_descriptor_ring(vdc_t *vdc); 130 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 131 static int vdc_setup_devid(vdc_t *vdc); 132 static void vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi); 133 134 /* handshake with vds */ 135 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 136 static int vdc_ver_negotiation(vdc_t *vdcp); 137 static int vdc_init_attr_negotiation(vdc_t *vdc); 138 static int vdc_attr_negotiation(vdc_t *vdcp); 139 static int vdc_init_dring_negotiate(vdc_t *vdc); 140 static int vdc_dring_negotiation(vdc_t *vdcp); 141 static int vdc_send_rdx(vdc_t *vdcp); 142 static int vdc_rdx_exchange(vdc_t *vdcp); 143 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 144 145 /* processing incoming messages from vDisk server */ 146 static void vdc_process_msg_thread(vdc_t *vdc); 147 static int vdc_recv(vdc_t *vdc, vio_msg_t 
*msgp, size_t *nbytesp); 148 149 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 150 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 151 static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg); 152 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 153 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 154 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 155 static int vdc_send_request(vdc_t *vdcp, int operation, 156 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 157 int cb_type, void *cb_arg, vio_desc_direction_t dir); 158 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 159 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 160 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 161 int cb_type, void *cb_arg, vio_desc_direction_t dir); 162 static int vdc_do_sync_op(vdc_t *vdcp, int operation, 163 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 164 int cb_type, void *cb_arg, vio_desc_direction_t dir); 165 166 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 167 static int vdc_drain_response(vdc_t *vdcp); 168 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 169 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 170 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 171 172 /* dkio */ 173 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); 174 static int vdc_create_fake_geometry(vdc_t *vdc); 175 static int vdc_setup_disk_layout(vdc_t *vdc); 176 static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 177 int mode, int dir); 178 static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 179 int mode, int dir); 180 static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 181 int mode, int dir); 182 static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 183 int mode, int dir); 184 static int vdc_set_vtoc_convert(vdc_t 
*vdc, void *from, void *to, 185 int mode, int dir); 186 static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 187 int mode, int dir); 188 static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 189 int mode, int dir); 190 static int vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to, 191 int mode, int dir); 192 static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 193 int mode, int dir); 194 static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 195 int mode, int dir); 196 197 /* 198 * Module variables 199 */ 200 201 /* 202 * Tunable variables to control how long vdc waits before timing out on 203 * various operations 204 */ 205 static int vdc_retries = 10; 206 static int vdc_hshake_retries = 3; 207 208 /* calculated from 'vdc_usec_timeout' during attach */ 209 static uint64_t vdc_hz_timeout; /* units: Hz */ 210 static uint64_t vdc_usec_timeout = 30 * MICROSEC; /* 30s units: ns */ 211 212 static uint64_t vdc_hz_min_ldc_delay; 213 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 214 static uint64_t vdc_hz_max_ldc_delay; 215 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 216 217 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 218 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 219 220 /* values for dumping - need to run in a tighter loop */ 221 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 222 static int vdc_dump_retries = 100; 223 224 /* Count of the number of vdc instances attached */ 225 static volatile uint32_t vdc_instance_count = 0; 226 227 /* Soft state pointer */ 228 static void *vdc_state; 229 230 /* 231 * Controlling the verbosity of the error/debug messages 232 * 233 * vdc_msglevel - controls level of messages 234 * vdc_matchinst - 64-bit variable where each bit corresponds 235 * to the vdc instance the vdc_msglevel applies. 236 */ 237 int vdc_msglevel = 0x0; 238 uint64_t vdc_matchinst = 0ull; 239 240 /* 241 * Supported vDisk protocol version pairs. 
242 * 243 * The first array entry is the latest and preferred version. 244 */ 245 static const vio_ver_t vdc_version[] = {{1, 0}}; 246 247 static struct cb_ops vdc_cb_ops = { 248 vdc_open, /* cb_open */ 249 vdc_close, /* cb_close */ 250 vdc_strategy, /* cb_strategy */ 251 vdc_print, /* cb_print */ 252 vdc_dump, /* cb_dump */ 253 vdc_read, /* cb_read */ 254 vdc_write, /* cb_write */ 255 vdc_ioctl, /* cb_ioctl */ 256 nodev, /* cb_devmap */ 257 nodev, /* cb_mmap */ 258 nodev, /* cb_segmap */ 259 nochpoll, /* cb_chpoll */ 260 ddi_prop_op, /* cb_prop_op */ 261 NULL, /* cb_str */ 262 D_MP | D_64BIT, /* cb_flag */ 263 CB_REV, /* cb_rev */ 264 vdc_aread, /* cb_aread */ 265 vdc_awrite /* cb_awrite */ 266 }; 267 268 static struct dev_ops vdc_ops = { 269 DEVO_REV, /* devo_rev */ 270 0, /* devo_refcnt */ 271 vdc_getinfo, /* devo_getinfo */ 272 nulldev, /* devo_identify */ 273 nulldev, /* devo_probe */ 274 vdc_attach, /* devo_attach */ 275 vdc_detach, /* devo_detach */ 276 nodev, /* devo_reset */ 277 &vdc_cb_ops, /* devo_cb_ops */ 278 NULL, /* devo_bus_ops */ 279 nulldev /* devo_power */ 280 }; 281 282 static struct modldrv modldrv = { 283 &mod_driverops, 284 "virtual disk client %I%", 285 &vdc_ops, 286 }; 287 288 static struct modlinkage modlinkage = { 289 MODREV_1, 290 &modldrv, 291 NULL 292 }; 293 294 /* -------------------------------------------------------------------------- */ 295 296 /* 297 * Device Driver housekeeping and setup 298 */ 299 300 int 301 _init(void) 302 { 303 int status; 304 305 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 306 return (status); 307 if ((status = mod_install(&modlinkage)) != 0) 308 ddi_soft_state_fini(&vdc_state); 309 vdc_efi_init(vd_process_ioctl); 310 return (status); 311 } 312 313 int 314 _info(struct modinfo *modinfop) 315 { 316 return (mod_info(&modlinkage, modinfop)); 317 } 318 319 int 320 _fini(void) 321 { 322 int status; 323 324 if ((status = mod_remove(&modlinkage)) != 0) 325 return (status); 326 
vdc_efi_fini(); 327 ddi_soft_state_fini(&vdc_state); 328 return (0); 329 } 330 331 static int 332 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 333 { 334 _NOTE(ARGUNUSED(dip)) 335 336 int instance = VDCUNIT((dev_t)arg); 337 vdc_t *vdc = NULL; 338 339 switch (cmd) { 340 case DDI_INFO_DEVT2DEVINFO: 341 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 342 *resultp = NULL; 343 return (DDI_FAILURE); 344 } 345 *resultp = vdc->dip; 346 return (DDI_SUCCESS); 347 case DDI_INFO_DEVT2INSTANCE: 348 *resultp = (void *)(uintptr_t)instance; 349 return (DDI_SUCCESS); 350 default: 351 *resultp = NULL; 352 return (DDI_FAILURE); 353 } 354 } 355 356 static int 357 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 358 { 359 int instance; 360 int rv; 361 vdc_t *vdc = NULL; 362 363 switch (cmd) { 364 case DDI_DETACH: 365 /* the real work happens below */ 366 break; 367 case DDI_SUSPEND: 368 /* nothing to do for this non-device */ 369 return (DDI_SUCCESS); 370 default: 371 return (DDI_FAILURE); 372 } 373 374 ASSERT(cmd == DDI_DETACH); 375 instance = ddi_get_instance(dip); 376 DMSGX(1, "[%d] Entered\n", instance); 377 378 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 379 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 380 return (DDI_FAILURE); 381 } 382 383 if (vdc->open_count) { 384 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 385 return (DDI_FAILURE); 386 } 387 388 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 389 390 /* mark instance as detaching */ 391 vdc->lifecycle = VDC_LC_DETACHING; 392 393 /* 394 * try and disable callbacks to prevent another handshake 395 */ 396 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 397 DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv); 398 399 if (vdc->initialized & VDC_THREAD) { 400 mutex_enter(&vdc->read_lock); 401 if ((vdc->read_state == VDC_READ_WAITING) || 402 (vdc->read_state == VDC_READ_RESET)) { 403 vdc->read_state = VDC_READ_RESET; 404 
cv_signal(&vdc->read_cv); 405 } 406 407 mutex_exit(&vdc->read_lock); 408 409 /* wake up any thread waiting for connection to come online */ 410 mutex_enter(&vdc->lock); 411 if (vdc->state == VDC_STATE_INIT_WAITING) { 412 DMSG(vdc, 0, 413 "[%d] write reset - move to resetting state...\n", 414 instance); 415 vdc->state = VDC_STATE_RESETTING; 416 cv_signal(&vdc->initwait_cv); 417 } 418 mutex_exit(&vdc->lock); 419 420 /* now wait until state transitions to VDC_STATE_DETACH */ 421 thread_join(vdc->msg_proc_thr->t_did); 422 ASSERT(vdc->state == VDC_STATE_DETACH); 423 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 424 vdc->instance); 425 } 426 427 mutex_enter(&vdc->lock); 428 429 if (vdc->initialized & VDC_DRING) 430 vdc_destroy_descriptor_ring(vdc); 431 432 if (vdc->initialized & VDC_LDC) 433 vdc_terminate_ldc(vdc); 434 435 mutex_exit(&vdc->lock); 436 437 if (vdc->initialized & VDC_MINOR) { 438 ddi_prop_remove_all(dip); 439 ddi_remove_minor_node(dip, NULL); 440 } 441 442 if (vdc->initialized & VDC_LOCKS) { 443 mutex_destroy(&vdc->lock); 444 mutex_destroy(&vdc->read_lock); 445 cv_destroy(&vdc->initwait_cv); 446 cv_destroy(&vdc->dring_free_cv); 447 cv_destroy(&vdc->membind_cv); 448 cv_destroy(&vdc->sync_pending_cv); 449 cv_destroy(&vdc->sync_blocked_cv); 450 cv_destroy(&vdc->read_cv); 451 cv_destroy(&vdc->running_cv); 452 } 453 454 if (vdc->minfo) 455 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 456 457 if (vdc->cinfo) 458 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 459 460 if (vdc->vtoc) 461 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 462 463 if (vdc->label) 464 kmem_free(vdc->label, DK_LABEL_SIZE); 465 466 if (vdc->devid) { 467 ddi_devid_unregister(dip); 468 ddi_devid_free(vdc->devid); 469 } 470 471 if (vdc->initialized & VDC_SOFT_STATE) 472 ddi_soft_state_free(vdc_state, instance); 473 474 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 475 476 return (DDI_SUCCESS); 477 } 478 479 480 static int 481 vdc_do_attach(dev_info_t *dip) 482 { 483 int 
instance; 484 vdc_t *vdc = NULL; 485 int status; 486 487 ASSERT(dip != NULL); 488 489 instance = ddi_get_instance(dip); 490 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 491 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 492 instance); 493 return (DDI_FAILURE); 494 } 495 496 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 497 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 498 return (DDI_FAILURE); 499 } 500 501 /* 502 * We assign the value to initialized in this case to zero out the 503 * variable and then set bits in it to indicate what has been done 504 */ 505 vdc->initialized = VDC_SOFT_STATE; 506 507 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 508 509 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 510 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 511 512 vdc->dip = dip; 513 vdc->instance = instance; 514 vdc->open_count = 0; 515 vdc->vdisk_type = VD_DISK_TYPE_UNK; 516 vdc->vdisk_label = VD_DISK_LABEL_UNK; 517 vdc->state = VDC_STATE_INIT; 518 vdc->lifecycle = VDC_LC_ATTACHING; 519 vdc->ldc_state = 0; 520 vdc->session_id = 0; 521 vdc->block_size = DEV_BSIZE; 522 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 523 524 vdc->vtoc = NULL; 525 vdc->cinfo = NULL; 526 vdc->minfo = NULL; 527 528 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 529 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 530 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 531 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 532 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 533 534 vdc->threads_pending = 0; 535 vdc->sync_op_pending = B_FALSE; 536 vdc->sync_op_blocked = B_FALSE; 537 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 538 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 539 540 /* init blocking msg read functionality */ 541 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 542 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 543 vdc->read_state = VDC_READ_IDLE; 544 545 
vdc->initialized |= VDC_LOCKS; 546 547 /* initialise LDC channel which will be used to communicate with vds */ 548 if ((status = vdc_do_ldc_init(vdc)) != 0) { 549 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 550 goto return_status; 551 } 552 553 /* initialize the thread responsible for managing state with server */ 554 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 555 vdc, 0, &p0, TS_RUN, minclsyspri); 556 if (vdc->msg_proc_thr == NULL) { 557 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 558 instance); 559 return (DDI_FAILURE); 560 } 561 562 vdc->initialized |= VDC_THREAD; 563 564 atomic_inc_32(&vdc_instance_count); 565 566 /* 567 * Once the handshake is complete, we can use the DRing to send 568 * requests to the vDisk server to calculate the geometry and 569 * VTOC of the "disk" 570 */ 571 status = vdc_setup_disk_layout(vdc); 572 if (status != 0) { 573 DMSG(vdc, 0, "[%d] Failed to discover disk layout (err%d)", 574 vdc->instance, status); 575 goto return_status; 576 } 577 578 /* 579 * Now that we have the device info we can create the 580 * device nodes and properties 581 */ 582 status = vdc_create_device_nodes(vdc); 583 if (status) { 584 DMSG(vdc, 0, "[%d] Failed to create device nodes", 585 instance); 586 goto return_status; 587 } 588 status = vdc_create_device_nodes_props(vdc); 589 if (status) { 590 DMSG(vdc, 0, "[%d] Failed to create device nodes" 591 " properties (%d)", instance, status); 592 goto return_status; 593 } 594 595 /* 596 * Setup devid 597 */ 598 if (vdc_setup_devid(vdc)) { 599 DMSG(vdc, 0, "[%d] No device id available\n", instance); 600 } 601 602 ddi_report_dev(dip); 603 vdc->lifecycle = VDC_LC_ONLINE; 604 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 605 606 return_status: 607 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 608 return (status); 609 } 610 611 static int 612 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 613 { 614 int status; 615 616 switch (cmd) { 617 case 
DDI_ATTACH: 618 if ((status = vdc_do_attach(dip)) != 0) 619 (void) vdc_detach(dip, DDI_DETACH); 620 return (status); 621 case DDI_RESUME: 622 /* nothing to do for this non-device */ 623 return (DDI_SUCCESS); 624 default: 625 return (DDI_FAILURE); 626 } 627 } 628 629 static int 630 vdc_do_ldc_init(vdc_t *vdc) 631 { 632 int status = 0; 633 ldc_status_t ldc_state; 634 ldc_attr_t ldc_attr; 635 uint64_t ldc_id = 0; 636 dev_info_t *dip = NULL; 637 638 ASSERT(vdc != NULL); 639 640 dip = vdc->dip; 641 vdc->initialized |= VDC_LDC; 642 643 if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) { 644 DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property", 645 vdc->instance); 646 return (EIO); 647 } 648 vdc->ldc_id = ldc_id; 649 650 ldc_attr.devclass = LDC_DEV_BLK; 651 ldc_attr.instance = vdc->instance; 652 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 653 ldc_attr.mtu = VD_LDC_MTU; 654 655 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 656 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 657 if (status != 0) { 658 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 659 vdc->instance, ldc_id, status); 660 return (status); 661 } 662 vdc->initialized |= VDC_LDC_INIT; 663 } 664 status = ldc_status(vdc->ldc_handle, &ldc_state); 665 if (status != 0) { 666 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 667 vdc->instance, status); 668 return (status); 669 } 670 vdc->ldc_state = ldc_state; 671 672 if ((vdc->initialized & VDC_LDC_CB) == 0) { 673 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 674 (caddr_t)vdc); 675 if (status != 0) { 676 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 677 vdc->instance, status); 678 return (status); 679 } 680 vdc->initialized |= VDC_LDC_CB; 681 } 682 683 vdc->initialized |= VDC_LDC; 684 685 /* 686 * At this stage we have initialised LDC, we will now try and open 687 * the connection. 
688 */ 689 if (vdc->ldc_state == LDC_INIT) { 690 status = ldc_open(vdc->ldc_handle); 691 if (status != 0) { 692 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 693 vdc->instance, vdc->ldc_id, status); 694 return (status); 695 } 696 vdc->initialized |= VDC_LDC_OPEN; 697 } 698 699 return (status); 700 } 701 702 static int 703 vdc_start_ldc_connection(vdc_t *vdc) 704 { 705 int status = 0; 706 707 ASSERT(vdc != NULL); 708 709 ASSERT(MUTEX_HELD(&vdc->lock)); 710 711 status = vdc_do_ldc_up(vdc); 712 713 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 714 715 return (status); 716 } 717 718 static int 719 vdc_stop_ldc_connection(vdc_t *vdcp) 720 { 721 int status; 722 723 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 724 vdcp->state); 725 726 status = ldc_down(vdcp->ldc_handle); 727 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 728 729 vdcp->initialized &= ~VDC_HANDSHAKE; 730 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 731 732 return (status); 733 } 734 735 static int 736 vdc_create_device_nodes_efi(vdc_t *vdc) 737 { 738 ddi_remove_minor_node(vdc->dip, "h"); 739 ddi_remove_minor_node(vdc->dip, "h,raw"); 740 741 if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 742 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 743 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 744 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 745 vdc->instance); 746 return (EIO); 747 } 748 749 /* if any device node is created we set this flag */ 750 vdc->initialized |= VDC_MINOR; 751 752 if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 753 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 754 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 755 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 756 vdc->instance); 757 return (EIO); 758 } 759 760 return (0); 761 } 762 763 static int 764 vdc_create_device_nodes_vtoc(vdc_t *vdc) 765 { 766 ddi_remove_minor_node(vdc->dip, "wd"); 767 ddi_remove_minor_node(vdc->dip, "wd,raw"); 768 769 if (ddi_create_minor_node(vdc->dip, "h", 
S_IFBLK, 770 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 771 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 772 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 773 vdc->instance); 774 return (EIO); 775 } 776 777 /* if any device node is created we set this flag */ 778 vdc->initialized |= VDC_MINOR; 779 780 if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 781 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 782 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 783 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 784 vdc->instance); 785 return (EIO); 786 } 787 788 return (0); 789 } 790 791 /* 792 * Function: 793 * vdc_create_device_nodes 794 * 795 * Description: 796 * This function creates the block and character device nodes under 797 * /devices along with the node properties. It is called as part of 798 * the attach(9E) of the instance during the handshake with vds after 799 * vds has sent the attributes to vdc. 800 * 801 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 802 * of 2 is used in keeping with the Solaris convention that slice 2 803 * refers to a whole disk. 
Slices start at 'a' 804 * 805 * Parameters: 806 * vdc - soft state pointer 807 * 808 * Return Values 809 * 0 - Success 810 * EIO - Failed to create node 811 * EINVAL - Unknown type of disk exported 812 */ 813 static int 814 vdc_create_device_nodes(vdc_t *vdc) 815 { 816 char name[sizeof ("s,raw")]; 817 dev_info_t *dip = NULL; 818 int instance, status; 819 int num_slices = 1; 820 int i; 821 822 ASSERT(vdc != NULL); 823 824 instance = vdc->instance; 825 dip = vdc->dip; 826 827 switch (vdc->vdisk_type) { 828 case VD_DISK_TYPE_DISK: 829 num_slices = V_NUMPAR; 830 break; 831 case VD_DISK_TYPE_SLICE: 832 num_slices = 1; 833 break; 834 case VD_DISK_TYPE_UNK: 835 default: 836 return (EINVAL); 837 } 838 839 /* 840 * Minor nodes are different for EFI disks: EFI disks do not have 841 * a minor node 'g' for the minor number corresponding to slice 842 * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 843 * representing the whole disk. 844 */ 845 for (i = 0; i < num_slices; i++) { 846 847 if (i == VD_EFI_WD_SLICE) { 848 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 849 status = vdc_create_device_nodes_efi(vdc); 850 else 851 status = vdc_create_device_nodes_vtoc(vdc); 852 if (status != 0) 853 return (status); 854 continue; 855 } 856 857 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 858 if (ddi_create_minor_node(dip, name, S_IFBLK, 859 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 860 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 861 instance, name); 862 return (EIO); 863 } 864 865 /* if any device node is created we set this flag */ 866 vdc->initialized |= VDC_MINOR; 867 868 (void) snprintf(name, sizeof (name), "%c%s", 869 'a' + i, ",raw"); 870 if (ddi_create_minor_node(dip, name, S_IFCHR, 871 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 872 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 873 instance, name); 874 return (EIO); 875 } 876 } 877 878 return (0); 879 } 880 881 /* 882 * Function: 883 * 
vdc_create_device_nodes_props 884 * 885 * Description: 886 * This function creates the block and character device nodes under 887 * /devices along with the node properties. It is called as part of 888 * the attach(9E) of the instance during the handshake with vds after 889 * vds has sent the attributes to vdc. 890 * 891 * Parameters: 892 * vdc - soft state pointer 893 * 894 * Return Values 895 * 0 - Success 896 * EIO - Failed to create device node property 897 * EINVAL - Unknown type of disk exported 898 */ 899 static int 900 vdc_create_device_nodes_props(vdc_t *vdc) 901 { 902 dev_info_t *dip = NULL; 903 int instance; 904 int num_slices = 1; 905 int64_t size = 0; 906 dev_t dev; 907 int rv; 908 int i; 909 910 ASSERT(vdc != NULL); 911 912 instance = vdc->instance; 913 dip = vdc->dip; 914 915 if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { 916 DMSG(vdc, 0, "![%d] Could not create device node property." 917 " No VTOC available", instance); 918 return (ENXIO); 919 } 920 921 switch (vdc->vdisk_type) { 922 case VD_DISK_TYPE_DISK: 923 num_slices = V_NUMPAR; 924 break; 925 case VD_DISK_TYPE_SLICE: 926 num_slices = 1; 927 break; 928 case VD_DISK_TYPE_UNK: 929 default: 930 return (EINVAL); 931 } 932 933 for (i = 0; i < num_slices; i++) { 934 dev = makedevice(ddi_driver_major(dip), 935 VD_MAKE_DEV(instance, i)); 936 937 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 938 DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 939 instance, size, size / (1024 * 1024), 940 vdc->vtoc->v_part[i].p_size); 941 942 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 943 if (rv != DDI_PROP_SUCCESS) { 944 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 945 instance, VDC_SIZE_PROP_NAME, size); 946 return (EIO); 947 } 948 949 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 950 lbtodb(size)); 951 if (rv != DDI_PROP_SUCCESS) { 952 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 953 instance, VDC_NBLOCKS_PROP_NAME, 
	    lbtodb(size));
			return (EIO);
		}
	}

	return (0);
}

/*
 * Function:
 *	vdc_open()
 *
 * Description:
 *	open(9E) entry point.  Looks up the soft state for this instance and
 *	bumps the open count under vdc->lock.  Only character and block opens
 *	are accepted.
 */
static int
vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	ASSERT(dev != NULL);
	instance = VDCUNIT(*dev);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n",
	    getminor(*dev), flag, otyp);

	mutex_enter(&vdc->lock);
	vdc->open_count++;
	mutex_exit(&vdc->lock);

	return (0);
}

/*
 * Function:
 *	vdc_close()
 *
 * Description:
 *	close(9E) entry point.  Refuses to close (EBUSY) while DKIO cache
 *	flushes are still outstanding; otherwise decrements the open count.
 *	NOTE(review): on the EBUSY path the open count is NOT decremented —
 *	presumably the caller retries the close; confirm against the framework.
 */
static int
vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	instance = VDCUNIT(dev);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp);
	if (vdc->dkio_flush_pending) {
		DMSG(vdc, 0,
		    "[%d] Cannot detach: %d outstanding DKIO flushes\n",
		    instance, vdc->dkio_flush_pending);
		return (EBUSY);
	}

	/*
	 * Should not need the mutex here, since the framework should protect
	 * against more opens on this device, but just in case.
	 */
	mutex_enter(&vdc->lock);
	vdc->open_count--;
	mutex_exit(&vdc->lock);

	return (0);
}

/*
 * ioctl(9E) entry point; all the work is done by vd_process_ioctl().
 */
static int
vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	_NOTE(ARGUNUSED(credp))
	_NOTE(ARGUNUSED(rvalp))

	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
}

/*
 * print(9E) entry point.
 */
static int
vdc_print(dev_t dev, char *str)
{
	cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str);
	return (0);
}

/*
 * Function:
 *	vdc_dump()
 *
 * Description:
 *	dump(9E) entry point, used for crash dumps.  Writes 'nblk' blocks
 *	starting at 'blkno' via a synchronous BWRITE request; when called in
 *	panic context the response queue is drained inline since interrupts
 *	are not available.
 */
static int
vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int	rv;
	size_t	nbytes = nblk * DEV_BSIZE;
	int	instance = VDCUNIT(dev);
	vdc_t	*vdc = NULL;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n",
	    instance, nbytes, blkno, (void *)addr);
	rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes,
	    VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir);
	if (rv) {
		DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv);
		return (rv);
	}

	if (ddi_in_panic())
		(void) vdc_drain_response(vdc);

	DMSG(vdc, 0, "[%d] End\n", instance);

	return (0);
}

/* -------------------------------------------------------------------------- */

/*
 * Disk access routines
 *
 */

/*
 * vdc_strategy()
 *
 * Return Value:
 *	0:	As per strategy(9E), the strategy() function must return 0
 *		[ bioerror(9f) sets b_flags to the proper error code ]
 */
static int
vdc_strategy(struct buf *buf)
{
	int	rv = -1;
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(buf->b_edev);
	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n",
	    instance, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr);
	DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc);

	bp_mapin(buf);

	rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr,
	    buf->b_bcount, VDCPART(buf->b_edev), buf->b_lblkno,
	    CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir :
	    VIO_write_dir);

	/*
	 * If the request was successfully sent, the strategy call returns and
	 * the ACK handler calls the bioxxx functions when the vDisk server is
	 * done.
	 */
	if (rv) {
		DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv);
		bioerror(buf, rv);
		biodone(buf);
	}

	return (0);
}

/*
 * Function:
 *	vdc_min
 *
 * Description:
 *	Routine to limit the size of a data transfer.  Used in
 *	conjunction with physio(9F).  Clamps b_bcount to the maximum
 *	transfer size negotiated with the vDisk server.
 *
 * Arguments:
 *	bp - pointer to the indicated buf(9S) struct.
 *
 */
static void
vdc_min(struct buf *bufp)
{
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(bufp->b_edev);

	vdc = ddi_get_soft_state(vdc_state, instance);
	VERIFY(vdc != NULL);

	if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) {
		bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size;
	}
}

/* read(9E) entry point: raw read via physio(9F). */
static int
vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio));
}

/* write(9E) entry point: raw write via physio(9F). */
static int
vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio));
}

/* aread(9E) entry point: async raw read via aphysio(). */
static int
vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio));
}

/* awrite(9E) entry point: async raw write via aphysio(). */
static int
vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio));
}


/* -------------------------------------------------------------------------- */

/*
 * Handshake support
 */


/*
 * Function:
 *	vdc_init_ver_negotiation()
 *
 * Description:
 *	Pick a new session ID and send a version-negotiation INFO message
 *	(proposing 'ver') to the vDisk server.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
{
	vio_ver_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status = -1;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance);

	/*
	 * set the Session ID to a unique value
	 * (the lower 32 bits of the clock tick)
	 */
	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);
	DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id);

	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_VER_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	pkt.dev_class = VDEV_DISK;
	pkt.ver_major = ver.major;
	pkt.ver_minor = ver.minor;

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n",
	    vdc->instance, status);
	/* a short write means the message did not go out intact */
	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
		DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: "
		    "id(%lx) rv(%d) size(%ld)",
		    vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vio_ver_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_ver_negotiation()
 *
 * Description:
 *	Run one round of version negotiation: send our highest supported
 *	version, drop vdcp->lock while waiting for the server's reply, then
 *	hand the reply to vdc_handle_ver_msg().
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_ver_negotiation(vdc_t *vdcp)
{
	vio_msg_t vio_msg;
	int status;

	if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0]))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Ver negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/*
	 * check type and sub_type ... (we sent an INFO, so a valid reply
	 * must be a CTRL message that is not itself an INFO)
	 */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg));
}

/*
 * Function:
 *	vdc_init_attr_negotiation()
 *
 * Description:
 *	Send an attribute-negotiation INFO message describing our transfer
 *	limits and block size to the vDisk server.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
1303 * 1304 * Return Code: 1305 * 0 - Success 1306 */ 1307 static int 1308 vdc_init_attr_negotiation(vdc_t *vdc) 1309 { 1310 vd_attr_msg_t pkt; 1311 size_t msglen = sizeof (pkt); 1312 int status; 1313 1314 ASSERT(vdc != NULL); 1315 ASSERT(mutex_owned(&vdc->lock)); 1316 1317 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 1318 1319 /* fill in tag */ 1320 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1321 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1322 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1323 pkt.tag.vio_sid = vdc->session_id; 1324 /* fill in payload */ 1325 pkt.max_xfer_sz = vdc->max_xfer_sz; 1326 pkt.vdisk_block_size = vdc->block_size; 1327 pkt.xfer_mode = VIO_DRING_MODE; 1328 pkt.operations = 0; /* server will set bits of valid operations */ 1329 pkt.vdisk_type = 0; /* server will set to valid device type */ 1330 pkt.vdisk_size = 0; /* server will set to valid size */ 1331 1332 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1333 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 1334 1335 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1336 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 1337 "id(%lx) rv(%d) size(%ld)", 1338 vdc->instance, vdc->ldc_handle, 1339 status, msglen); 1340 if (msglen != sizeof (vio_ver_msg_t)) 1341 status = ENOMSG; 1342 } 1343 1344 return (status); 1345 } 1346 1347 /* 1348 * Function: 1349 * vdc_attr_negotiation() 1350 * 1351 * Description: 1352 * 1353 * Arguments: 1354 * vdc - soft state pointer for this instance of the device driver. 
1355 * 1356 * Return Code: 1357 * 0 - Success 1358 */ 1359 static int 1360 vdc_attr_negotiation(vdc_t *vdcp) 1361 { 1362 int status; 1363 vio_msg_t vio_msg; 1364 1365 if (status = vdc_init_attr_negotiation(vdcp)) 1366 return (status); 1367 1368 /* release lock and wait for response */ 1369 mutex_exit(&vdcp->lock); 1370 status = vdc_wait_for_response(vdcp, &vio_msg); 1371 mutex_enter(&vdcp->lock); 1372 if (status) { 1373 DMSG(vdcp, 0, 1374 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 1375 vdcp->instance, status); 1376 return (status); 1377 } 1378 1379 /* check type and sub_type ... */ 1380 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1381 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1382 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 1383 vdcp->instance); 1384 return (EPROTO); 1385 } 1386 1387 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 1388 } 1389 1390 1391 /* 1392 * Function: 1393 * vdc_init_dring_negotiate() 1394 * 1395 * Description: 1396 * 1397 * Arguments: 1398 * vdc - soft state pointer for this instance of the device driver. 
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_dring_negotiate(vdc_t *vdc)
{
	vio_dring_reg_msg_t	pkt;
	size_t			msglen = sizeof (pkt);
	int			status = -1;
	int			retry;
	int			nretries = 10;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	/* ring creation can transiently fail with EAGAIN; retry with a delay */
	for (retry = 0; retry < nretries; retry++) {
		status = vdc_init_descriptor_ring(vdc);
		if (status != EAGAIN)
			break;
		drv_usecwait(vdc_min_timeout_ldc);
	}

	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n",
		    vdc->instance, status);
		return (status);
	}

	DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n",
	    vdc->instance, status);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_DRING_REG;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.dring_ident = 0;
	pkt.num_descriptors = vdc->dring_len;
	pkt.descriptor_size = vdc->dring_entry_size;
	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
	pkt.ncookies = vdc->dring_cookie_count;
	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)",
		    vdc->instance, status);
	}

	return (status);
}


/*
 * Function:
 *	vdc_dring_negotiation()
 *
 * Description:
 *	Run one round of DRing registration: send the registration message,
 *	drop vdcp->lock while waiting for the server's reply, then hand the
 *	reply to vdc_handle_dring_reg_msg().
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_dring_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_dring_negotiate(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Dring negotiation response,"
		    " rv(%d)", vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_dring_reg_msg(vdcp,
	    (vio_dring_reg_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_send_rdx()
 *
 * Description:
 *	Send an RDX (ready-to-exchange-data) control message to vds.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_send_rdx(vdc_t *vdcp)
{
	vio_msg_t	msg;
	size_t		msglen = sizeof (vio_msg_t);
	int		status;

	/*
	 * Send an RDX message to vds to indicate we are ready
	 * to send data
	 */
	msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	msg.tag.vio_subtype_env = VIO_RDX;
	msg.tag.vio_sid = vdcp->session_id;
	status = vdc_send(vdcp, (caddr_t)&msg, &msglen);
	if (status != 0) {
		DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)",
		    vdcp->instance, status);
	}

	return (status);
}

/*
 * Function:
 *	vdc_handle_rdx()
 *
 * Description:
 *	Sanity-check (via ASSERTs) the RDX ACK received from vds.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	msgp	- received msg
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
{
	_NOTE(ARGUNUSED(vdcp))
	_NOTE(ARGUNUSED(msgp))

	/* args are only used by ASSERT/DMSG, hence the ARGUNUSED above */
	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);

	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);

	return (0);
}

/*
 * Function:
 *	vdc_rdx_exchange()
 *
 * Description:
 *	Final handshake step: send RDX, drop vdcp->lock while waiting for
 *	the server's ACK, and validate the reply.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_rdx_exchange(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_send_rdx(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for RDX response,"
		    " rv(%d)", vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
		DMSG(vdcp, 0, "[%d] Invalid RDX response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

/*
 * Receive one message from the LDC channel into *msgp.  Blocks on read_cv
 * until a message is pending (or the connection is reset), then polls
 * ldc_read() with geometric backoff until the full message has arrived.
 */
static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	boolean_t	q_has_pkts = B_FALSE;
	int		delay_time;
	size_t		len;

	mutex_enter(&vdc->read_lock);

	if (vdc->read_state == VDC_READ_IDLE)
		vdc->read_state = VDC_READ_WAITING;

	while (vdc->read_state != VDC_READ_PENDING) {

		/* detect if the connection has been reset */
		if (vdc->read_state == VDC_READ_RESET) {
			status = ECONNRESET;
			goto done;
		}

		cv_wait(&vdc->read_cv, &vdc->read_lock);
	}

	/*
	 * Until we get a blocking ldc read we have to retry
	 * until the entire LDC message has arrived before
	 * ldc_read() will succeed. Note we also bail out if
	 * the channel is reset or goes away.
	 */
	delay_time = vdc_ldc_read_init_delay;
loop:
	len = *nbytesp;
	status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len);
	switch (status) {
	case EAGAIN:
		/* geometric backoff, capped at vdc_ldc_read_max_delay */
		delay_time *= 2;
		if (delay_time >= vdc_ldc_read_max_delay)
			delay_time = vdc_ldc_read_max_delay;
		delay(delay_time);
		goto loop;

	case 0:
		if (len == 0) {
			DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with "
			    "no error!\n", vdc->instance);
			goto loop;
		}

		*nbytesp = len;

		/*
		 * If there are pending messages, leave the
		 * read state as pending. Otherwise, set the state
		 * back to idle.
		 */
		status = ldc_chkq(vdc->ldc_handle, &q_has_pkts);
		if (status == 0 && !q_has_pkts)
			vdc->read_state = VDC_READ_IDLE;

		break;
	default:
		DMSG(vdc, 0, "ldc_read returned %d\n", status);
		break;
	}

done:
	mutex_exit(&vdc->read_lock);

	return (status);
}



#ifdef DEBUG
/*
 * Debug-only helper: decode a VIO message tag into human-readable
 * type/subtype/env strings and log them via DMSG.
 */
void
vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg)
{
	char *ms, *ss, *ses;
	switch (msg->tag.vio_msgtype) {
#define	Q(_s)	case _s : ms = #_s; break;
	Q(VIO_TYPE_CTRL)
	Q(VIO_TYPE_DATA)
	Q(VIO_TYPE_ERR)
#undef Q
	default: ms = "unknown"; break;
	}

	switch (msg->tag.vio_subtype) {
#define	Q(_s)	case _s : ss = #_s; break;
	Q(VIO_SUBTYPE_INFO)
	Q(VIO_SUBTYPE_ACK)
	Q(VIO_SUBTYPE_NACK)
#undef Q
	default: ss = "unknown"; break;
	}

	switch (msg->tag.vio_subtype_env) {
#define	Q(_s)	case _s : ses = #_s; break;
	Q(VIO_VER_INFO)
	Q(VIO_ATTR_INFO)
	Q(VIO_DRING_REG)
	Q(VIO_DRING_UNREG)
	Q(VIO_RDX)
	Q(VIO_PKT_DATA)
	Q(VIO_DESC_DATA)
	Q(VIO_DRING_DATA)
#undef Q
	default: ses = "unknown"; break;
	}

	DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, ms, ss, ses);
}
#endif

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise
 *	we return the error returned by LDC.
 *
 * Arguments:
 *	ldc_handle	- LDC handle for the channel this instance of vdc uses
 *	pkt		- address of LDC message to be sent
 *	msglen		- the size of the message being sent. When the function
 *			  returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0		- Success.
 *	EINVAL		- pkt or msglen were NULL
 *	ECONNRESET	- The connection was not up.
 *	EWOULDBLOCK	- LDC queue is full
 *	xxx		- other error codes returned by ldc_write
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	status = 0;
	clock_t delay_ticks;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

#ifdef DEBUG
	vdc_decode_tag(vdc, (vio_msg_t *)pkt);
#endif
	/*
	 * Wait indefinitely to send if channel
	 * is busy, but bail out if we succeed or
	 * if the channel closes or is reset.
	 */
	delay_ticks = vdc_hz_min_ldc_delay;
	do {
		size = *msglen;
		status = ldc_write(vdc->ldc_handle, pkt, &size);
		if (status == EWOULDBLOCK) {
			delay(delay_ticks);
			/* geometric backoff */
			delay_ticks *= 2;
			if (delay_ticks > vdc_hz_max_ldc_delay)
				delay_ticks = vdc_hz_max_ldc_delay;
		}
	} while (status == EWOULDBLOCK);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		/* LDC had serious issues --- reset vdc state */
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET))
			cv_signal(&vdc->read_cv);
		vdc->read_state = VDC_READ_RESET;
		mutex_exit(&vdc->read_lock);

		/* wake up any waiters in the reset thread */
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0, "[%d] write reset - "
			    "vdc is resetting ..\n", vdc->instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}

		return (ECONNRESET);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}

/*
 * Function:
 *	vdc_get_ldc_id()
 *
 * Description:
 *	This function gets the 'ldc-id' for this particular instance of vdc.
 *	The id returned is the guest domain channel endpoint LDC uses for
 *	communication with vds.
 *
 * Arguments:
 *	dip	- dev info pointer for this instance of the device driver.
 *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 *	ENXIO	- Unexpected error communicating with MD framework
 */
static int
vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id)
{
	int		status = ENOENT;
	char		*node_name = NULL;
	md_t		*mdp = NULL;
	int		num_nodes;
	int		num_vdevs;
	int		num_chans;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	*chanp = NULL;
	boolean_t	found_inst = B_FALSE;
	int		listsz;
	int		idx;
	uint64_t	md_inst;
	int		obp_inst;
	int		instance = ddi_get_instance(dip);

	ASSERT(ldc_id != NULL);
	*ldc_id = 0;

	/*
	 * Get the OBP instance number for comparison with the MD instance
	 *
	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris.  Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance.
	 * If the "reg" property cannot be found, the device tree state is
	 * presumably so broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
		return (ENOENT);
	}
	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    OBP_REG, -1);
	DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst);

	/*
	 * We now walk the MD nodes and if an instance of a vdc node matches
	 * the instance got from OBP we get the ldc-id property.
	 */
	if ((mdp = md_get_handle()) == NULL) {
		cmn_err(CE_WARN, "unable to init machine description");
		return (ENXIO);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);

	/* allocate memory for nodes */
	listp = kmem_zalloc(listsz, KM_SLEEP);
	chanp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Search for all the virtual devices, we will then check to see which
	 * ones are disk nodes.
	 */
	num_vdevs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, VDC_MD_VDEV_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdevs <= 0) {
		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;
	}

	DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs);
	for (idx = 0; idx < num_vdevs; idx++) {
		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
		if ((status != 0) || (node_name == NULL)) {
			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
			    ": err %d", VDC_MD_VDEV_NAME, status);
			continue;
		}

		DMSGX(1, "[%d] Found node '%s'\n", instance, node_name);
		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
			status = md_get_prop_val(mdp, listp[idx],
			    VDC_MD_CFG_HDL, &md_inst);
			DMSGX(1, "[%d] vdc inst in MD=%lx\n",
			    instance, md_inst);
			if ((status == 0) && (md_inst == obp_inst)) {
				found_inst = B_TRUE;
				break;
			}
		}
	}

	if (!found_inst) {
		DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME);
		status = ENOENT;
		goto done;
	}
	DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst);

	/* get the channels for this node */
	num_chans = md_scan_dag(mdp, listp[idx],
	    md_find_name(mdp, VDC_MD_CHAN_NAME),
	    md_find_name(mdp, "fwd"), chanp);

	/* expecting at least one channel */
	if (num_chans <= 0) {
		cmn_err(CE_NOTE, "No '%s' node for '%s' port",
		    VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;

	} else if (num_chans != 1) {
		DMSGX(0, "[%d] Expected 1 '%s' node for '%s' port, found %d\n",
		    instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME,
		    num_chans);
	}

	/*
	 * We use the first channel found (index 0), irrespective of how
	 * many are there in total.
	 */
	if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) {
		cmn_err(CE_NOTE, "Channel '%s' property not found",
		    VDC_ID_PROP);
		status = ENOENT;
	}

	DMSGX(0, "[%d] LDC id is 0x%lx\n", instance, *ldc_id);

done:
	if (chanp)
		kmem_free(chanp, listsz);
	if (listp)
		kmem_free(listp, listsz);

	(void) md_fini_handle(mdp);

	return (status);
}

/*
 * Bring the LDC channel up.  ECONNREFUSED (listener not ready at the other
 * end) is not treated as an error; the handshake will retry later.
 */
static int
vdc_do_ldc_up(vdc_t *vdc)
{
	int		status;
	ldc_status_t	ldc_state;

	DMSG(vdc, 0, "[%d] Bringing up channel %lx\n",
	    vdc->instance, vdc->ldc_id);

	if (vdc->lifecycle == VDC_LC_DETACHING)
		return (EINVAL);

	if ((status = ldc_up(vdc->ldc_handle)) != 0) {
		switch (status) {
		case ECONNREFUSED:	/* listener not ready at other end */
			DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n",
			    vdc->instance, vdc->ldc_id, status);
			status = 0;
			break;
		default:
			DMSG(vdc, 0, "[%d] Failed to bring up LDC: "
			    "channel=%ld, err=%d", vdc->instance, vdc->ldc_id,
			    status);
			break;
		}
	}

	if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) {
		vdc->ldc_state = ldc_state;
		if (ldc_state == LDC_UP) {
			DMSG(vdc, 0, "[%d] LDC channel already up\n",
			    vdc->instance);
			vdc->seq_num = 1;
			vdc->seq_num_reply = 0;
		}
	}

	return (status);
}

/*
 * Function:
 *	vdc_terminate_ldc()
 *
 * Description:
 *	Tear down the LDC channel: close it, unregister the callback and
 *	finalize the handle, according to which 'initialized' bits are set.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_terminate_ldc(vdc_t *vdc)
{
	int	instance = ddi_get_instance(vdc->dip);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized);

	if (vdc->initialized & VDC_LDC_OPEN) {
		DMSG(vdc, 0, "[%d] ldc_close()\n", instance);
		(void) ldc_close(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC_CB) {
		DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance);
		(void) ldc_unreg_callback(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC) {
		DMSG(vdc, 0, "[%d] ldc_fini()\n", instance);
		(void) ldc_fini(vdc->ldc_handle);
		vdc->ldc_handle = NULL;
	}

	vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN);
}

/* -------------------------------------------------------------------------- */

/*
 * Descriptor Ring helper routines
 */

/*
 * Function:
 *	vdc_init_descriptor_ring()
 *
 * Description:
 *	Create the descriptor ring, bind it to the LDC channel, allocate the
 *	local shadow ring and initialize every entry's memory handle.  Each
 *	stage is guarded by a bit in vdc->initialized so the function is
 *	idempotent and partial progress can be unwound later.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_descriptor_ring(vdc_t *vdc)
{
	vd_dring_entry_t	*dep = NULL;	/* DRing Entry pointer */
	int			status = 0;
	int			i;

	DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(vdc->ldc_handle != NULL);

	/* ensure we have enough room to store max sized block */
	ASSERT(maxphys <= VD_MAX_BLOCK_SIZE);

	if ((vdc->initialized & VDC_DRING_INIT) == 0) {
		DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance);
		/*
		 * Calculate the maximum block size we can transmit using one
		 * Descriptor Ring entry from the attributes returned by the
		 * vDisk server.  This is subject to a minimum of 'maxphys'
		 * as we do not have the capability to split requests over
		 * multiple DRing entries.
		 */
		if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) {
			DMSG(vdc, 0, "[%d] using minimum DRing size\n",
			    vdc->instance);
			vdc->dring_max_cookies = maxphys / PAGESIZE;
		} else {
			vdc->dring_max_cookies =
			    (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE;
		}
		vdc->dring_entry_size = (sizeof (vd_dring_entry_t) +
		    (sizeof (ldc_mem_cookie_t) *
		    (vdc->dring_max_cookies - 1)));
		vdc->dring_len = VD_DRING_LEN;

		status = ldc_mem_dring_create(vdc->dring_len,
		    vdc->dring_entry_size, &vdc->ldc_dring_hdl);
		if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) {
			DMSG(vdc, 0, "[%d] Descriptor ring creation failed",
			    vdc->instance);
			return (status);
		}
		vdc->initialized |= VDC_DRING_INIT;
	}

	if ((vdc->initialized & VDC_DRING_BOUND) == 0) {
		DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance);
		vdc->dring_cookie =
		    kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP);

		status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl,
		    LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW,
		    &vdc->dring_cookie[0],
		    &vdc->dring_cookie_count);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] Failed to bind descriptor ring "
			    "(%lx) to channel (%lx) status=%d\n",
			    vdc->instance, vdc->ldc_dring_hdl,
			    vdc->ldc_handle, status);
			return (status);
		}
		ASSERT(vdc->dring_cookie_count == 1);
		vdc->initialized |= VDC_DRING_BOUND;
	}

	status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info);
	if (status != 0) {
		DMSG(vdc, 0,
		    "[%d] Failed to get info for descriptor ring (%lx)\n",
		    vdc->instance, vdc->ldc_dring_hdl);
		return (status);
	}

	if ((vdc->initialized & VDC_DRING_LOCAL) == 0) {
		DMSG(vdc, 0, "[%d] local dring\n", vdc->instance);

		/* Allocate the local copy of this dring */
		vdc->local_dring =
		    kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t),
		    KM_SLEEP);
		vdc->initialized |= VDC_DRING_LOCAL;
	}

	/*
	 * Mark all DRing entries as free and initialize the private
	 * descriptor's memory handles. If any entry is initialized,
	 * we need to free it later so we set the bit in 'initialized'
	 * at the start.
	 */
	vdc->initialized |= VDC_DRING_ENTRY;
	for (i = 0; i < vdc->dring_len; i++) {
		dep = VDC_GET_DRING_ENTRY_PTR(vdc, i);
		dep->hdr.dstate = VIO_DESC_FREE;

		status = ldc_mem_alloc_handle(vdc->ldc_handle,
		    &vdc->local_dring[i].desc_mhdl);
		if (status != 0) {
			DMSG(vdc, 0, "![%d] Failed to alloc mem handle for"
			    " descriptor %d", vdc->instance, i);
			return (status);
		}
		vdc->local_dring[i].is_free = B_TRUE;
		vdc->local_dring[i].dep = dep;
	}

	/* Initialize the starting index */
	vdc->dring_curr_idx = 0;

	return (status);
}

/*
 * Function:
 *	vdc_destroy_descriptor_ring()
 *
 * Description:
 *	Undo vdc_init_descriptor_ring() stage by stage, driven by the bits
 *	set in vdc->initialized.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_destroy_descriptor_ring(vdc_t *vdc)
{
	vdc_local_desc_t	*ldep = NULL;	/* Local Dring Entry Pointer */
	ldc_mem_handle_t	mhdl = NULL;
	ldc_mem_info_t		minfo;
	int			status = -1;
	int			i;	/* loop */

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] Entered\n", vdc->instance);

	if (vdc->initialized & VDC_DRING_ENTRY) {
		DMSG(vdc, 0,
		    "[%d] Removing Local DRing entries\n", vdc->instance);
		for (i = 0; i < vdc->dring_len; i++) {
			ldep = &vdc->local_dring[i];
			mhdl = ldep->desc_mhdl;

			if (mhdl == NULL)
				continue;

			if ((status = ldc_mem_info(mhdl, &minfo)) != 0) {
				DMSG(vdc, 0,
				    "ldc_mem_info returned an error: %d\n",
				    status);

				/*
				 * This must mean that the mem handle
				 * is not valid. Clear it out so that
				 * no one tries to use it.
				 */
				ldep->desc_mhdl = NULL;
				continue;
			}

			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_unbind_handle(mhdl);
			}

			(void) ldc_mem_free_handle(mhdl);

			ldep->desc_mhdl = NULL;
		}
		vdc->initialized &= ~VDC_DRING_ENTRY;
	}

	if (vdc->initialized & VDC_DRING_LOCAL) {
		DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance);
		kmem_free(vdc->local_dring,
		    vdc->dring_len * sizeof (vdc_local_desc_t));
		vdc->initialized &= ~VDC_DRING_LOCAL;
	}

	if (vdc->initialized & VDC_DRING_BOUND) {
		DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance);
		status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->initialized &= ~VDC_DRING_BOUND;
		} else {
			DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx",
			    vdc->instance, status, vdc->ldc_dring_hdl);
		}
		kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t));
	}

	if (vdc->initialized & VDC_DRING_INIT) {
		DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance);
		status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->ldc_dring_hdl = NULL;
			bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t));
			vdc->initialized &= ~VDC_DRING_INIT;
		} else {
			DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)",
			    vdc->instance, status, vdc->ldc_dring_hdl);
		}
	}
}

/*
 * Function:
 *	vdc_map_to_shared_dring()
 *
 * Description:
 *	Copy contents of the local descriptor to the shared
 *	memory descriptor.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *	idx	- descriptor ring index
 *
 * Return Code:
 *	0	- Success (non-zero on failure to populate the mem handle)
 */
static int
vdc_map_to_shared_dring(vdc_t *vdcp, int idx)
{
	vdc_local_desc_t	*ldep;
	vd_dring_entry_t	*dep;
	int			rv;

	ldep = &(vdcp->local_dring[idx]);

	/* for now leave in the old pop_mem_hdl stuff */
	if (ldep->nbytes > 0) {
		rv = vdc_populate_mem_hdl(vdcp, ldep);
		if (rv) {
			DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n",
			    vdcp->instance);
			return (rv);
		}
	}

	/*
	 * fill in the data details into the DRing
	 */
	dep = ldep->dep;
	ASSERT(dep != NULL);

	dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp);
	dep->payload.operation = ldep->operation;
	dep->payload.addr = ldep->offset;
	dep->payload.nbytes = ldep->nbytes;
	dep->payload.status = (uint32_t)-1;	/* vds will set valid value */
	dep->payload.slice = ldep->slice;
	dep->hdr.dstate = VIO_DESC_READY;
	dep->hdr.ack = 1;	/* request an ACK for every message */

	return (0);
}

/*
 * Function:
 *	vdc_send_request
 *
 * Description:
 *	This routine writes the data to be transmitted to vds into the
 *	descriptor, notifies vds that the ring has been updated and
 *	then waits for the request to be processed.
 *
 * Arguments:
 *	vdcp	  - the soft state pointer
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	addr	  - address of data buf to be read/written.
 *	nbytes	  - number of bytes to read/write
 *	slice	  - the disk slice this request is for
 *	offset	  - relative disk offset
 *	cb_type   - type of call - STRATEGY or SYNC
 *	cb_arg	  - parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			. LP64 diskaddr_t (block I/O)
 *	dir	  - direction of operation (READ/WRITE/BOTH)
 *
 * Return Codes:
 *	0
 *	ENXIO
 */
static int
vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr,
    size_t nbytes, int slice, diskaddr_t offset, int cb_type,
    void *cb_arg, vio_desc_direction_t dir)
{
	ASSERT(vdcp != NULL);
	ASSERT(slice < V_NUMPAR);

	mutex_enter(&vdcp->lock);

	do {
		/* block until the connection is up and running */
		while (vdcp->state != VDC_STATE_RUNNING) {
			cv_wait(&vdcp->running_cv, &vdcp->lock);

			/* return error if detaching */
			if (vdcp->state == VDC_STATE_DETACH) {
				mutex_exit(&vdcp->lock);
				return (ENXIO);
			}
		}

	} while (vdc_populate_descriptor(vdcp, operation, addr,
	    nbytes, slice, offset, cb_type, cb_arg, dir));

	mutex_exit(&vdcp->lock);
	return (0);
}


/*
 * Function:
 *	vdc_populate_descriptor
 *
 * Description:
 *	This routine writes the data to be transmitted to vds into the
 *	descriptor, notifies vds that the ring has been updated and
 *	then waits for the request to be processed.
 *
 * Arguments:
 *	vdcp	  - the soft state pointer
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	addr	  - address of data buf to be read/written.
2403 * nbytes - number of bytes to read/write 2404 * slice - the disk slice this request is for 2405 * offset - relative disk offset 2406 * cb_type - type of call - STRATEGY or SYNC 2407 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2408 * . mode for ioctl(9e) 2409 * . LP64 diskaddr_t (block I/O) 2410 * dir - direction of operation (READ/WRITE/BOTH) 2411 * 2412 * Return Codes: 2413 * 0 2414 * EAGAIN 2415 * EFAULT 2416 * ENXIO 2417 * EIO 2418 */ 2419 static int 2420 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 2421 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2422 void *cb_arg, vio_desc_direction_t dir) 2423 { 2424 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 2425 int idx; /* Index of DRing entry used */ 2426 int next_idx; 2427 vio_dring_msg_t dmsg; 2428 size_t msglen; 2429 int rv; 2430 2431 ASSERT(MUTEX_HELD(&vdcp->lock)); 2432 vdcp->threads_pending++; 2433 loop: 2434 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 2435 2436 /* Get next available D-Ring entry */ 2437 idx = vdcp->dring_curr_idx; 2438 local_dep = &(vdcp->local_dring[idx]); 2439 2440 if (!local_dep->is_free) { 2441 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 2442 vdcp->instance); 2443 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 2444 if (vdcp->state == VDC_STATE_RUNNING || 2445 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2446 goto loop; 2447 } 2448 vdcp->threads_pending--; 2449 return (ECONNRESET); 2450 } 2451 2452 next_idx = idx + 1; 2453 if (next_idx >= vdcp->dring_len) 2454 next_idx = 0; 2455 vdcp->dring_curr_idx = next_idx; 2456 2457 ASSERT(local_dep->is_free); 2458 2459 local_dep->operation = operation; 2460 local_dep->addr = addr; 2461 local_dep->nbytes = nbytes; 2462 local_dep->slice = slice; 2463 local_dep->offset = offset; 2464 local_dep->cb_type = cb_type; 2465 local_dep->cb_arg = cb_arg; 2466 local_dep->dir = dir; 2467 2468 local_dep->is_free = B_FALSE; 2469 2470 rv = 
vdc_map_to_shared_dring(vdcp, idx); 2471 if (rv) { 2472 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 2473 vdcp->instance); 2474 /* free the descriptor */ 2475 local_dep->is_free = B_TRUE; 2476 vdcp->dring_curr_idx = idx; 2477 cv_wait(&vdcp->membind_cv, &vdcp->lock); 2478 if (vdcp->state == VDC_STATE_RUNNING || 2479 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2480 goto loop; 2481 } 2482 vdcp->threads_pending--; 2483 return (ECONNRESET); 2484 } 2485 2486 /* 2487 * Send a msg with the DRing details to vds 2488 */ 2489 VIO_INIT_DRING_DATA_TAG(dmsg); 2490 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 2491 dmsg.dring_ident = vdcp->dring_ident; 2492 dmsg.start_idx = idx; 2493 dmsg.end_idx = idx; 2494 vdcp->seq_num++; 2495 2496 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp); 2497 2498 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 2499 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 2500 2501 /* 2502 * note we're still holding the lock here to 2503 * make sure the message goes out in order !!!... 2504 */ 2505 msglen = sizeof (dmsg); 2506 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 2507 switch (rv) { 2508 case ECONNRESET: 2509 /* 2510 * vdc_send initiates the reset on failure. 2511 * Since the transaction has already been put 2512 * on the local dring, it will automatically get 2513 * retried when the channel is reset. Given that, 2514 * it is ok to just return success even though the 2515 * send failed. 2516 */ 2517 rv = 0; 2518 break; 2519 2520 case 0: /* EOK */ 2521 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 2522 break; 2523 2524 default: 2525 goto cleanup_and_exit; 2526 } 2527 2528 vdcp->threads_pending--; 2529 return (rv); 2530 2531 cleanup_and_exit: 2532 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 2533 return (ENXIO); 2534 } 2535 2536 /* 2537 * Function: 2538 * vdc_do_sync_op 2539 * 2540 * Description: 2541 * Wrapper around vdc_populate_descriptor that blocks until the 2542 * response to the message is available. 
2543 * 2544 * Arguments: 2545 * vdcp - the soft state pointer 2546 * operation - operation we want vds to perform (VD_OP_XXX) 2547 * addr - address of data buf to be read/written. 2548 * nbytes - number of bytes to read/write 2549 * slice - the disk slice this request is for 2550 * offset - relative disk offset 2551 * cb_type - type of call - STRATEGY or SYNC 2552 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2553 * . mode for ioctl(9e) 2554 * . LP64 diskaddr_t (block I/O) 2555 * dir - direction of operation (READ/WRITE/BOTH) 2556 * 2557 * Return Codes: 2558 * 0 2559 * EAGAIN 2560 * EFAULT 2561 * ENXIO 2562 * EIO 2563 */ 2564 static int 2565 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 2566 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 2567 vio_desc_direction_t dir) 2568 { 2569 int status; 2570 2571 ASSERT(cb_type == CB_SYNC); 2572 2573 /* 2574 * Grab the lock, if blocked wait until the server 2575 * response causes us to wake up again. 2576 */ 2577 mutex_enter(&vdcp->lock); 2578 vdcp->sync_op_cnt++; 2579 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 2580 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 2581 2582 if (vdcp->state == VDC_STATE_DETACH) { 2583 cv_broadcast(&vdcp->sync_blocked_cv); 2584 vdcp->sync_op_cnt--; 2585 mutex_exit(&vdcp->lock); 2586 return (ENXIO); 2587 } 2588 2589 /* now block anyone other thread entering after us */ 2590 vdcp->sync_op_blocked = B_TRUE; 2591 vdcp->sync_op_pending = B_TRUE; 2592 mutex_exit(&vdcp->lock); 2593 2594 /* 2595 * No need to check return value - will return error only 2596 * in the DETACH case and we can fall through 2597 */ 2598 (void) vdc_send_request(vdcp, operation, addr, 2599 nbytes, slice, offset, cb_type, cb_arg, dir); 2600 2601 /* 2602 * block until our transaction completes. 2603 * Also anyone else waiting also gets to go next. 
2604 */ 2605 mutex_enter(&vdcp->lock); 2606 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 2607 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 2608 2609 DMSG(vdcp, 2, ": operation returned %d\n", vdcp->sync_op_status); 2610 if (vdcp->state == VDC_STATE_DETACH) { 2611 vdcp->sync_op_pending = B_FALSE; 2612 status = ENXIO; 2613 } else { 2614 status = vdcp->sync_op_status; 2615 } 2616 2617 vdcp->sync_op_status = 0; 2618 vdcp->sync_op_blocked = B_FALSE; 2619 vdcp->sync_op_cnt--; 2620 2621 /* signal the next waiting thread */ 2622 cv_signal(&vdcp->sync_blocked_cv); 2623 mutex_exit(&vdcp->lock); 2624 2625 return (status); 2626 } 2627 2628 2629 /* 2630 * Function: 2631 * vdc_drain_response() 2632 * 2633 * Description: 2634 * When a guest is panicking, the completion of requests needs to be 2635 * handled differently because interrupts are disabled and vdc 2636 * will not get messages. We have to poll for the messages instead. 2637 * 2638 * Arguments: 2639 * vdc - soft state pointer for this instance of the device driver. 2640 * 2641 * Return Code: 2642 * 0 - Success 2643 */ 2644 static int 2645 vdc_drain_response(vdc_t *vdc) 2646 { 2647 int rv, idx, retries; 2648 size_t msglen; 2649 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2650 vio_dring_msg_t dmsg; 2651 2652 mutex_enter(&vdc->lock); 2653 2654 retries = 0; 2655 for (;;) { 2656 msglen = sizeof (dmsg); 2657 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 2658 if (rv) { 2659 rv = EINVAL; 2660 break; 2661 } 2662 2663 /* 2664 * if there are no packets wait and check again 2665 */ 2666 if ((rv == 0) && (msglen == 0)) { 2667 if (retries++ > vdc_dump_retries) { 2668 rv = EAGAIN; 2669 break; 2670 } 2671 2672 drv_usecwait(vdc_usec_timeout_dump); 2673 continue; 2674 } 2675 2676 /* 2677 * Ignore all messages that are not ACKs/NACKs to 2678 * DRing requests. 
2679 */ 2680 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2681 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2682 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 2683 dmsg.tag.vio_msgtype, 2684 dmsg.tag.vio_subtype, 2685 dmsg.tag.vio_subtype_env); 2686 continue; 2687 } 2688 2689 /* 2690 * set the appropriate return value for the current request. 2691 */ 2692 switch (dmsg.tag.vio_subtype) { 2693 case VIO_SUBTYPE_ACK: 2694 rv = 0; 2695 break; 2696 case VIO_SUBTYPE_NACK: 2697 rv = EAGAIN; 2698 break; 2699 default: 2700 continue; 2701 } 2702 2703 idx = dmsg.start_idx; 2704 if (idx >= vdc->dring_len) { 2705 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 2706 vdc->instance, idx); 2707 continue; 2708 } 2709 ldep = &vdc->local_dring[idx]; 2710 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 2711 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 2712 vdc->instance, idx, ldep->dep->hdr.dstate); 2713 continue; 2714 } 2715 2716 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 2717 vdc->instance, idx, ldep->dep->hdr.dstate); 2718 rv = vdc_depopulate_descriptor(vdc, idx); 2719 if (rv) { 2720 DMSG(vdc, 0, 2721 "[%d] Entry @ %d - depopulate failed ..\n", 2722 vdc->instance, idx); 2723 } 2724 2725 /* if this is the last descriptor - break out of loop */ 2726 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 2727 break; 2728 } 2729 2730 mutex_exit(&vdc->lock); 2731 DMSG(vdc, 0, "End idx=%d\n", idx); 2732 2733 return (rv); 2734 } 2735 2736 2737 /* 2738 * Function: 2739 * vdc_depopulate_descriptor() 2740 * 2741 * Description: 2742 * 2743 * Arguments: 2744 * vdc - soft state pointer for this instance of the device driver. 
2745 * idx - Index of the Descriptor Ring entry being modified 2746 * 2747 * Return Code: 2748 * 0 - Success 2749 */ 2750 static int 2751 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2752 { 2753 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2754 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2755 int status = ENXIO; 2756 int rv = 0; 2757 2758 ASSERT(vdc != NULL); 2759 ASSERT(idx < vdc->dring_len); 2760 ldep = &vdc->local_dring[idx]; 2761 ASSERT(ldep != NULL); 2762 ASSERT(MUTEX_HELD(&vdc->lock)); 2763 2764 DMSG(vdc, 2, ": idx = %d\n", idx); 2765 dep = ldep->dep; 2766 ASSERT(dep != NULL); 2767 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 2768 (dep->payload.status == ECANCELED)); 2769 2770 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2771 2772 ldep->is_free = B_TRUE; 2773 DMSG(vdc, 2, ": is_free = %d\n", ldep->is_free); 2774 status = dep->payload.status; 2775 2776 /* 2777 * If no buffers were used to transfer information to the server when 2778 * populating the descriptor then no memory handles need to be unbound 2779 * and we can return now. 2780 */ 2781 if (ldep->nbytes == 0) { 2782 cv_signal(&vdc->dring_free_cv); 2783 return (status); 2784 } 2785 2786 /* 2787 * If the upper layer passed in a misaligned address we copied the 2788 * data into an aligned buffer before sending it to LDC - we now 2789 * copy it back to the original buffer. 
2790 */ 2791 if (ldep->align_addr) { 2792 ASSERT(ldep->addr != NULL); 2793 2794 if (dep->payload.nbytes > 0) 2795 bcopy(ldep->align_addr, ldep->addr, 2796 dep->payload.nbytes); 2797 kmem_free(ldep->align_addr, 2798 sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 2799 ldep->align_addr = NULL; 2800 } 2801 2802 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 2803 if (rv != 0) { 2804 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 2805 vdc->instance, ldep->desc_mhdl, idx, rv); 2806 /* 2807 * The error returned by the vDisk server is more informative 2808 * and thus has a higher priority but if it isn't set we ensure 2809 * that this function returns an error. 2810 */ 2811 if (status == 0) 2812 status = EINVAL; 2813 } 2814 2815 cv_signal(&vdc->membind_cv); 2816 cv_signal(&vdc->dring_free_cv); 2817 2818 return (status); 2819 } 2820 2821 /* 2822 * Function: 2823 * vdc_populate_mem_hdl() 2824 * 2825 * Description: 2826 * 2827 * Arguments: 2828 * vdc - soft state pointer for this instance of the device driver. 
2829 * idx - Index of the Descriptor Ring entry being modified 2830 * addr - virtual address being mapped in 2831 * nybtes - number of bytes in 'addr' 2832 * operation - the vDisk operation being performed (VD_OP_xxx) 2833 * 2834 * Return Code: 2835 * 0 - Success 2836 */ 2837 static int 2838 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 2839 { 2840 vd_dring_entry_t *dep = NULL; 2841 ldc_mem_handle_t mhdl; 2842 caddr_t vaddr; 2843 size_t nbytes; 2844 uint8_t perm = LDC_MEM_RW; 2845 uint8_t maptype; 2846 int rv = 0; 2847 int i; 2848 2849 ASSERT(vdcp != NULL); 2850 2851 dep = ldep->dep; 2852 mhdl = ldep->desc_mhdl; 2853 2854 switch (ldep->dir) { 2855 case VIO_read_dir: 2856 perm = LDC_MEM_W; 2857 break; 2858 2859 case VIO_write_dir: 2860 perm = LDC_MEM_R; 2861 break; 2862 2863 case VIO_both_dir: 2864 perm = LDC_MEM_RW; 2865 break; 2866 2867 default: 2868 ASSERT(0); /* catch bad programming in vdc */ 2869 } 2870 2871 /* 2872 * LDC expects any addresses passed in to be 8-byte aligned. 
We need 2873 * to copy the contents of any misaligned buffers to a newly allocated 2874 * buffer and bind it instead (and copy the the contents back to the 2875 * original buffer passed in when depopulating the descriptor) 2876 */ 2877 vaddr = ldep->addr; 2878 nbytes = ldep->nbytes; 2879 if (((uint64_t)vaddr & 0x7) != 0) { 2880 ASSERT(ldep->align_addr == NULL); 2881 ldep->align_addr = 2882 kmem_alloc(sizeof (caddr_t) * 2883 P2ROUNDUP(nbytes, 8), KM_SLEEP); 2884 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 2885 "(buf=%p nb=%ld op=%d)\n", 2886 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 2887 nbytes, ldep->operation); 2888 if (perm != LDC_MEM_W) 2889 bcopy(vaddr, ldep->align_addr, nbytes); 2890 vaddr = ldep->align_addr; 2891 } 2892 2893 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 2894 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2895 maptype, perm, &dep->payload.cookie[0], 2896 &dep->payload.ncookies); 2897 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 2898 vdcp->instance, dep->payload.ncookies); 2899 if (rv != 0) { 2900 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 2901 "(mhdl=%p, buf=%p, err=%d)\n", 2902 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 2903 if (ldep->align_addr) { 2904 kmem_free(ldep->align_addr, 2905 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 2906 ldep->align_addr = NULL; 2907 } 2908 return (EAGAIN); 2909 } 2910 2911 /* 2912 * Get the other cookies (if any). 
2913 */ 2914 for (i = 1; i < dep->payload.ncookies; i++) { 2915 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2916 if (rv != 0) { 2917 (void) ldc_mem_unbind_handle(mhdl); 2918 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 2919 "(mhdl=%lx cnum=%d), err=%d", 2920 vdcp->instance, mhdl, i, rv); 2921 if (ldep->align_addr) { 2922 kmem_free(ldep->align_addr, 2923 sizeof (caddr_t) * ldep->nbytes); 2924 ldep->align_addr = NULL; 2925 } 2926 return (EAGAIN); 2927 } 2928 } 2929 2930 return (rv); 2931 } 2932 2933 /* 2934 * Interrupt handlers for messages from LDC 2935 */ 2936 2937 /* 2938 * Function: 2939 * vdc_handle_cb() 2940 * 2941 * Description: 2942 * 2943 * Arguments: 2944 * event - Type of event (LDC_EVT_xxx) that triggered the callback 2945 * arg - soft state pointer for this instance of the device driver. 2946 * 2947 * Return Code: 2948 * 0 - Success 2949 */ 2950 static uint_t 2951 vdc_handle_cb(uint64_t event, caddr_t arg) 2952 { 2953 ldc_status_t ldc_state; 2954 int rv = 0; 2955 2956 vdc_t *vdc = (vdc_t *)(void *)arg; 2957 2958 ASSERT(vdc != NULL); 2959 2960 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 2961 2962 /* 2963 * Depending on the type of event that triggered this callback, 2964 * we modify the handshake state or read the data. 2965 * 2966 * NOTE: not done as a switch() as event could be triggered by 2967 * a state change and a read request. Also the ordering of the 2968 * check for the event types is deliberate. 2969 */ 2970 if (event & LDC_EVT_UP) { 2971 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 2972 2973 mutex_enter(&vdc->lock); 2974 2975 /* get LDC state */ 2976 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2977 if (rv != 0) { 2978 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 2979 vdc->instance, rv); 2980 return (LDC_SUCCESS); 2981 } 2982 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 2983 /* 2984 * Reset the transaction sequence numbers when 2985 * LDC comes up. 
We then kick off the handshake 2986 * negotiation with the vDisk server. 2987 */ 2988 vdc->seq_num = 1; 2989 vdc->seq_num_reply = 0; 2990 vdc->ldc_state = ldc_state; 2991 cv_signal(&vdc->initwait_cv); 2992 } 2993 2994 mutex_exit(&vdc->lock); 2995 } 2996 2997 if (event & LDC_EVT_READ) { 2998 DMSG(vdc, 0, "[%d] Received LDC_EVT_READ\n", vdc->instance); 2999 mutex_enter(&vdc->read_lock); 3000 cv_signal(&vdc->read_cv); 3001 vdc->read_state = VDC_READ_PENDING; 3002 mutex_exit(&vdc->read_lock); 3003 3004 /* that's all we have to do - no need to handle DOWN/RESET */ 3005 return (LDC_SUCCESS); 3006 } 3007 3008 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 3009 3010 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 3011 3012 mutex_enter(&vdc->lock); 3013 /* 3014 * Need to wake up any readers so they will 3015 * detect that a reset has occurred. 3016 */ 3017 mutex_enter(&vdc->read_lock); 3018 if ((vdc->read_state == VDC_READ_WAITING) || 3019 (vdc->read_state == VDC_READ_RESET)) 3020 cv_signal(&vdc->read_cv); 3021 vdc->read_state = VDC_READ_RESET; 3022 mutex_exit(&vdc->read_lock); 3023 3024 /* wake up any threads waiting for connection to come up */ 3025 if (vdc->state == VDC_STATE_INIT_WAITING) { 3026 vdc->state = VDC_STATE_RESETTING; 3027 cv_signal(&vdc->initwait_cv); 3028 } 3029 3030 mutex_exit(&vdc->lock); 3031 } 3032 3033 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 3034 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3035 vdc->instance, event); 3036 3037 return (LDC_SUCCESS); 3038 } 3039 3040 /* 3041 * Function: 3042 * vdc_wait_for_response() 3043 * 3044 * Description: 3045 * Block waiting for a response from the server. If there is 3046 * no data the thread block on the read_cv that is signalled 3047 * by the callback when an EVT_READ occurs. 3048 * 3049 * Arguments: 3050 * vdcp - soft state pointer for this instance of the device driver. 
3051 * 3052 * Return Code: 3053 * 0 - Success 3054 */ 3055 static int 3056 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3057 { 3058 size_t nbytes = sizeof (*msgp); 3059 int status; 3060 3061 ASSERT(vdcp != NULL); 3062 3063 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3064 3065 status = vdc_recv(vdcp, msgp, &nbytes); 3066 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 3067 status, (int)nbytes); 3068 if (status) { 3069 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3070 vdcp->instance, status); 3071 return (status); 3072 } 3073 3074 if (nbytes < sizeof (vio_msg_tag_t)) { 3075 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3076 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3077 return (ENOMSG); 3078 } 3079 3080 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3081 msgp->tag.vio_msgtype, 3082 msgp->tag.vio_subtype, 3083 msgp->tag.vio_subtype_env); 3084 3085 /* 3086 * Verify the Session ID of the message 3087 * 3088 * Every message after the Version has been negotiated should 3089 * have the correct session ID set. 3090 */ 3091 if ((msgp->tag.vio_sid != vdcp->session_id) && 3092 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3093 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3094 "expected 0x%lx [seq num %lx @ %d]", 3095 vdcp->instance, msgp->tag.vio_sid, 3096 vdcp->session_id, 3097 ((vio_dring_msg_t *)msgp)->seq_num, 3098 ((vio_dring_msg_t *)msgp)->start_idx); 3099 return (ENOMSG); 3100 } 3101 return (0); 3102 } 3103 3104 3105 /* 3106 * Function: 3107 * vdc_resubmit_backup_dring() 3108 * 3109 * Description: 3110 * Resubmit each descriptor in the backed up dring to 3111 * vDisk server. The Dring was backed up during connection 3112 * reset. 3113 * 3114 * Arguments: 3115 * vdcp - soft state pointer for this instance of the device driver. 
3116 * 3117 * Return Code: 3118 * 0 - Success 3119 */ 3120 static int 3121 vdc_resubmit_backup_dring(vdc_t *vdcp) 3122 { 3123 int count; 3124 int b_idx; 3125 int rv; 3126 int dring_size; 3127 int status; 3128 vio_msg_t vio_msg; 3129 vdc_local_desc_t *curr_ldep; 3130 3131 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3132 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3133 3134 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3135 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3136 3137 /* 3138 * Walk the backup copy of the local descriptor ring and 3139 * resubmit all the outstanding transactions. 3140 */ 3141 b_idx = vdcp->local_dring_backup_tail; 3142 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3143 3144 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3145 3146 /* only resubmit outstanding transactions */ 3147 if (!curr_ldep->is_free) { 3148 3149 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3150 mutex_enter(&vdcp->lock); 3151 rv = vdc_populate_descriptor(vdcp, curr_ldep->operation, 3152 curr_ldep->addr, curr_ldep->nbytes, 3153 curr_ldep->slice, curr_ldep->offset, 3154 curr_ldep->cb_type, curr_ldep->cb_arg, 3155 curr_ldep->dir); 3156 mutex_exit(&vdcp->lock); 3157 if (rv) { 3158 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3159 vdcp->instance, b_idx); 3160 return (rv); 3161 } 3162 3163 /* Wait for the response message. 
 */
			DMSG(vdcp, 1, "waiting for response to idx=%x\n",
			    b_idx);
			status = vdc_wait_for_response(vdcp, &vio_msg);
			if (status) {
				DMSG(vdcp, 1, "[%d] wait_for_response "
				    "returned err=%d\n", vdcp->instance,
				    status);
				return (status);
			}

			DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx);
			status = vdc_process_data_msg(vdcp, &vio_msg);
			if (status) {
				DMSG(vdcp, 1, "[%d] process_data_msg "
				    "returned err=%d\n", vdcp->instance,
				    status);
				return (status);
			}
		}

		/* get the next element to submit */
		if (++b_idx >= vdcp->local_dring_backup_len)
			b_idx = 0;
	}

	/* all done - now clear up pending dring copy */
	dring_size = vdcp->local_dring_backup_len *
	    sizeof (vdcp->local_dring_backup[0]);

	(void) kmem_free(vdcp->local_dring_backup, dring_size);

	vdcp->local_dring_backup = NULL;

	return (0);
}

/*
 * Function:
 *	vdc_backup_local_dring()
 *
 * Description:
 *	Backup the current dring in the event of a reset. The Dring
 *	transactions will be resubmitted to the server when the
 *	connection is restored.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	NONE
 */
static void
vdc_backup_local_dring(vdc_t *vdcp)
{
	int dring_size;

	/* only called while tearing down the channel on a reset */
	ASSERT(vdcp->state == VDC_STATE_RESETTING);

	/*
	 * If the backup dring is still around, it means
	 * that the last restore did not complete. However,
	 * since we never got back into the running state,
	 * the backup copy we have is still valid.
	 */
	if (vdcp->local_dring_backup != NULL) {
		DMSG(vdcp, 1, "reusing local descriptor ring backup "
		    "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len,
		    vdcp->local_dring_backup_tail);
		return;
	}

	DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, "
	    "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx);

	dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]);

	/* KM_SLEEP is safe here: the reset path may block */
	vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP);
	bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size);

	vdcp->local_dring_backup_tail = vdcp->dring_curr_idx;
	vdcp->local_dring_backup_len = vdcp->dring_len;
}

/* -------------------------------------------------------------------------- */

/*
 * The following functions process the incoming messages from vds
 */

/*
 * Function:
 *	vdc_process_msg_thread()
 *
 * Description:
 *
 *	Main VDC message processing thread. Each vDisk instance
 *	consists of a copy of this thread. This thread triggers
 *	all the handshakes and data exchange with the server. It
 *	also handles all channel resets
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_process_msg_thread(vdc_t *vdcp)
{
	int	status;

	mutex_enter(&vdcp->lock);

	for (;;) {

		/* Q() expands to the matching state's name for the DMSG */
#define	Q(_s)	(vdcp->state == _s) ? #_s :
		DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state,
		    Q(VDC_STATE_INIT)
		    Q(VDC_STATE_INIT_WAITING)
		    Q(VDC_STATE_NEGOTIATE)
		    Q(VDC_STATE_HANDLE_PENDING)
		    Q(VDC_STATE_RUNNING)
		    Q(VDC_STATE_RESETTING)
		    Q(VDC_STATE_DETACH)
		    "UNKNOWN");

		switch (vdcp->state) {
		case VDC_STATE_INIT:

			/* Check whether we have been re-initializing repeatedly */
			if (vdcp->hshake_cnt++ > vdc_hshake_retries) {
				cmn_err(CE_NOTE, "[%d] disk access failed.\n",
				    vdcp->instance);
				vdcp->state = VDC_STATE_DETACH;
				break;
			}

			/* Bring up connection with vds via LDC */
			status = vdc_start_ldc_connection(vdcp);
			switch (status) {
			case EINVAL:
				DMSG(vdcp, 0, "[%d] Could not start LDC",
				    vdcp->instance);
				vdcp->state = VDC_STATE_DETACH;
				break;
			case 0:
				vdcp->state = VDC_STATE_INIT_WAITING;
				break;
			default:
				/* non-fatal errors: wait for the channel */
				vdcp->state = VDC_STATE_INIT_WAITING;
				break;
			}
			break;

		case VDC_STATE_INIT_WAITING:

			/*
			 * Let the callback event move us on
			 * when channel is open to server
			 */
			while (vdcp->ldc_state != LDC_UP) {
				cv_wait(&vdcp->initwait_cv, &vdcp->lock);
				if (vdcp->state != VDC_STATE_INIT_WAITING) {
					DMSG(vdcp, 0,
				"state moved to %d out from under us...\n",
					    vdcp->state);

					break;
				}
			}
			if (vdcp->state == VDC_STATE_INIT_WAITING &&
			    vdcp->ldc_state == LDC_UP) {
				vdcp->state = VDC_STATE_NEGOTIATE;
			}
			break;

		case VDC_STATE_NEGOTIATE:
			/* version, attribute and dring exchanges, in order */
			switch (status = vdc_ver_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "ver negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_attr_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "attr negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_dring_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "dring negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_rdx_exchange(vdcp)) {
			case 0:
				vdcp->state = VDC_STATE_HANDLE_PENDING;
				goto done;
			default:
				DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n",
				    status);
				goto reset;
			}
reset:
			DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n",
			    status);
			vdcp->state = VDC_STATE_RESETTING;
done:
			DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n",
			    vdcp->state);
			break;

		case VDC_STATE_HANDLE_PENDING:

			/* drop the lock while replaying the backed up dring */
			mutex_exit(&vdcp->lock);
			status = vdc_resubmit_backup_dring(vdcp);
			mutex_enter(&vdcp->lock);

			if (status)
				vdcp->state = VDC_STATE_RESETTING;
			else
				vdcp->state = VDC_STATE_RUNNING;

			break;

		/* enter running state */
		case VDC_STATE_RUNNING:
			/*
			 * Signal anyone waiting for the connection
			 * to come on line.
			 */
			vdcp->hshake_cnt = 0;
			cv_broadcast(&vdcp->running_cv);
			mutex_exit(&vdcp->lock);

			/* process incoming data messages until an error */
			for (;;) {
				vio_msg_t msg;
				status = vdc_wait_for_response(vdcp, &msg);
				if (status) break;

				DMSG(vdcp, 1, "[%d] new pkt(s) available\n",
				    vdcp->instance);
				status = vdc_process_data_msg(vdcp, &msg);
				if (status) {
					DMSG(vdcp, 1, "[%d] process_data_msg "
					    "returned err=%d\n", vdcp->instance,
					    status);
					break;
				}

			}

			mutex_enter(&vdcp->lock);

			vdcp->state = VDC_STATE_RESETTING;
			vdcp->self_reset = B_TRUE;
			break;

		case VDC_STATE_RESETTING:
			DMSG(vdcp, 0, "Initiating channel reset "
			    "(pending = %d)\n", (int)vdcp->threads_pending);

			if (vdcp->self_reset) {
				DMSG(vdcp, 0,
				    "[%d] calling stop_ldc_connection.\n",
				    vdcp->instance);
				status = vdc_stop_ldc_connection(vdcp);
				vdcp->self_reset = B_FALSE;
			}

			/*
			 * Wait for all threads currently waiting
			 * for a free dring entry to use.
			 */
			while (vdcp->threads_pending) {
				cv_broadcast(&vdcp->membind_cv);
				cv_broadcast(&vdcp->dring_free_cv);
				mutex_exit(&vdcp->lock);
				/* let them wake up */
				drv_usecwait(vdc_min_timeout_ldc);
				mutex_enter(&vdcp->lock);
			}

			ASSERT(vdcp->threads_pending == 0);

			/* Sanity check that no thread is receiving */
			ASSERT(vdcp->read_state != VDC_READ_WAITING);

			vdcp->read_state = VDC_READ_IDLE;

			/* preserve in-flight transactions for resubmission */
			vdc_backup_local_dring(vdcp);

			/* cleanup the old d-ring */
			vdc_destroy_descriptor_ring(vdcp);

			/* go and start again */
			vdcp->state = VDC_STATE_INIT;

			break;

		case VDC_STATE_DETACH:
			DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n",
			    vdcp->instance);

			/*
			 * Signal anyone waiting for connection
			 * to come online
			 */
			cv_broadcast(&vdcp->running_cv);

			/* unblock any pending synchronous operation */
			while (vdcp->sync_op_pending) {
				cv_signal(&vdcp->sync_pending_cv);
				cv_signal(&vdcp->sync_blocked_cv);
				mutex_exit(&vdcp->lock);
				drv_usecwait(vdc_min_timeout_ldc);
				mutex_enter(&vdcp->lock);
			}

			mutex_exit(&vdcp->lock);

			DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n",
			    vdcp->instance);
			thread_exit();
			break;
		}
	}
}


/*
 * Function:
 *	vdc_process_data_msg()
 *
 * Description:
 *	This function is called by the message processing thread each time
 *	a message with a msgtype of VIO_TYPE_DATA is received. It will either
 *	be an ACK or NACK from vds[1] which vdc handles as follows.
 *		ACK	- wake up the waiting thread
 *		NACK	- resend any messages necessary
 *
 *	[1] Although the message format allows it, vds should not send a
 *	    VIO_SUBTYPE_INFO message to vdc asking it to read data; if for
 *	    some bizarre reason it does, vdc will reset the connection.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	msg	- the LDC message sent by vds
 *
 * Return Code:
 *	0	- Success.
 *	> 0	- error value returned by LDC
 */
static int
vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg)
{
	int			status = 0;
	vio_dring_msg_t		*dring_msg;
	vdc_local_desc_t	*ldep = NULL;
	int			start, end;
	int			idx;

	dring_msg = (vio_dring_msg_t *)msg;

	ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA);
	ASSERT(vdcp != NULL);

	mutex_enter(&vdcp->lock);

	/*
	 * Check to see if the message has bogus data
	 */
	idx = start = dring_msg->start_idx;
	end = dring_msg->end_idx;
	/*
	 * NOTE(review): end == -1 is accepted here (only end < -1 is
	 * rejected) — presumably -1 marks an unused end index; confirm
	 * against the vDisk protocol before tightening this check.
	 */
	if ((start >= vdcp->dring_len) ||
	    (end >= vdcp->dring_len) || (end < -1)) {
		DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n",
		    vdcp->instance, start, end);
		mutex_exit(&vdcp->lock);
		return (EINVAL);
	}

	/*
	 * Verify that the sequence number is what vdc expects.
	 */
	switch (vdc_verify_seq_num(vdcp, dring_msg)) {
	case VDC_SEQ_NUM_TODO:
		break;	/* keep processing this message */
	case VDC_SEQ_NUM_SKIP:
		mutex_exit(&vdcp->lock);
		return (0);
	case VDC_SEQ_NUM_INVALID:
		mutex_exit(&vdcp->lock);
		DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance);
		return (ENXIO);
	}

	if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
		DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance);
		VDC_DUMP_DRING_MSG(dring_msg);
		mutex_exit(&vdcp->lock);
		return (EIO);

	} else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) {
		/* vds must not ask vdc to read data - see [1] above */
		mutex_exit(&vdcp->lock);
		return (EPROTO);
	}

	DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp);
	DMSG(vdcp, 1, ": start %d end %d\n", start, end);
	/* one descriptor is ACK'ed at a time */
	ASSERT(start == end);

	ldep = &vdcp->local_dring[idx];

	DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n",
	    ldep->dep->hdr.dstate, ldep->cb_type);

	if (ldep->dep->hdr.dstate == VIO_DESC_DONE) {
		struct buf *bufp;

		switch (ldep->cb_type) {
		case CB_SYNC:
			/* synchronous request: record status and wake waiter */
			ASSERT(vdcp->sync_op_pending);

			status = vdc_depopulate_descriptor(vdcp, idx);
			vdcp->sync_op_status = status;
			vdcp->sync_op_pending = B_FALSE;
			cv_signal(&vdcp->sync_pending_cv);
			break;

		case CB_STRATEGY:
			/* strategy(9E) request: complete the buf */
			bufp = ldep->cb_arg;
			ASSERT(bufp != NULL);
			bufp->b_resid =
			    bufp->b_bcount - ldep->dep->payload.nbytes;
			status = ldep->dep->payload.status; /* Future:ntoh */
			if (status != 0) {
				DMSG(vdcp, 1, "strategy status=%d\n", status);
				bioerror(bufp, status);
			}
			status = vdc_depopulate_descriptor(vdcp, idx);
			biodone(bufp);

			DMSG(vdcp, 1,
			    "strategy complete req=%ld bytes resp=%ld bytes\n",
			    bufp->b_bcount, ldep->dep->payload.nbytes);
			break;

		default:
			ASSERT(0);
		}
	}

	/* let the arrival signal propagate */
	mutex_exit(&vdcp->lock);

	/* probe gives the count of how many entries were processed */
	DTRACE_IO2(processed, int, 1, vdc_t *, vdcp);

	return (0);
}

/*
 * Function:
 *	vdc_process_err_msg()
 *
 * NOTE: No error messages are used as part of the vDisk protocol
 */
static int
vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg)
{
	_NOTE(ARGUNUSED(vdc))
	_NOTE(ARGUNUSED(msg))

	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR);
	DMSG(vdc, 1, "[%d] Got an ERR msg", vdc->instance);

	return (ENOTSUP);
}

/*
 * Function:
 *	vdc_handle_ver_msg()
 *
 * Description:
 *	Process a VIO_VER_INFO message (version negotiation) from the
 *	vDisk server: record the agreed version on an ACK, or propose
 *	the next lower supported version on a NACK.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	ver_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg)
{
	int status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) {
		return (EPROTO);
	}

	if (ver_msg->dev_class != VDEV_DISK_SERVER) {
		return (EINVAL);
	}

	switch (ver_msg->tag.vio_subtype) {
	case VIO_SUBTYPE_ACK:
		/*
		 * We check to see if the version returned is indeed supported
		 * (The server may have also adjusted the minor number downwards
		 * and if so 'ver_msg' will contain the actual version agreed)
		 */
		if (vdc_is_supported_version(ver_msg)) {
			vdc->ver.major = ver_msg->ver_major;
			vdc->ver.minor = ver_msg->ver_minor;
			ASSERT(vdc->ver.major > 0);
		} else {
			status = EPROTO;
		}
		break;

	case VIO_SUBTYPE_NACK:
		/*
		 * call vdc_is_supported_version() which will return the next
		 * supported version (if any) in 'ver_msg'
		 */
		(void) vdc_is_supported_version(ver_msg);
		if (ver_msg->ver_major > 0) {
			size_t len = sizeof (*ver_msg);

			ASSERT(vdc->ver.major > 0);

			/* reset the necessary fields and resend */
			ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO;
			ver_msg->dev_class = VDEV_DISK;

			status = vdc_send(vdc, (caddr_t)ver_msg, &len);
			DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n",
			    vdc->instance, status);
			/* a short send means the message did not go out whole */
			if (len != sizeof (*ver_msg))
				status = EBADMSG;
		} else {
			DMSG(vdc, 0, "[%d] No common version with "
			    "vDisk server", vdc->instance);
			status = ENOTSUP;
		}

		break;
	case VIO_SUBTYPE_INFO:
		/*
		 * Handle the case where vds starts handshake
		 * (for now only vdc is the instigator)
		 */
		status = ENOTSUP;
		break;

	default:
		status = EINVAL;
		break;
	}

	return (status);
}

/*
 * Function:
 *	vdc_handle_attr_msg()
 *
 * Description:
 *	Process a VIO_ATTR_INFO message (attribute negotiation) from the
 *	vDisk server: on an ACK, record the disk attributes the server
 *	returned and validate them.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	attr_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
{
	int status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) {
		return (EPROTO);
	}

	switch (attr_msg->tag.vio_subtype) {
	case VIO_SUBTYPE_ACK:
		/*
		 * We now verify the attributes sent by vds.
		 */
		vdc->vdisk_size = attr_msg->vdisk_size;
		vdc->vdisk_type = attr_msg->vdisk_type;

		DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n",
		    vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz);
		DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n",
		    vdc->instance, vdc->block_size,
		    attr_msg->vdisk_block_size);

		/*
		 * We don't know at compile time what the vDisk server will
		 * think are good values but we apply a large (arbitrary)
		 * upper bound to prevent memory exhaustion in vdc if it was
		 * allocating a DRing based on huge values sent by the server.
		 * We probably will never exceed this except if the message
		 * was garbage.
		 */
		if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <=
		    (PAGESIZE * DEV_BSIZE)) {
			vdc->max_xfer_sz = attr_msg->max_xfer_sz;
			vdc->block_size = attr_msg->vdisk_block_size;
		} else {
			DMSG(vdc, 0, "[%d] vds block transfer size too big;"
			    " using max supported by vdc", vdc->instance);
		}

		if ((attr_msg->xfer_mode != VIO_DRING_MODE) ||
		    (attr_msg->vdisk_size > INT64_MAX) ||
		    (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) {
			DMSG(vdc, 0, "[%d] Invalid attributes from vds",
			    vdc->instance);
			status = EINVAL;
			break;
		}

		break;

	case VIO_SUBTYPE_NACK:
		/*
		 * vds could not handle the attributes we sent so we
		 * stop negotiating.
		 */
		status = EPROTO;
		break;

	case VIO_SUBTYPE_INFO:
		/*
		 * Handle the case where vds starts the handshake
		 * (for now; vdc is the only supported instigator)
		 */
		status = ENOTSUP;
		break;

	default:
		status = ENOTSUP;
		break;
	}

	return (status);
}

/*
 * Function:
 *	vdc_handle_dring_reg_msg()
 *
 * Description:
 *	Process a VIO_DRING_REG message (descriptor ring registration)
 *	from the vDisk server: on an ACK, record the dring identifier
 *	the server assigned.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the driver.
 *	dring_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg)
{
	int		status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) {
		return (EPROTO);
	}

	switch (dring_msg->tag.vio_subtype) {
	case VIO_SUBTYPE_ACK:
		/* save the received dring_ident */
		vdc->dring_ident = dring_msg->dring_ident;
		DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n",
		    vdc->instance, vdc->dring_ident);
		break;

	case VIO_SUBTYPE_NACK:
		/*
		 * vds could not handle the DRing info we sent so we
		 * stop negotiating.
		 */
		DMSG(vdc, 0, "[%d] server could not register DRing\n",
		    vdc->instance);
		status = EPROTO;
		break;

	case VIO_SUBTYPE_INFO:
		/*
		 * Handle the case where vds starts handshake
		 * (for now only vdc is the instigator)
		 */
		status = ENOTSUP;
		break;
	default:
		status = ENOTSUP;
	}

	return (status);
}

/*
 * Function:
 *	vdc_verify_seq_num()
 *
 * Description:
 *	This functions verifies that the sequence number sent back by the vDisk
 *	server with the latest message is what is expected (i.e. it is greater
 *	than the last seq num sent by the vDisk server and less than or equal
 *	to the last seq num generated by vdc).
 *
 *	It then checks the request ID to see if any requests need processing
 *	in the DRing.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the driver.
 *	dring_msg	- pointer to the LDC message sent by vds
 *
 * Return Code:
 *	VDC_SEQ_NUM_TODO	- Message needs to be processed
 *	VDC_SEQ_NUM_SKIP	- Message has already been processed
 *	VDC_SEQ_NUM_INVALID	- The seq numbers are so out of sync,
 *				  vdc cannot deal with them
 */
static int
vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg)
{
	ASSERT(vdc != NULL);
	ASSERT(dring_msg != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	/*
	 * Check to see if the messages were responded to in the correct
	 * order by vds.
	 */
	if ((dring_msg->seq_num <= vdc->seq_num_reply) ||
	    (dring_msg->seq_num > vdc->seq_num)) {
		DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: "
		    "%lu > expected <= %lu (last proc req %lu sent %lu)\n",
		    vdc->instance, dring_msg->seq_num,
		    vdc->seq_num_reply, vdc->seq_num,
		    vdc->req_id_proc, vdc->req_id);
		return (VDC_SEQ_NUM_INVALID);
	}
	vdc->seq_num_reply = dring_msg->seq_num;

	/* anything left between last-processed and last-issued request? */
	if (vdc->req_id_proc < vdc->req_id)
		return (VDC_SEQ_NUM_TODO);
	else
		return (VDC_SEQ_NUM_SKIP);
}


/*
 * Function:
 *	vdc_is_supported_version()
 *
 * Description:
 *	This routine checks if the major/minor version numbers specified in
 *	'ver_msg' are supported. If not it finds the next version that is
 *	in the supported version list 'vdc_version[]' and sets the fields in
 *	'ver_msg' to those values
 *
 * Arguments:
 *	ver_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	B_TRUE	- Success
 *	B_FALSE	- Version not supported
 */
static boolean_t
vdc_is_supported_version(vio_ver_msg_t *ver_msg)
{
	int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]);

	/* vdc_version[] is sorted by strictly descending major number */
	for (int i = 0; i < vdc_num_versions; i++) {
		ASSERT(vdc_version[i].major > 0);
		ASSERT((i == 0) ||
		    (vdc_version[i].major < vdc_version[i-1].major));

		/*
		 * If the major versions match, adjust the minor version, if
		 * necessary, down to the highest value supported by this
		 * client. The server should support all minor versions lower
		 * than the value it sent
		 */
		if (ver_msg->ver_major == vdc_version[i].major) {
			if (ver_msg->ver_minor > vdc_version[i].minor) {
				DMSGX(0,
				    "Adjusting minor version from %u to %u",
				    ver_msg->ver_minor, vdc_version[i].minor);
				ver_msg->ver_minor = vdc_version[i].minor;
			}
			return (B_TRUE);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get resent with
		 * these values, and the server will potentially try again
		 * with the same or a lower version
		 */
		if (ver_msg->ver_major > vdc_version[i].major) {
			ver_msg->ver_major = vdc_version[i].major;
			ver_msg->ver_minor = vdc_version[i].minor;
			DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n",
			    ver_msg->ver_major, ver_msg->ver_minor);

			return (B_FALSE);
		}

		/*
		 * Otherwise, the message's major version is less than the
		 * current major version, so continue the loop to the next
		 * (lower) supported version
		 */
	}

	/*
	 * No common version was found; "ground" the version pair in the
	 * message to terminate negotiation
	 */
	ver_msg->ver_major = 0;
	ver_msg->ver_minor = 0;

	return (B_FALSE);
}
/* -------------------------------------------------------------------------- */

/*
 * DKIO(7) support
 */

typedef struct vdc_dk_arg {
	struct dk_callback	dkc;	/* caller's completion callback */
	int			mode;	/* ioctl mode flags */
	dev_t			dev;	/* device the flush was issued on */
	vdc_t			*vdc;	/* owning vdc instance */
} vdc_dk_arg_t;

/*
 * Function:
 * 	vdc_dkio_flush_cb()
 *
 * Description:
 *	This routine is a callback for DKIOCFLUSHWRITECACHE which can be called
 *	by kernel code.
 *
 * Arguments:
 *	arg	- a pointer to a vdc_dk_arg_t structure.
 */
void
vdc_dkio_flush_cb(void *arg)
{
	struct vdc_dk_arg	*dk_arg = (struct vdc_dk_arg *)arg;
	struct dk_callback	*dkc = NULL;
	vdc_t			*vdc = NULL;
	int			rv;

	if (dk_arg == NULL) {
		cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n");
		return;
	}
	dkc = &dk_arg->dkc;
	vdc = dk_arg->vdc;
	ASSERT(vdc != NULL);

	/* issue the flush synchronously to the vDisk server */
	rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0,
	    VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir);
	if (rv != 0) {
		DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n",
		    vdc->instance, rv,
		    ddi_model_convert_from(dk_arg->mode & FMODELS));
	}

	/*
	 * Trigger the call back to notify the caller that the ioctl call has
	 * been completed.
	 */
	if ((dk_arg->mode & FKIOCTL) &&
	    (dkc != NULL) &&
	    (dkc->dkc_callback != NULL)) {
		ASSERT(dkc->dkc_cookie != NULL);
		(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
	}

	/* Indicate that one less DKIO write flush is outstanding */
	mutex_enter(&vdc->lock);
	vdc->dkio_flush_pending--;
	ASSERT(vdc->dkio_flush_pending >= 0);
	mutex_exit(&vdc->lock);

	/* free the mem that was allocated when the callback was dispatched */
	kmem_free(arg, sizeof (vdc_dk_arg_t));
}

/*
 * This structure is used in the DKIO(7I) array below.
 */
typedef struct vdc_dk_ioctl {
	uint8_t		op;		/* VD_OP_XXX value */
	int		cmd;		/* Solaris ioctl operation number */
	size_t		nbytes;		/* size of structure to be copied */

	/* function to convert between vDisk and Solaris structure formats */
	int		(*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg,
			    int mode, int dir);
} vdc_dk_ioctl_t;

/*
 * Subset of DKIO(7I) operations currently supported
 */
static vdc_dk_ioctl_t	dk_ioctl[] = {
	{VD_OP_FLUSH,		DKIOCFLUSHWRITECACHE,	0,
		vdc_null_copy_func},
	{VD_OP_GET_WCE,		DKIOCGETWCE,		sizeof (int),
		vdc_get_wce_convert},
	{VD_OP_SET_WCE,		DKIOCSETWCE,		sizeof (int),
		vdc_set_wce_convert},
	{VD_OP_GET_VTOC,	DKIOCGVTOC,		sizeof (vd_vtoc_t),
		vdc_get_vtoc_convert},
	{VD_OP_SET_VTOC,	DKIOCSVTOC,		sizeof (vd_vtoc_t),
		vdc_set_vtoc_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCGGEOM,		sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCG_PHYGEOM,		sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCG_VIRTGEOM,	sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_SET_DISKGEOM,	DKIOCSGEOM,		sizeof (vd_geom_t),
		vdc_set_geom_convert},
	{VD_OP_GET_EFI,		DKIOCGETEFI,		0,
		vdc_get_efi_convert},
	{VD_OP_SET_EFI,		DKIOCSETEFI,		0,
		vdc_set_efi_convert},

	/*
	 * These particular ioctls are not sent to the server - vdc fakes up
	 * the necessary info.
	 */
	{0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func},
	{0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func},
	{0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func},
	{0, DKIOCREMOVABLE, 0, vdc_null_copy_func},
	{0, CDROMREADOFFSET, 0, vdc_null_copy_func}
};

/*
 * Function:
 *	vd_process_ioctl()
 *
 * Description:
 *	This routine processes disk specific ioctl calls
 *
 * Arguments:
 *	dev	- the device number
 *	cmd	- the operation [dkio(7I)] to be processed
 *	arg	- pointer to user provided structure
 *		  (contains data to be set or reference parameter for get)
 *	mode	- bit flag, indicating open settings, 32/64 bit type, etc
 *
 * Return Code:
 *	0
 *	EFAULT
 *	ENXIO
 *	EIO
 *	ENOTSUP
 */
static int
vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode)
{
	int		instance = VDCUNIT(dev);
	vdc_t		*vdc = NULL;
	int		rv = -1;
	int		idx = 0;	/* index into dk_ioctl[] */
	size_t		len = 0;	/* #bytes to send to vds */
	size_t		alloc_len = 0;	/* #bytes to allocate mem for */
	caddr_t		mem_p = NULL;
	size_t		nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0]));
	struct vtoc	vtoc_saved;
	vdc_dk_ioctl_t	*iop;

	vdc = ddi_get_soft_state(vdc_state, instance);
	if (vdc == NULL) {
		cmn_err(CE_NOTE, "![%d] Could not get soft state structure",
		    instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n",
	    instance, cmd, dev, ddi_model_convert_from(mode & FMODELS));

	/*
	 * Validate the ioctl operation to be performed.
	 *
	 * If we have looped through the array without finding a match then we
	 * don't support this ioctl.
	 */
	for (idx = 0; idx < nioctls; idx++) {
		if (cmd == dk_ioctl[idx].cmd)
			break;
	}

	if (idx >= nioctls) {
		DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n",
		    vdc->instance, cmd);
		return (ENOTSUP);
	}

	iop = &(dk_ioctl[idx]);

	if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) {
		/* size is not fixed for EFI ioctls, it depends on ioctl arg */
		dk_efi_t	dk_efi;

		rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode);
		if (rv != 0)
			return (EFAULT);

		len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length;
	} else {
		len = iop->nbytes;
	}

	/*
	 * Deal with the ioctls which the server does not provide. vdc can
	 * fake these up and return immediately
	 */
	switch (cmd) {
	case CDROMREADOFFSET:
	case DKIOCREMOVABLE:
	case USCSICMD:
		return (ENOTTY);

	case DKIOCINFO:
	{
		struct dk_cinfo cinfo;
		if (vdc->cinfo == NULL)
			return (ENXIO);

		bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo));
		cinfo.dki_partition = VDCPART(dev);

		rv = ddi_copyout(&cinfo, (void *)arg,
		    sizeof (struct dk_cinfo), mode);
		if (rv != 0)
			return (EFAULT);

		return (0);
	}

	case DKIOCGMEDIAINFO:
	{
		if (vdc->minfo == NULL)
			return (ENXIO);

		rv = ddi_copyout(vdc->minfo, (void *)arg,
		    sizeof (struct dk_minfo), mode);
		if (rv != 0)
			return (EFAULT);

		return (0);
	}

	case DKIOCFLUSHWRITECACHE:
	{
		struct dk_callback *dkc = (struct dk_callback *)arg;
		vdc_dk_arg_t	*dkarg = NULL;

		DMSG(vdc, 1, "[%d] Flush W$: mode %x\n",
		    instance, mode);

		/*
		 * If the backing device is not a 'real' disk then the
		 * W$ operation request to the vDisk server will fail
		 * so we might as well save the cycles and return now.
		 */
		if (vdc->vdisk_type != VD_DISK_TYPE_DISK)
			return (ENOTTY);

		/*
		 * If arg is NULL, then there is no callback function
		 * registered and the call operates synchronously; we
		 * break and continue with the rest of the function and
		 * wait for vds to return (i.e. after the request to
		 * vds returns successfully, all writes completed prior
		 * to the ioctl will have been flushed from the disk
		 * write cache to persistent media.
		 *
		 * If a callback function is registered, we dispatch
		 * the request on a task queue and return immediately.
		 * The callback will deal with informing the calling
		 * thread that the flush request is completed.
		 */
		if (dkc == NULL)
			break;

		/*
		 * the asynchronous callback is only supported if
		 * invoked from within the kernel
		 */
		if ((mode & FKIOCTL) == 0)
			return (ENOTSUP);

		dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP);

		dkarg->mode = mode;
		dkarg->dev = dev;
		bcopy(dkc, &dkarg->dkc, sizeof (*dkc));

		mutex_enter(&vdc->lock);
		vdc->dkio_flush_pending++;
		dkarg->vdc = vdc;
		mutex_exit(&vdc->lock);

		/*
		 * put the request on a task queue
		 *
		 * NOTE(review): taskq_dispatch(9F) returns a taskqid_t
		 * which is stored here in an int and compared to NULL;
		 * consider using a taskqid_t local instead - verify no
		 * truncation is possible on this platform.
		 */
		rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb,
		    (void *)dkarg, DDI_SLEEP);
		if (rv == NULL) {
			/* clean up if dispatch fails */
			mutex_enter(&vdc->lock);
			vdc->dkio_flush_pending--;
			kmem_free(dkarg, sizeof (vdc_dk_arg_t));
		}

		return (rv == NULL ?
ENOMEM : 0); 4315 } 4316 } 4317 4318 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 4319 ASSERT(iop->op != 0); 4320 4321 /* LDC requires that the memory being mapped is 8-byte aligned */ 4322 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 4323 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 4324 instance, len, alloc_len); 4325 4326 ASSERT(alloc_len >= 0); /* sanity check */ 4327 if (alloc_len > 0) 4328 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 4329 4330 if (cmd == DKIOCSVTOC) { 4331 /* 4332 * Save a copy of the current VTOC so that we can roll back 4333 * if the setting of the new VTOC fails. 4334 */ 4335 bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc)); 4336 } 4337 4338 /* 4339 * Call the conversion function for this ioctl which, if necessary, 4340 * converts from the Solaris format to the format ARC'ed 4341 * as part of the vDisk protocol (FWARC 2006/195) 4342 */ 4343 ASSERT(iop->convert != NULL); 4344 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 4345 if (rv != 0) { 4346 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4347 instance, rv, cmd); 4348 if (mem_p != NULL) 4349 kmem_free(mem_p, alloc_len); 4350 return (rv); 4351 } 4352 4353 /* 4354 * send request to vds to service the ioctl. 4355 */ 4356 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 4357 VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 4358 VIO_both_dir); 4359 4360 if (rv != 0) { 4361 /* 4362 * This is not necessarily an error. The ioctl could 4363 * be returning a value such as ENOTTY to indicate 4364 * that the ioctl is not applicable. 4365 */ 4366 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 4367 instance, rv, cmd); 4368 if (mem_p != NULL) 4369 kmem_free(mem_p, alloc_len); 4370 4371 if (cmd == DKIOCSVTOC) { 4372 /* update of the VTOC has failed, roll back */ 4373 bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc)); 4374 } 4375 4376 return (rv); 4377 } 4378 4379 if (cmd == DKIOCSVTOC) { 4380 /* 4381 * The VTOC has been changed. 
We need to update the device 4382 * nodes to handle the case where an EFI label has been 4383 * changed to a VTOC label. We also try and update the device 4384 * node properties. Failing to set the properties should 4385 * not cause an error to be return the caller though. 4386 */ 4387 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4388 (void) vdc_create_device_nodes_vtoc(vdc); 4389 4390 if (vdc_create_device_nodes_props(vdc)) { 4391 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4392 " properties", vdc->instance); 4393 } 4394 4395 } else if (cmd == DKIOCSETEFI) { 4396 /* 4397 * The EFI has been changed. We need to update the device 4398 * nodes to handle the case where a VTOC label has been 4399 * changed to an EFI label. We also try and update the device 4400 * node properties. Failing to set the properties should 4401 * not cause an error to be return the caller though. 4402 */ 4403 struct dk_gpt *efi; 4404 size_t efi_len; 4405 4406 vdc->vdisk_label = VD_DISK_LABEL_EFI; 4407 (void) vdc_create_device_nodes_efi(vdc); 4408 4409 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 4410 4411 if (rv == 0) { 4412 vdc_store_efi(vdc, efi); 4413 rv = vdc_create_device_nodes_props(vdc); 4414 vd_efi_free(efi, efi_len); 4415 } 4416 4417 if (rv) { 4418 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4419 " properties", vdc->instance); 4420 } 4421 } 4422 4423 /* 4424 * Call the conversion function (if it exists) for this ioctl 4425 * which converts from the format ARC'ed as part of the vDisk 4426 * protocol (FWARC 2006/195) back to a format understood by 4427 * the rest of Solaris. 
4428 */ 4429 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 4430 if (rv != 0) { 4431 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4432 instance, rv, cmd); 4433 if (mem_p != NULL) 4434 kmem_free(mem_p, alloc_len); 4435 return (rv); 4436 } 4437 4438 if (mem_p != NULL) 4439 kmem_free(mem_p, alloc_len); 4440 4441 return (rv); 4442 } 4443 4444 /* 4445 * Function: 4446 * 4447 * Description: 4448 * This is an empty conversion function used by ioctl calls which 4449 * do not need to convert the data being passed in/out to userland 4450 */ 4451 static int 4452 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 4453 { 4454 _NOTE(ARGUNUSED(vdc)) 4455 _NOTE(ARGUNUSED(from)) 4456 _NOTE(ARGUNUSED(to)) 4457 _NOTE(ARGUNUSED(mode)) 4458 _NOTE(ARGUNUSED(dir)) 4459 4460 return (0); 4461 } 4462 4463 static int 4464 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 4465 int mode, int dir) 4466 { 4467 _NOTE(ARGUNUSED(vdc)) 4468 4469 if (dir == VD_COPYIN) 4470 return (0); /* nothing to do */ 4471 4472 if (ddi_copyout(from, to, sizeof (int), mode) != 0) 4473 return (EFAULT); 4474 4475 return (0); 4476 } 4477 4478 static int 4479 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 4480 int mode, int dir) 4481 { 4482 _NOTE(ARGUNUSED(vdc)) 4483 4484 if (dir == VD_COPYOUT) 4485 return (0); /* nothing to do */ 4486 4487 if (ddi_copyin(from, to, sizeof (int), mode) != 0) 4488 return (EFAULT); 4489 4490 return (0); 4491 } 4492 4493 /* 4494 * Function: 4495 * vdc_get_vtoc_convert() 4496 * 4497 * Description: 4498 * This routine performs the necessary convertions from the DKIOCGVTOC 4499 * Solaris structure to the format defined in FWARC 2006/195. 4500 * 4501 * In the struct vtoc definition, the timestamp field is marked as not 4502 * supported so it is not part of vDisk protocol (FWARC 2006/195). 4503 * However SVM uses that field to check it can write into the VTOC, 4504 * so we fake up the info of that field. 
4505 * 4506 * Arguments: 4507 * vdc - the vDisk client 4508 * from - the buffer containing the data to be copied from 4509 * to - the buffer to be copied to 4510 * mode - flags passed to ioctl() call 4511 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 4512 * 4513 * Return Code: 4514 * 0 - Success 4515 * ENXIO - incorrect buffer passed in. 4516 * EFAULT - ddi_copyout routine encountered an error. 4517 */ 4518 static int 4519 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4520 { 4521 int i; 4522 void *tmp_mem = NULL; 4523 void *tmp_memp; 4524 struct vtoc vt; 4525 struct vtoc32 vt32; 4526 int copy_len = 0; 4527 int rv = 0; 4528 4529 if (dir != VD_COPYOUT) 4530 return (0); /* nothing to do */ 4531 4532 if ((from == NULL) || (to == NULL)) 4533 return (ENXIO); 4534 4535 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4536 copy_len = sizeof (struct vtoc32); 4537 else 4538 copy_len = sizeof (struct vtoc); 4539 4540 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4541 4542 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 4543 4544 /* fake the VTOC timestamp field */ 4545 for (i = 0; i < V_NUMPAR; i++) { 4546 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 4547 } 4548 4549 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4550 vtoctovtoc32(vt, vt32); 4551 tmp_memp = &vt32; 4552 } else { 4553 tmp_memp = &vt; 4554 } 4555 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 4556 if (rv != 0) 4557 rv = EFAULT; 4558 4559 kmem_free(tmp_mem, copy_len); 4560 return (rv); 4561 } 4562 4563 /* 4564 * Function: 4565 * vdc_set_vtoc_convert() 4566 * 4567 * Description: 4568 * This routine performs the necessary convertions from the DKIOCSVTOC 4569 * Solaris structure to the format defined in FWARC 2006/195. 
4570 * 4571 * Arguments: 4572 * vdc - the vDisk client 4573 * from - Buffer with data 4574 * to - Buffer where data is to be copied to 4575 * mode - flags passed to ioctl 4576 * dir - direction of copy (in or out) 4577 * 4578 * Return Code: 4579 * 0 - Success 4580 * ENXIO - Invalid buffer passed in 4581 * EFAULT - ddi_copyin of data failed 4582 */ 4583 static int 4584 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4585 { 4586 void *tmp_mem = NULL; 4587 struct vtoc vt; 4588 struct vtoc *vtp = &vt; 4589 vd_vtoc_t vtvd; 4590 int copy_len = 0; 4591 int rv = 0; 4592 4593 if (dir != VD_COPYIN) 4594 return (0); /* nothing to do */ 4595 4596 if ((from == NULL) || (to == NULL)) 4597 return (ENXIO); 4598 4599 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4600 copy_len = sizeof (struct vtoc32); 4601 else 4602 copy_len = sizeof (struct vtoc); 4603 4604 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4605 4606 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4607 if (rv != 0) { 4608 kmem_free(tmp_mem, copy_len); 4609 return (EFAULT); 4610 } 4611 4612 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4613 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 4614 } else { 4615 vtp = tmp_mem; 4616 } 4617 4618 /* 4619 * The VTOC is being changed, then vdc needs to update the copy 4620 * it saved in the soft state structure. 
4621 */ 4622 bcopy(vtp, vdc->vtoc, sizeof (struct vtoc)); 4623 4624 VTOC2VD_VTOC(vtp, &vtvd); 4625 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 4626 kmem_free(tmp_mem, copy_len); 4627 4628 return (0); 4629 } 4630 4631 /* 4632 * Function: 4633 * vdc_get_geom_convert() 4634 * 4635 * Description: 4636 * This routine performs the necessary convertions from the DKIOCGGEOM, 4637 * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 4638 * defined in FWARC 2006/195 4639 * 4640 * Arguments: 4641 * vdc - the vDisk client 4642 * from - Buffer with data 4643 * to - Buffer where data is to be copied to 4644 * mode - flags passed to ioctl 4645 * dir - direction of copy (in or out) 4646 * 4647 * Return Code: 4648 * 0 - Success 4649 * ENXIO - Invalid buffer passed in 4650 * EFAULT - ddi_copyout of data failed 4651 */ 4652 static int 4653 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4654 { 4655 _NOTE(ARGUNUSED(vdc)) 4656 4657 struct dk_geom geom; 4658 int copy_len = sizeof (struct dk_geom); 4659 int rv = 0; 4660 4661 if (dir != VD_COPYOUT) 4662 return (0); /* nothing to do */ 4663 4664 if ((from == NULL) || (to == NULL)) 4665 return (ENXIO); 4666 4667 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 4668 rv = ddi_copyout(&geom, to, copy_len, mode); 4669 if (rv != 0) 4670 rv = EFAULT; 4671 4672 return (rv); 4673 } 4674 4675 /* 4676 * Function: 4677 * vdc_set_geom_convert() 4678 * 4679 * Description: 4680 * This routine performs the necessary convertions from the DKIOCSGEOM 4681 * Solaris structure to the format defined in FWARC 2006/195. 
4682 * 4683 * Arguments: 4684 * vdc - the vDisk client 4685 * from - Buffer with data 4686 * to - Buffer where data is to be copied to 4687 * mode - flags passed to ioctl 4688 * dir - direction of copy (in or out) 4689 * 4690 * Return Code: 4691 * 0 - Success 4692 * ENXIO - Invalid buffer passed in 4693 * EFAULT - ddi_copyin of data failed 4694 */ 4695 static int 4696 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4697 { 4698 _NOTE(ARGUNUSED(vdc)) 4699 4700 vd_geom_t vdgeom; 4701 void *tmp_mem = NULL; 4702 int copy_len = sizeof (struct dk_geom); 4703 int rv = 0; 4704 4705 if (dir != VD_COPYIN) 4706 return (0); /* nothing to do */ 4707 4708 if ((from == NULL) || (to == NULL)) 4709 return (ENXIO); 4710 4711 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4712 4713 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4714 if (rv != 0) { 4715 kmem_free(tmp_mem, copy_len); 4716 return (EFAULT); 4717 } 4718 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 4719 bcopy(&vdgeom, to, sizeof (vdgeom)); 4720 kmem_free(tmp_mem, copy_len); 4721 4722 return (0); 4723 } 4724 4725 static int 4726 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4727 { 4728 _NOTE(ARGUNUSED(vdc)) 4729 4730 vd_efi_t *vd_efi; 4731 dk_efi_t dk_efi; 4732 int rv = 0; 4733 void *uaddr; 4734 4735 if ((from == NULL) || (to == NULL)) 4736 return (ENXIO); 4737 4738 if (dir == VD_COPYIN) { 4739 4740 vd_efi = (vd_efi_t *)to; 4741 4742 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 4743 if (rv != 0) 4744 return (EFAULT); 4745 4746 vd_efi->lba = dk_efi.dki_lba; 4747 vd_efi->length = dk_efi.dki_length; 4748 bzero(vd_efi->data, vd_efi->length); 4749 4750 } else { 4751 4752 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 4753 if (rv != 0) 4754 return (EFAULT); 4755 4756 uaddr = dk_efi.dki_data; 4757 4758 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 4759 4760 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 4761 4762 rv = ddi_copyout(dk_efi.dki_data, 
uaddr, dk_efi.dki_length, 4763 mode); 4764 if (rv != 0) 4765 return (EFAULT); 4766 4767 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 4768 } 4769 4770 return (0); 4771 } 4772 4773 static int 4774 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4775 { 4776 _NOTE(ARGUNUSED(vdc)) 4777 4778 dk_efi_t dk_efi; 4779 void *uaddr; 4780 4781 if (dir == VD_COPYOUT) 4782 return (0); /* nothing to do */ 4783 4784 if ((from == NULL) || (to == NULL)) 4785 return (ENXIO); 4786 4787 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 4788 return (EFAULT); 4789 4790 uaddr = dk_efi.dki_data; 4791 4792 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 4793 4794 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 4795 return (EFAULT); 4796 4797 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 4798 4799 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 4800 4801 return (0); 4802 } 4803 4804 /* 4805 * Function: 4806 * vdc_create_fake_geometry() 4807 * 4808 * Description: 4809 * This routine fakes up the disk info needed for some DKIO ioctls. 4810 * - DKIOCINFO 4811 * - DKIOCGMEDIAINFO 4812 * 4813 * [ just like lofi(7D) and ramdisk(7D) ] 4814 * 4815 * Arguments: 4816 * vdc - soft state pointer for this instance of the device driver. 
4817 * 4818 * Return Code: 4819 * 0 - Success 4820 */ 4821 static int 4822 vdc_create_fake_geometry(vdc_t *vdc) 4823 { 4824 ASSERT(vdc != NULL); 4825 4826 /* 4827 * Check if max_xfer_sz and vdisk_size are valid 4828 */ 4829 if (vdc->vdisk_size == 0 || vdc->max_xfer_sz == 0) 4830 return (EIO); 4831 4832 /* 4833 * DKIOCINFO support 4834 */ 4835 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 4836 4837 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 4838 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 4839 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 4840 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 4841 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 4842 vdc->cinfo->dki_flags = DKI_FMTVOL; 4843 vdc->cinfo->dki_cnum = 0; 4844 vdc->cinfo->dki_addr = 0; 4845 vdc->cinfo->dki_space = 0; 4846 vdc->cinfo->dki_prio = 0; 4847 vdc->cinfo->dki_vec = 0; 4848 vdc->cinfo->dki_unit = vdc->instance; 4849 vdc->cinfo->dki_slave = 0; 4850 /* 4851 * The partition number will be created on the fly depending on the 4852 * actual slice (i.e. minor node) that is used to request the data. 4853 */ 4854 vdc->cinfo->dki_partition = 0; 4855 4856 /* 4857 * DKIOCGMEDIAINFO support 4858 */ 4859 if (vdc->minfo == NULL) 4860 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 4861 vdc->minfo->dki_media_type = DK_FIXED_DISK; 4862 vdc->minfo->dki_capacity = vdc->vdisk_size; 4863 vdc->minfo->dki_lbsize = DEV_BSIZE; 4864 4865 return (0); 4866 } 4867 4868 /* 4869 * Function: 4870 * vdc_setup_disk_layout() 4871 * 4872 * Description: 4873 * This routine discovers all the necessary details about the "disk" 4874 * by requesting the data that is available from the vDisk server and by 4875 * faking up the rest of the data. 4876 * 4877 * Arguments: 4878 * vdc - soft state pointer for this instance of the device driver. 
4879 * 4880 * Return Code: 4881 * 0 - Success 4882 */ 4883 static int 4884 vdc_setup_disk_layout(vdc_t *vdc) 4885 { 4886 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 4887 dev_t dev; 4888 int slice = 0; 4889 int rv, error; 4890 4891 ASSERT(vdc != NULL); 4892 4893 if (vdc->vtoc == NULL) 4894 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 4895 4896 dev = makedevice(ddi_driver_major(vdc->dip), 4897 VD_MAKE_DEV(vdc->instance, 0)); 4898 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); 4899 4900 if (rv && rv != ENOTSUP) { 4901 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 4902 vdc->instance, rv); 4903 return (rv); 4904 } 4905 4906 /* 4907 * The process of attempting to read VTOC will initiate 4908 * the handshake and establish a connection. Following 4909 * handshake, go ahead and create geometry. 4910 */ 4911 error = vdc_create_fake_geometry(vdc); 4912 if (error != 0) { 4913 DMSG(vdc, 0, "[%d] Failed to create disk geometry (err%d)", 4914 vdc->instance, error); 4915 return (error); 4916 } 4917 4918 if (rv == ENOTSUP) { 4919 /* 4920 * If the device does not support VTOC then we try 4921 * to read an EFI label. 4922 */ 4923 struct dk_gpt *efi; 4924 size_t efi_len; 4925 4926 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 4927 4928 if (rv) { 4929 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 4930 vdc->instance, rv); 4931 return (rv); 4932 } 4933 4934 vdc->vdisk_label = VD_DISK_LABEL_EFI; 4935 vdc_store_efi(vdc, efi); 4936 vd_efi_free(efi, efi_len); 4937 4938 return (0); 4939 } 4940 4941 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4942 4943 /* 4944 * FUTURE: This could be default way for reading the VTOC 4945 * from the disk as supposed to sending the VD_OP_GET_VTOC 4946 * to the server. Currently this is a sanity check. 4947 * 4948 * find the slice that represents the entire "disk" and use that to 4949 * read the disk label. 
The convention in Solaris is that slice 2 4950 * represents the whole disk so we check that it is, otherwise we 4951 * default to slice 0 4952 */ 4953 if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && 4954 (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { 4955 slice = 2; 4956 } else { 4957 slice = 0; 4958 } 4959 4960 /* 4961 * Read disk label from start of disk 4962 */ 4963 vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); 4964 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4965 bioinit(buf); 4966 buf->b_un.b_addr = (caddr_t)vdc->label; 4967 buf->b_bcount = DK_LABEL_SIZE; 4968 buf->b_flags = B_BUSY | B_READ; 4969 buf->b_dev = dev; 4970 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)vdc->label, 4971 DK_LABEL_SIZE, slice, 0, CB_STRATEGY, buf, VIO_read_dir); 4972 if (rv) { 4973 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 4974 vdc->instance); 4975 kmem_free(buf, sizeof (buf_t)); 4976 return (rv); 4977 } 4978 rv = biowait(buf); 4979 biofini(buf); 4980 kmem_free(buf, sizeof (buf_t)); 4981 4982 return (rv); 4983 } 4984 4985 /* 4986 * Function: 4987 * vdc_setup_devid() 4988 * 4989 * Description: 4990 * This routine discovers the devid of a vDisk. It requests the devid of 4991 * the underlying device from the vDisk server, builds an encapsulated 4992 * devid based on the retrieved devid and registers that new devid to 4993 * the vDisk. 4994 * 4995 * Arguments: 4996 * vdc - soft state pointer for this instance of the device driver. 4997 * 4998 * Return Code: 4999 * 0 - A devid was succesfully registered for the vDisk 5000 */ 5001 static int 5002 vdc_setup_devid(vdc_t *vdc) 5003 { 5004 int rv; 5005 vd_devid_t *vd_devid; 5006 size_t bufsize, bufid_len; 5007 5008 /* 5009 * At first sight, we don't know the size of the devid that the 5010 * server will return but this size will be encoded into the 5011 * reply. So we do a first request using a default size then we 5012 * check if this size was large enough. 
If not then we do a second 5013 * request with the correct size returned by the server. Note that 5014 * ldc requires size to be 8-byte aligned. 5015 */ 5016 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 5017 sizeof (uint64_t)); 5018 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5019 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5020 5021 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 5022 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir); 5023 5024 DMSG(vdc, 2, "sync_op returned %d\n", rv); 5025 5026 if (rv) { 5027 kmem_free(vd_devid, bufsize); 5028 return (rv); 5029 } 5030 5031 if (vd_devid->length > bufid_len) { 5032 /* 5033 * The returned devid is larger than the buffer used. Try again 5034 * with a buffer with the right size. 5035 */ 5036 kmem_free(vd_devid, bufsize); 5037 bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 5038 sizeof (uint64_t)); 5039 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5040 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5041 5042 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 5043 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 5044 VIO_both_dir); 5045 5046 if (rv) { 5047 kmem_free(vd_devid, bufsize); 5048 return (rv); 5049 } 5050 } 5051 5052 /* 5053 * The virtual disk should have the same device id as the one associated 5054 * with the physical disk it is mapped on, otherwise sharing a disk 5055 * between a LDom and a non-LDom may not work (for example for a shared 5056 * SVM disk set). 5057 * 5058 * The DDI framework does not allow creating a device id with any 5059 * type so we first create a device id of type DEVID_ENCAP and then 5060 * we restore the orignal type of the physical device. 
5061 */ 5062 5063 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 5064 5065 /* build an encapsulated devid based on the returned devid */ 5066 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 5067 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 5068 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 5069 kmem_free(vd_devid, bufsize); 5070 return (1); 5071 } 5072 5073 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 5074 5075 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 5076 5077 kmem_free(vd_devid, bufsize); 5078 5079 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 5080 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 5081 return (1); 5082 } 5083 5084 return (0); 5085 } 5086 5087 static void 5088 vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi) 5089 { 5090 struct vtoc *vtoc = vdc->vtoc; 5091 5092 vd_efi_to_vtoc(efi, vtoc); 5093 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5094 /* 5095 * vd_efi_to_vtoc() will store information about the EFI Sun 5096 * reserved partition (representing the entire disk) into 5097 * partition 7. However single-slice device will only have 5098 * that single partition and the vdc driver expects to find 5099 * information about that partition in slice 0. So we need 5100 * to copy information from slice 7 to slice 0. 5101 */ 5102 vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag; 5103 vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag; 5104 vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start; 5105 vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; 5106 } 5107 } 5108