1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. 
vdc will copy the data to be written into the descriptor ring, or
 *		map the buffer that will receive the data read by the vDisk
 *		server into the descriptor ring. It then sends a message to the
 *		vDisk server requesting it to complete the operation.
 *
 *	4) Handling responses from vDisk server.
 *		The vDisk server will ACK some or all of the messages vdc sends
 *		to it (this is configured during the handshake). Upon receipt
 *		of an ACK vdc will check the descriptor ring and signal to the
 *		upper layer code waiting on the IO.
 */

#include <sys/atomic.h>
#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/efi_partition.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mach_descrip.h>
#include <sys/modctl.h>
#include <sys/mdeg.h>
#include <sys/note.h>
#include <sys/open.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/promif.h>
#include <sys/vtoc.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>

#include <sys/cdio.h>
#include <sys/dktp/cm.h>
#include <sys/dktp/fdisk.h>
#include <sys/scsi/generic/sense.h>
#include <sys/scsi/impl/uscsi.h>	/* Needed for defn of USCSICMD ioctl */

#include <sys/ldoms.h>
#include <sys/ldc.h>
#include <sys/vio_common.h>
#include <sys/vio_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdc.h>

/*
 * function prototypes
 */

/* standard driver functions (entry points referenced from vdc_cb_ops) */
static int	vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
static int	vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int	vdc_strategy(struct buf *buf);
static int	vdc_print(dev_t dev, char *str);
static int	vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int	vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
			cred_t *credp, int *rvalp);
static int	vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
static int	vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);

/* autoconfiguration entry points (referenced from vdc_ops) */
static int	vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
			void *arg, void **resultp);
static int	vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int	vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);

/* setup */
static void	vdc_min(struct buf *bufp);
static int	vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
static int	vdc_do_ldc_init(vdc_t *vdc);
static int	vdc_start_ldc_connection(vdc_t *vdc);
static int	vdc_create_device_nodes(vdc_t *vdc);
static int	vdc_create_device_nodes_efi(vdc_t *vdc);
static int	vdc_create_device_nodes_vtoc(vdc_t *vdc);
static int	vdc_create_device_nodes_props(vdc_t *vdc);
static int	vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id);
static int	vdc_do_ldc_up(vdc_t *vdc);
static void	vdc_terminate_ldc(vdc_t *vdc);
static int	vdc_init_descriptor_ring(vdc_t *vdc);
static void	vdc_destroy_descriptor_ring(vdc_t *vdc);
static int	vdc_setup_devid(vdc_t *vdc);
static void	vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi);

/* handshake with vds */
static int		vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
static int		vdc_ver_negotiation(vdc_t *vdcp);
static int		vdc_init_attr_negotiation(vdc_t *vdc);
static int		vdc_attr_negotiation(vdc_t *vdcp);
static int		vdc_init_dring_negotiate(vdc_t *vdc);
static int		vdc_dring_negotiation(vdc_t *vdcp);
static int		vdc_send_rdx(vdc_t *vdcp);
static int		vdc_rdx_exchange(vdc_t *vdcp);
static boolean_t	vdc_is_supported_version(vio_ver_msg_t *ver_msg);

/* processing incoming messages from vDisk server */
static void	vdc_process_msg_thread(vdc_t *vdc);
static int	vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp);

static uint_t	vdc_handle_cb(uint64_t event, caddr_t arg);
static int	vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg);
static int	vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg);
static int	vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
static int	vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
static int	vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
static int	vdc_send_request(vdc_t *vdcp, int operation,
		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
static int	vdc_map_to_shared_dring(vdc_t *vdcp, int idx);
static int	vdc_populate_descriptor(vdc_t *vdcp, int operation,
		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
static int	vdc_do_sync_op(vdc_t *vdcp, int operation,
		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
		    int cb_type, void *cb_arg, vio_desc_direction_t dir);

static int	vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp);
static int	vdc_drain_response(vdc_t *vdcp);
static int	vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
static int	vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep);
static int	vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg);

/* dkio */
static int	vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
static int	vdc_create_fake_geometry(vdc_t *vdc);
static int	vdc_setup_disk_layout(vdc_t *vdc);
static int	vdc_null_copy_func(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);

/*
 * Module variables
 */

/*
 * Tunable variables to control how long vdc waits before timing out on
 * various operations
 */
static int	vdc_retries = 10;

/*
 * vdc_hz_timeout is calculated from 'vdc_usec_timeout' during attach via
 * drv_usectohz(), so it is expressed in clock ticks while the tunable it is
 * derived from is expressed in microseconds.
 */
static uint64_t	vdc_hz_timeout;				/* units: clock ticks */
static uint64_t	vdc_usec_timeout = 30 * MICROSEC;	/* 30s units: usec */

/*
 * The vdc_hz_*_ldc_delay values are likewise computed during attach by
 * passing the matching vdc_*_timeout_ldc tunable (microseconds) through
 * drv_usectohz().
 */
static uint64_t vdc_hz_min_ldc_delay;			/* units: clock ticks */
static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC;	/* units: usec */
static uint64_t vdc_hz_max_ldc_delay;			/* units: clock ticks */
static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC;	/* units: usec */

/* NOTE(review): presumably microseconds like the tunables above - confirm */
static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC;
static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC;

/* values for dumping - need to run in a tighter loop */
static uint64_t	vdc_usec_timeout_dump = 100 * MILLISEC;	/* 0.1s units: usec */
static int	vdc_dump_retries = 100;

/* Count of the number of vdc instances attached */
static volatile uint32_t	vdc_instance_count = 0;

/* Soft state pointer */
static void	*vdc_state;

/*
 * Controlling the verbosity of the error/debug messages
 *
 * vdc_msglevel - controls level of messages
 * vdc_matchinst - 64-bit variable where each bit corresponds
 *                 to the vdc instance the vdc_msglevel applies.
 */
int		vdc_msglevel = 0x0;
uint64_t	vdc_matchinst = 0ull;

/*
 * Supported vDisk protocol version pairs.
242 * 243 * The first array entry is the latest and preferred version. 244 */ 245 static const vio_ver_t vdc_version[] = {{1, 0}}; 246 247 static struct cb_ops vdc_cb_ops = { 248 vdc_open, /* cb_open */ 249 vdc_close, /* cb_close */ 250 vdc_strategy, /* cb_strategy */ 251 vdc_print, /* cb_print */ 252 vdc_dump, /* cb_dump */ 253 vdc_read, /* cb_read */ 254 vdc_write, /* cb_write */ 255 vdc_ioctl, /* cb_ioctl */ 256 nodev, /* cb_devmap */ 257 nodev, /* cb_mmap */ 258 nodev, /* cb_segmap */ 259 nochpoll, /* cb_chpoll */ 260 ddi_prop_op, /* cb_prop_op */ 261 NULL, /* cb_str */ 262 D_MP | D_64BIT, /* cb_flag */ 263 CB_REV, /* cb_rev */ 264 vdc_aread, /* cb_aread */ 265 vdc_awrite /* cb_awrite */ 266 }; 267 268 static struct dev_ops vdc_ops = { 269 DEVO_REV, /* devo_rev */ 270 0, /* devo_refcnt */ 271 vdc_getinfo, /* devo_getinfo */ 272 nulldev, /* devo_identify */ 273 nulldev, /* devo_probe */ 274 vdc_attach, /* devo_attach */ 275 vdc_detach, /* devo_detach */ 276 nodev, /* devo_reset */ 277 &vdc_cb_ops, /* devo_cb_ops */ 278 NULL, /* devo_bus_ops */ 279 nulldev /* devo_power */ 280 }; 281 282 static struct modldrv modldrv = { 283 &mod_driverops, 284 "virtual disk client %I%", 285 &vdc_ops, 286 }; 287 288 static struct modlinkage modlinkage = { 289 MODREV_1, 290 &modldrv, 291 NULL 292 }; 293 294 /* -------------------------------------------------------------------------- */ 295 296 /* 297 * Device Driver housekeeping and setup 298 */ 299 300 int 301 _init(void) 302 { 303 int status; 304 305 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 306 return (status); 307 if ((status = mod_install(&modlinkage)) != 0) 308 ddi_soft_state_fini(&vdc_state); 309 vdc_efi_init(vd_process_ioctl); 310 return (status); 311 } 312 313 int 314 _info(struct modinfo *modinfop) 315 { 316 return (mod_info(&modlinkage, modinfop)); 317 } 318 319 int 320 _fini(void) 321 { 322 int status; 323 324 if ((status = mod_remove(&modlinkage)) != 0) 325 return (status); 326 
vdc_efi_fini(); 327 ddi_soft_state_fini(&vdc_state); 328 return (0); 329 } 330 331 static int 332 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 333 { 334 _NOTE(ARGUNUSED(dip)) 335 336 int instance = VDCUNIT((dev_t)arg); 337 vdc_t *vdc = NULL; 338 339 switch (cmd) { 340 case DDI_INFO_DEVT2DEVINFO: 341 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 342 *resultp = NULL; 343 return (DDI_FAILURE); 344 } 345 *resultp = vdc->dip; 346 return (DDI_SUCCESS); 347 case DDI_INFO_DEVT2INSTANCE: 348 *resultp = (void *)(uintptr_t)instance; 349 return (DDI_SUCCESS); 350 default: 351 *resultp = NULL; 352 return (DDI_FAILURE); 353 } 354 } 355 356 static int 357 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 358 { 359 int instance; 360 int rv; 361 vdc_t *vdc = NULL; 362 363 switch (cmd) { 364 case DDI_DETACH: 365 /* the real work happens below */ 366 break; 367 case DDI_SUSPEND: 368 /* nothing to do for this non-device */ 369 return (DDI_SUCCESS); 370 default: 371 return (DDI_FAILURE); 372 } 373 374 ASSERT(cmd == DDI_DETACH); 375 instance = ddi_get_instance(dip); 376 DMSGX(1, "[%d] Entered\n", instance); 377 378 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 379 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 380 return (DDI_FAILURE); 381 } 382 383 if (vdc->open_count) { 384 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 385 return (DDI_FAILURE); 386 } 387 388 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 389 390 /* mark instance as detaching */ 391 vdc->lifecycle = VDC_LC_DETACHING; 392 393 /* 394 * try and disable callbacks to prevent another handshake 395 */ 396 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 397 DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv); 398 399 if (vdc->initialized & VDC_THREAD) { 400 mutex_enter(&vdc->read_lock); 401 if ((vdc->read_state == VDC_READ_WAITING) || 402 (vdc->read_state == VDC_READ_RESET)) { 403 vdc->read_state = VDC_READ_RESET; 404 
cv_signal(&vdc->read_cv); 405 } 406 407 mutex_exit(&vdc->read_lock); 408 409 /* wake up any thread waiting for connection to come online */ 410 mutex_enter(&vdc->lock); 411 if (vdc->state == VDC_STATE_INIT_WAITING) { 412 DMSG(vdc, 0, 413 "[%d] write reset - move to resetting state...\n", 414 instance); 415 vdc->state = VDC_STATE_RESETTING; 416 cv_signal(&vdc->initwait_cv); 417 } 418 mutex_exit(&vdc->lock); 419 420 /* now wait until state transitions to VDC_STATE_DETACH */ 421 thread_join(vdc->msg_proc_thr->t_did); 422 ASSERT(vdc->state == VDC_STATE_DETACH); 423 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 424 vdc->instance); 425 } 426 427 mutex_enter(&vdc->lock); 428 429 if (vdc->initialized & VDC_DRING) 430 vdc_destroy_descriptor_ring(vdc); 431 432 if (vdc->initialized & VDC_LDC) 433 vdc_terminate_ldc(vdc); 434 435 mutex_exit(&vdc->lock); 436 437 if (vdc->initialized & VDC_MINOR) { 438 ddi_prop_remove_all(dip); 439 ddi_remove_minor_node(dip, NULL); 440 } 441 442 if (vdc->initialized & VDC_LOCKS) { 443 mutex_destroy(&vdc->lock); 444 mutex_destroy(&vdc->read_lock); 445 cv_destroy(&vdc->initwait_cv); 446 cv_destroy(&vdc->dring_free_cv); 447 cv_destroy(&vdc->membind_cv); 448 cv_destroy(&vdc->sync_pending_cv); 449 cv_destroy(&vdc->sync_blocked_cv); 450 cv_destroy(&vdc->read_cv); 451 cv_destroy(&vdc->running_cv); 452 } 453 454 if (vdc->minfo) 455 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 456 457 if (vdc->cinfo) 458 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 459 460 if (vdc->vtoc) 461 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 462 463 if (vdc->label) 464 kmem_free(vdc->label, DK_LABEL_SIZE); 465 466 if (vdc->devid) { 467 ddi_devid_unregister(dip); 468 ddi_devid_free(vdc->devid); 469 } 470 471 if (vdc->initialized & VDC_SOFT_STATE) 472 ddi_soft_state_free(vdc_state, instance); 473 474 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 475 476 return (DDI_SUCCESS); 477 } 478 479 480 static int 481 vdc_do_attach(dev_info_t *dip) 482 { 483 int 
instance; 484 vdc_t *vdc = NULL; 485 int status; 486 487 ASSERT(dip != NULL); 488 489 instance = ddi_get_instance(dip); 490 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 491 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 492 instance); 493 return (DDI_FAILURE); 494 } 495 496 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 497 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 498 return (DDI_FAILURE); 499 } 500 501 /* 502 * We assign the value to initialized in this case to zero out the 503 * variable and then set bits in it to indicate what has been done 504 */ 505 vdc->initialized = VDC_SOFT_STATE; 506 507 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 508 509 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 510 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 511 512 vdc->dip = dip; 513 vdc->instance = instance; 514 vdc->open_count = 0; 515 vdc->vdisk_type = VD_DISK_TYPE_UNK; 516 vdc->vdisk_label = VD_DISK_LABEL_UNK; 517 vdc->state = VDC_STATE_INIT; 518 vdc->lifecycle = VDC_LC_ATTACHING; 519 vdc->ldc_state = 0; 520 vdc->session_id = 0; 521 vdc->block_size = DEV_BSIZE; 522 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 523 524 vdc->vtoc = NULL; 525 vdc->cinfo = NULL; 526 vdc->minfo = NULL; 527 528 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 529 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 530 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 531 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 532 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 533 534 vdc->threads_pending = 0; 535 vdc->sync_op_pending = B_FALSE; 536 vdc->sync_op_blocked = B_FALSE; 537 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 538 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 539 540 /* init blocking msg read functionality */ 541 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 542 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 543 vdc->read_state = VDC_READ_IDLE; 544 545 
vdc->initialized |= VDC_LOCKS; 546 547 /* initialise LDC channel which will be used to communicate with vds */ 548 if ((status = vdc_do_ldc_init(vdc)) != 0) { 549 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 550 goto return_status; 551 } 552 553 /* initialize the thread responsible for managing state with server */ 554 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 555 vdc, 0, &p0, TS_RUN, minclsyspri); 556 if (vdc->msg_proc_thr == NULL) { 557 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 558 instance); 559 return (DDI_FAILURE); 560 } 561 562 vdc->initialized |= VDC_THREAD; 563 564 atomic_inc_32(&vdc_instance_count); 565 566 /* 567 * Once the handshake is complete, we can use the DRing to send 568 * requests to the vDisk server to calculate the geometry and 569 * VTOC of the "disk" 570 */ 571 status = vdc_setup_disk_layout(vdc); 572 if (status != 0) { 573 DMSG(vdc, 0, "[%d] Failed to discover disk layout (err%d)", 574 vdc->instance, status); 575 goto return_status; 576 } 577 578 /* 579 * Now that we have the device info we can create the 580 * device nodes and properties 581 */ 582 status = vdc_create_device_nodes(vdc); 583 if (status) { 584 DMSG(vdc, 0, "[%d] Failed to create device nodes", 585 instance); 586 goto return_status; 587 } 588 status = vdc_create_device_nodes_props(vdc); 589 if (status) { 590 DMSG(vdc, 0, "[%d] Failed to create device nodes" 591 " properties (%d)", instance, status); 592 goto return_status; 593 } 594 595 /* 596 * Setup devid 597 */ 598 if (vdc_setup_devid(vdc)) { 599 DMSG(vdc, 0, "[%d] No device id available\n", instance); 600 } 601 602 ddi_report_dev(dip); 603 vdc->lifecycle = VDC_LC_ONLINE; 604 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 605 606 return_status: 607 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 608 return (status); 609 } 610 611 static int 612 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 613 { 614 int status; 615 616 switch (cmd) { 617 case 
DDI_ATTACH: 618 if ((status = vdc_do_attach(dip)) != 0) 619 (void) vdc_detach(dip, DDI_DETACH); 620 return (status); 621 case DDI_RESUME: 622 /* nothing to do for this non-device */ 623 return (DDI_SUCCESS); 624 default: 625 return (DDI_FAILURE); 626 } 627 } 628 629 static int 630 vdc_do_ldc_init(vdc_t *vdc) 631 { 632 int status = 0; 633 ldc_status_t ldc_state; 634 ldc_attr_t ldc_attr; 635 uint64_t ldc_id = 0; 636 dev_info_t *dip = NULL; 637 638 ASSERT(vdc != NULL); 639 640 dip = vdc->dip; 641 vdc->initialized |= VDC_LDC; 642 643 if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) { 644 DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property", 645 vdc->instance); 646 return (EIO); 647 } 648 vdc->ldc_id = ldc_id; 649 650 ldc_attr.devclass = LDC_DEV_BLK; 651 ldc_attr.instance = vdc->instance; 652 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 653 ldc_attr.mtu = VD_LDC_MTU; 654 655 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 656 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 657 if (status != 0) { 658 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 659 vdc->instance, ldc_id, status); 660 return (status); 661 } 662 vdc->initialized |= VDC_LDC_INIT; 663 } 664 status = ldc_status(vdc->ldc_handle, &ldc_state); 665 if (status != 0) { 666 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 667 vdc->instance, status); 668 return (status); 669 } 670 vdc->ldc_state = ldc_state; 671 672 if ((vdc->initialized & VDC_LDC_CB) == 0) { 673 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 674 (caddr_t)vdc); 675 if (status != 0) { 676 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 677 vdc->instance, status); 678 return (status); 679 } 680 vdc->initialized |= VDC_LDC_CB; 681 } 682 683 vdc->initialized |= VDC_LDC; 684 685 /* 686 * At this stage we have initialised LDC, we will now try and open 687 * the connection. 
688 */ 689 if (vdc->ldc_state == LDC_INIT) { 690 status = ldc_open(vdc->ldc_handle); 691 if (status != 0) { 692 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 693 vdc->instance, vdc->ldc_id, status); 694 return (status); 695 } 696 vdc->initialized |= VDC_LDC_OPEN; 697 } 698 699 return (status); 700 } 701 702 static int 703 vdc_start_ldc_connection(vdc_t *vdc) 704 { 705 int status = 0; 706 707 ASSERT(vdc != NULL); 708 709 ASSERT(MUTEX_HELD(&vdc->lock)); 710 711 status = vdc_do_ldc_up(vdc); 712 713 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 714 715 return (status); 716 } 717 718 static int 719 vdc_stop_ldc_connection(vdc_t *vdcp) 720 { 721 int status; 722 723 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 724 vdcp->state); 725 726 status = ldc_down(vdcp->ldc_handle); 727 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 728 729 vdcp->initialized &= ~VDC_HANDSHAKE; 730 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 731 732 return (status); 733 } 734 735 static int 736 vdc_create_device_nodes_efi(vdc_t *vdc) 737 { 738 ddi_remove_minor_node(vdc->dip, "h"); 739 ddi_remove_minor_node(vdc->dip, "h,raw"); 740 741 if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 742 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 743 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 744 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 745 vdc->instance); 746 return (EIO); 747 } 748 749 /* if any device node is created we set this flag */ 750 vdc->initialized |= VDC_MINOR; 751 752 if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 753 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 754 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 755 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 756 vdc->instance); 757 return (EIO); 758 } 759 760 return (0); 761 } 762 763 static int 764 vdc_create_device_nodes_vtoc(vdc_t *vdc) 765 { 766 ddi_remove_minor_node(vdc->dip, "wd"); 767 ddi_remove_minor_node(vdc->dip, "wd,raw"); 768 769 if (ddi_create_minor_node(vdc->dip, "h", 
S_IFBLK, 770 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 771 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 772 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 773 vdc->instance); 774 return (EIO); 775 } 776 777 /* if any device node is created we set this flag */ 778 vdc->initialized |= VDC_MINOR; 779 780 if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 781 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 782 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 783 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 784 vdc->instance); 785 return (EIO); 786 } 787 788 return (0); 789 } 790 791 /* 792 * Function: 793 * vdc_create_device_nodes 794 * 795 * Description: 796 * This function creates the block and character device nodes under 797 * /devices along with the node properties. It is called as part of 798 * the attach(9E) of the instance during the handshake with vds after 799 * vds has sent the attributes to vdc. 800 * 801 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 802 * of 2 is used in keeping with the Solaris convention that slice 2 803 * refers to a whole disk. 
Slices start at 'a' 804 * 805 * Parameters: 806 * vdc - soft state pointer 807 * 808 * Return Values 809 * 0 - Success 810 * EIO - Failed to create node 811 * EINVAL - Unknown type of disk exported 812 */ 813 static int 814 vdc_create_device_nodes(vdc_t *vdc) 815 { 816 char name[sizeof ("s,raw")]; 817 dev_info_t *dip = NULL; 818 int instance, status; 819 int num_slices = 1; 820 int i; 821 822 ASSERT(vdc != NULL); 823 824 instance = vdc->instance; 825 dip = vdc->dip; 826 827 switch (vdc->vdisk_type) { 828 case VD_DISK_TYPE_DISK: 829 num_slices = V_NUMPAR; 830 break; 831 case VD_DISK_TYPE_SLICE: 832 num_slices = 1; 833 break; 834 case VD_DISK_TYPE_UNK: 835 default: 836 return (EINVAL); 837 } 838 839 /* 840 * Minor nodes are different for EFI disks: EFI disks do not have 841 * a minor node 'g' for the minor number corresponding to slice 842 * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 843 * representing the whole disk. 844 */ 845 for (i = 0; i < num_slices; i++) { 846 847 if (i == VD_EFI_WD_SLICE) { 848 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 849 status = vdc_create_device_nodes_efi(vdc); 850 else 851 status = vdc_create_device_nodes_vtoc(vdc); 852 if (status != 0) 853 return (status); 854 continue; 855 } 856 857 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 858 if (ddi_create_minor_node(dip, name, S_IFBLK, 859 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 860 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 861 instance, name); 862 return (EIO); 863 } 864 865 /* if any device node is created we set this flag */ 866 vdc->initialized |= VDC_MINOR; 867 868 (void) snprintf(name, sizeof (name), "%c%s", 869 'a' + i, ",raw"); 870 if (ddi_create_minor_node(dip, name, S_IFCHR, 871 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 872 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 873 instance, name); 874 return (EIO); 875 } 876 } 877 878 return (0); 879 } 880 881 /* 882 * Function: 883 * 
vdc_create_device_nodes_props 884 * 885 * Description: 886 * This function creates the block and character device nodes under 887 * /devices along with the node properties. It is called as part of 888 * the attach(9E) of the instance during the handshake with vds after 889 * vds has sent the attributes to vdc. 890 * 891 * Parameters: 892 * vdc - soft state pointer 893 * 894 * Return Values 895 * 0 - Success 896 * EIO - Failed to create device node property 897 * EINVAL - Unknown type of disk exported 898 */ 899 static int 900 vdc_create_device_nodes_props(vdc_t *vdc) 901 { 902 dev_info_t *dip = NULL; 903 int instance; 904 int num_slices = 1; 905 int64_t size = 0; 906 dev_t dev; 907 int rv; 908 int i; 909 910 ASSERT(vdc != NULL); 911 912 instance = vdc->instance; 913 dip = vdc->dip; 914 915 if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { 916 DMSG(vdc, 0, "![%d] Could not create device node property." 917 " No VTOC available", instance); 918 return (ENXIO); 919 } 920 921 switch (vdc->vdisk_type) { 922 case VD_DISK_TYPE_DISK: 923 num_slices = V_NUMPAR; 924 break; 925 case VD_DISK_TYPE_SLICE: 926 num_slices = 1; 927 break; 928 case VD_DISK_TYPE_UNK: 929 default: 930 return (EINVAL); 931 } 932 933 for (i = 0; i < num_slices; i++) { 934 dev = makedevice(ddi_driver_major(dip), 935 VD_MAKE_DEV(instance, i)); 936 937 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 938 DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 939 instance, size, size / (1024 * 1024), 940 vdc->vtoc->v_part[i].p_size); 941 942 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 943 if (rv != DDI_PROP_SUCCESS) { 944 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 945 instance, VDC_SIZE_PROP_NAME, size); 946 return (EIO); 947 } 948 949 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 950 lbtodb(size)); 951 if (rv != DDI_PROP_SUCCESS) { 952 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 953 instance, VDC_NBLOCKS_PROP_NAME, 
lbtodb(size)); 954 return (EIO); 955 } 956 } 957 958 return (0); 959 } 960 961 static int 962 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 963 { 964 _NOTE(ARGUNUSED(cred)) 965 966 int instance; 967 vdc_t *vdc; 968 969 ASSERT(dev != NULL); 970 instance = VDCUNIT(*dev); 971 972 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 973 return (EINVAL); 974 975 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 976 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 977 return (ENXIO); 978 } 979 980 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 981 getminor(*dev), flag, otyp); 982 983 mutex_enter(&vdc->lock); 984 vdc->open_count++; 985 mutex_exit(&vdc->lock); 986 987 return (0); 988 } 989 990 static int 991 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 992 { 993 _NOTE(ARGUNUSED(cred)) 994 995 int instance; 996 vdc_t *vdc; 997 998 instance = VDCUNIT(dev); 999 1000 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 1001 return (EINVAL); 1002 1003 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1004 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1005 return (ENXIO); 1006 } 1007 1008 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 1009 if (vdc->dkio_flush_pending) { 1010 DMSG(vdc, 0, 1011 "[%d] Cannot detach: %d outstanding DKIO flushes\n", 1012 instance, vdc->dkio_flush_pending); 1013 return (EBUSY); 1014 } 1015 1016 /* 1017 * Should not need the mutex here, since the framework should protect 1018 * against more opens on this device, but just in case. 
1019 */ 1020 mutex_enter(&vdc->lock); 1021 vdc->open_count--; 1022 mutex_exit(&vdc->lock); 1023 1024 return (0); 1025 } 1026 1027 static int 1028 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1029 { 1030 _NOTE(ARGUNUSED(credp)) 1031 _NOTE(ARGUNUSED(rvalp)) 1032 1033 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode)); 1034 } 1035 1036 static int 1037 vdc_print(dev_t dev, char *str) 1038 { 1039 cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 1040 return (0); 1041 } 1042 1043 static int 1044 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1045 { 1046 int rv; 1047 size_t nbytes = nblk * DEV_BSIZE; 1048 int instance = VDCUNIT(dev); 1049 vdc_t *vdc = NULL; 1050 1051 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1052 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1053 return (ENXIO); 1054 } 1055 1056 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 1057 instance, nbytes, blkno, (void *)addr); 1058 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 1059 VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir); 1060 if (rv) { 1061 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 1062 return (rv); 1063 } 1064 1065 if (ddi_in_panic()) 1066 (void) vdc_drain_response(vdc); 1067 1068 DMSG(vdc, 0, "[%d] End\n", instance); 1069 1070 return (0); 1071 } 1072 1073 /* -------------------------------------------------------------------------- */ 1074 1075 /* 1076 * Disk access routines 1077 * 1078 */ 1079 1080 /* 1081 * vdc_strategy() 1082 * 1083 * Return Value: 1084 * 0: As per strategy(9E), the strategy() function must return 0 1085 * [ bioerror(9f) sets b_flags to the proper error code ] 1086 */ 1087 static int 1088 vdc_strategy(struct buf *buf) 1089 { 1090 int rv = -1; 1091 vdc_t *vdc = NULL; 1092 int instance = VDCUNIT(buf->b_edev); 1093 int op = (buf->b_flags & B_READ) ? 
VD_OP_BREAD : VD_OP_BWRITE; 1094 1095 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1096 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1097 bioerror(buf, ENXIO); 1098 biodone(buf); 1099 return (0); 1100 } 1101 1102 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 1103 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 1104 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1105 DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc); 1106 1107 bp_mapin(buf); 1108 1109 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 1110 buf->b_bcount, VDCPART(buf->b_edev), buf->b_lblkno, 1111 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 1112 VIO_write_dir); 1113 1114 ASSERT(rv == 0 || rv == EINVAL); 1115 1116 /* 1117 * If the request was successfully sent, the strategy call returns and 1118 * the ACK handler calls the bioxxx functions when the vDisk server is 1119 * done. 1120 */ 1121 if (rv) { 1122 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 1123 bioerror(buf, rv); 1124 biodone(buf); 1125 } 1126 1127 return (0); 1128 } 1129 1130 /* 1131 * Function: 1132 * vdc_min 1133 * 1134 * Description: 1135 * Routine to limit the size of a data transfer. Used in 1136 * conjunction with physio(9F). 1137 * 1138 * Arguments: 1139 * bp - pointer to the indicated buf(9S) struct. 
1140 * 1141 */ 1142 static void 1143 vdc_min(struct buf *bufp) 1144 { 1145 vdc_t *vdc = NULL; 1146 int instance = VDCUNIT(bufp->b_edev); 1147 1148 vdc = ddi_get_soft_state(vdc_state, instance); 1149 VERIFY(vdc != NULL); 1150 1151 if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) { 1152 bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size; 1153 } 1154 } 1155 1156 static int 1157 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1158 { 1159 _NOTE(ARGUNUSED(cred)) 1160 1161 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1162 return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 1163 } 1164 1165 static int 1166 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1167 { 1168 _NOTE(ARGUNUSED(cred)) 1169 1170 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1171 return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 1172 } 1173 1174 static int 1175 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1176 { 1177 _NOTE(ARGUNUSED(cred)) 1178 1179 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1180 return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 1181 } 1182 1183 static int 1184 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1185 { 1186 _NOTE(ARGUNUSED(cred)) 1187 1188 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1189 return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 1190 } 1191 1192 1193 /* -------------------------------------------------------------------------- */ 1194 1195 /* 1196 * Handshake support 1197 */ 1198 1199 1200 /* 1201 * Function: 1202 * vdc_init_ver_negotiation() 1203 * 1204 * Description: 1205 * 1206 * Arguments: 1207 * vdc - soft state pointer for this instance of the device driver. 
1208 * 1209 * Return Code: 1210 * 0 - Success 1211 */ 1212 static int 1213 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 1214 { 1215 vio_ver_msg_t pkt; 1216 size_t msglen = sizeof (pkt); 1217 int status = -1; 1218 1219 ASSERT(vdc != NULL); 1220 ASSERT(mutex_owned(&vdc->lock)); 1221 1222 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1223 1224 /* 1225 * set the Session ID to a unique value 1226 * (the lower 32 bits of the clock tick) 1227 */ 1228 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1229 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 1230 1231 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1232 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1233 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1234 pkt.tag.vio_sid = vdc->session_id; 1235 pkt.dev_class = VDEV_DISK; 1236 pkt.ver_major = ver.major; 1237 pkt.ver_minor = ver.minor; 1238 1239 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1240 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 1241 vdc->instance, status); 1242 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1243 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 1244 "id(%lx) rv(%d) size(%ld)", 1245 vdc->instance, vdc->ldc_handle, 1246 status, msglen); 1247 if (msglen != sizeof (vio_ver_msg_t)) 1248 status = ENOMSG; 1249 } 1250 1251 return (status); 1252 } 1253 1254 /* 1255 * Function: 1256 * vdc_ver_negotiation() 1257 * 1258 * Description: 1259 * 1260 * Arguments: 1261 * vdcp - soft state pointer for this instance of the device driver. 
1262 * 1263 * Return Code: 1264 * 0 - Success 1265 */ 1266 static int 1267 vdc_ver_negotiation(vdc_t *vdcp) 1268 { 1269 vio_msg_t vio_msg; 1270 int status; 1271 1272 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 1273 return (status); 1274 1275 /* release lock and wait for response */ 1276 mutex_exit(&vdcp->lock); 1277 status = vdc_wait_for_response(vdcp, &vio_msg); 1278 mutex_enter(&vdcp->lock); 1279 if (status) { 1280 DMSG(vdcp, 0, 1281 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 1282 vdcp->instance, status); 1283 return (status); 1284 } 1285 1286 /* check type and sub_type ... */ 1287 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1288 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1289 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 1290 vdcp->instance); 1291 return (EPROTO); 1292 } 1293 1294 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 1295 } 1296 1297 /* 1298 * Function: 1299 * vdc_init_attr_negotiation() 1300 * 1301 * Description: 1302 * 1303 * Arguments: 1304 * vdc - soft state pointer for this instance of the device driver. 
1305 * 1306 * Return Code: 1307 * 0 - Success 1308 */ 1309 static int 1310 vdc_init_attr_negotiation(vdc_t *vdc) 1311 { 1312 vd_attr_msg_t pkt; 1313 size_t msglen = sizeof (pkt); 1314 int status; 1315 1316 ASSERT(vdc != NULL); 1317 ASSERT(mutex_owned(&vdc->lock)); 1318 1319 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 1320 1321 /* fill in tag */ 1322 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1323 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1324 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1325 pkt.tag.vio_sid = vdc->session_id; 1326 /* fill in payload */ 1327 pkt.max_xfer_sz = vdc->max_xfer_sz; 1328 pkt.vdisk_block_size = vdc->block_size; 1329 pkt.xfer_mode = VIO_DRING_MODE; 1330 pkt.operations = 0; /* server will set bits of valid operations */ 1331 pkt.vdisk_type = 0; /* server will set to valid device type */ 1332 pkt.vdisk_size = 0; /* server will set to valid size */ 1333 1334 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1335 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 1336 1337 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1338 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 1339 "id(%lx) rv(%d) size(%ld)", 1340 vdc->instance, vdc->ldc_handle, 1341 status, msglen); 1342 if (msglen != sizeof (vio_ver_msg_t)) 1343 status = ENOMSG; 1344 } 1345 1346 return (status); 1347 } 1348 1349 /* 1350 * Function: 1351 * vdc_attr_negotiation() 1352 * 1353 * Description: 1354 * 1355 * Arguments: 1356 * vdc - soft state pointer for this instance of the device driver. 
1357 * 1358 * Return Code: 1359 * 0 - Success 1360 */ 1361 static int 1362 vdc_attr_negotiation(vdc_t *vdcp) 1363 { 1364 int status; 1365 vio_msg_t vio_msg; 1366 1367 if (status = vdc_init_attr_negotiation(vdcp)) 1368 return (status); 1369 1370 /* release lock and wait for response */ 1371 mutex_exit(&vdcp->lock); 1372 status = vdc_wait_for_response(vdcp, &vio_msg); 1373 mutex_enter(&vdcp->lock); 1374 if (status) { 1375 DMSG(vdcp, 0, 1376 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 1377 vdcp->instance, status); 1378 return (status); 1379 } 1380 1381 /* check type and sub_type ... */ 1382 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1383 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1384 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 1385 vdcp->instance); 1386 return (EPROTO); 1387 } 1388 1389 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 1390 } 1391 1392 1393 /* 1394 * Function: 1395 * vdc_init_dring_negotiate() 1396 * 1397 * Description: 1398 * 1399 * Arguments: 1400 * vdc - soft state pointer for this instance of the device driver. 
1401 * 1402 * Return Code: 1403 * 0 - Success 1404 */ 1405 static int 1406 vdc_init_dring_negotiate(vdc_t *vdc) 1407 { 1408 vio_dring_reg_msg_t pkt; 1409 size_t msglen = sizeof (pkt); 1410 int status = -1; 1411 int retry; 1412 int nretries = 10; 1413 1414 ASSERT(vdc != NULL); 1415 ASSERT(mutex_owned(&vdc->lock)); 1416 1417 for (retry = 0; retry < nretries; retry++) { 1418 status = vdc_init_descriptor_ring(vdc); 1419 if (status != EAGAIN) 1420 break; 1421 drv_usecwait(vdc_min_timeout_ldc); 1422 } 1423 1424 if (status != 0) { 1425 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 1426 vdc->instance, status); 1427 return (status); 1428 } 1429 1430 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 1431 vdc->instance, status); 1432 1433 /* fill in tag */ 1434 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1435 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1436 pkt.tag.vio_subtype_env = VIO_DRING_REG; 1437 pkt.tag.vio_sid = vdc->session_id; 1438 /* fill in payload */ 1439 pkt.dring_ident = 0; 1440 pkt.num_descriptors = vdc->dring_len; 1441 pkt.descriptor_size = vdc->dring_entry_size; 1442 pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 1443 pkt.ncookies = vdc->dring_cookie_count; 1444 pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 1445 1446 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1447 if (status != 0) { 1448 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1449 vdc->instance, status); 1450 } 1451 1452 return (status); 1453 } 1454 1455 1456 /* 1457 * Function: 1458 * vdc_dring_negotiation() 1459 * 1460 * Description: 1461 * 1462 * Arguments: 1463 * vdc - soft state pointer for this instance of the device driver. 
1464 * 1465 * Return Code: 1466 * 0 - Success 1467 */ 1468 static int 1469 vdc_dring_negotiation(vdc_t *vdcp) 1470 { 1471 int status; 1472 vio_msg_t vio_msg; 1473 1474 if (status = vdc_init_dring_negotiate(vdcp)) 1475 return (status); 1476 1477 /* release lock and wait for response */ 1478 mutex_exit(&vdcp->lock); 1479 status = vdc_wait_for_response(vdcp, &vio_msg); 1480 mutex_enter(&vdcp->lock); 1481 if (status) { 1482 DMSG(vdcp, 0, 1483 "[%d] Failed waiting for Dring negotiation response," 1484 " rv(%d)", vdcp->instance, status); 1485 return (status); 1486 } 1487 1488 /* check type and sub_type ... */ 1489 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1490 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1491 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 1492 vdcp->instance); 1493 return (EPROTO); 1494 } 1495 1496 return (vdc_handle_dring_reg_msg(vdcp, 1497 (vio_dring_reg_msg_t *)&vio_msg)); 1498 } 1499 1500 1501 /* 1502 * Function: 1503 * vdc_send_rdx() 1504 * 1505 * Description: 1506 * 1507 * Arguments: 1508 * vdc - soft state pointer for this instance of the device driver. 1509 * 1510 * Return Code: 1511 * 0 - Success 1512 */ 1513 static int 1514 vdc_send_rdx(vdc_t *vdcp) 1515 { 1516 vio_msg_t msg; 1517 size_t msglen = sizeof (vio_msg_t); 1518 int status; 1519 1520 /* 1521 * Send an RDX message to vds to indicate we are ready 1522 * to send data 1523 */ 1524 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 1525 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 1526 msg.tag.vio_subtype_env = VIO_RDX; 1527 msg.tag.vio_sid = vdcp->session_id; 1528 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 1529 if (status != 0) { 1530 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 1531 vdcp->instance, status); 1532 } 1533 1534 return (status); 1535 } 1536 1537 /* 1538 * Function: 1539 * vdc_handle_rdx() 1540 * 1541 * Description: 1542 * 1543 * Arguments: 1544 * vdc - soft state pointer for this instance of the device driver. 
1545 * msgp - received msg 1546 * 1547 * Return Code: 1548 * 0 - Success 1549 */ 1550 static int 1551 vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp) 1552 { 1553 _NOTE(ARGUNUSED(vdcp)) 1554 _NOTE(ARGUNUSED(msgp)) 1555 1556 ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL); 1557 ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK); 1558 ASSERT(msgp->tag.vio_subtype_env == VIO_RDX); 1559 1560 DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance); 1561 1562 return (0); 1563 } 1564 1565 /* 1566 * Function: 1567 * vdc_rdx_exchange() 1568 * 1569 * Description: 1570 * 1571 * Arguments: 1572 * vdc - soft state pointer for this instance of the device driver. 1573 * 1574 * Return Code: 1575 * 0 - Success 1576 */ 1577 static int 1578 vdc_rdx_exchange(vdc_t *vdcp) 1579 { 1580 int status; 1581 vio_msg_t vio_msg; 1582 1583 if (status = vdc_send_rdx(vdcp)) 1584 return (status); 1585 1586 /* release lock and wait for response */ 1587 mutex_exit(&vdcp->lock); 1588 status = vdc_wait_for_response(vdcp, &vio_msg); 1589 mutex_enter(&vdcp->lock); 1590 if (status) { 1591 DMSG(vdcp, 0, 1592 "[%d] Failed waiting for RDX response," 1593 " rv(%d)", vdcp->instance, status); 1594 return (status); 1595 } 1596 1597 /* check type and sub_type ... 
*/ 1598 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1599 vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) { 1600 DMSG(vdcp, 0, "[%d] Invalid RDX response\n", 1601 vdcp->instance); 1602 return (EPROTO); 1603 } 1604 1605 return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg)); 1606 } 1607 1608 1609 /* -------------------------------------------------------------------------- */ 1610 1611 /* 1612 * LDC helper routines 1613 */ 1614 1615 static int 1616 vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp) 1617 { 1618 int status; 1619 boolean_t q_has_pkts = B_FALSE; 1620 int delay_time; 1621 size_t len; 1622 1623 mutex_enter(&vdc->read_lock); 1624 1625 if (vdc->read_state == VDC_READ_IDLE) 1626 vdc->read_state = VDC_READ_WAITING; 1627 1628 while (vdc->read_state != VDC_READ_PENDING) { 1629 1630 /* detect if the connection has been reset */ 1631 if (vdc->read_state == VDC_READ_RESET) { 1632 status = ECONNRESET; 1633 goto done; 1634 } 1635 1636 cv_wait(&vdc->read_cv, &vdc->read_lock); 1637 } 1638 1639 /* 1640 * Until we get a blocking ldc read we have to retry 1641 * until the entire LDC message has arrived before 1642 * ldc_read() will succeed. Note we also bail out if 1643 * the chanel is reset or goes away. 1644 */ 1645 delay_time = vdc_ldc_read_init_delay; 1646 loop: 1647 len = *nbytesp; 1648 status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len); 1649 switch (status) { 1650 case EAGAIN: 1651 delay_time *= 2; 1652 if (delay_time >= vdc_ldc_read_max_delay) 1653 delay_time = vdc_ldc_read_max_delay; 1654 delay(delay_time); 1655 goto loop; 1656 1657 case 0: 1658 if (len == 0) { 1659 DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with " 1660 "no error!\n", vdc->instance); 1661 goto loop; 1662 } 1663 1664 *nbytesp = len; 1665 1666 /* 1667 * If there are pending messages, leave the 1668 * read state as pending. Otherwise, set the state 1669 * back to idle. 
1670 */ 1671 status = ldc_chkq(vdc->ldc_handle, &q_has_pkts); 1672 if (status == 0 && !q_has_pkts) 1673 vdc->read_state = VDC_READ_IDLE; 1674 1675 break; 1676 default: 1677 DMSG(vdc, 0, "ldc_read returned %d\n", status); 1678 break; 1679 } 1680 1681 done: 1682 mutex_exit(&vdc->read_lock); 1683 1684 return (status); 1685 } 1686 1687 1688 1689 #ifdef DEBUG 1690 void 1691 vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg) 1692 { 1693 char *ms, *ss, *ses; 1694 switch (msg->tag.vio_msgtype) { 1695 #define Q(_s) case _s : ms = #_s; break; 1696 Q(VIO_TYPE_CTRL) 1697 Q(VIO_TYPE_DATA) 1698 Q(VIO_TYPE_ERR) 1699 #undef Q 1700 default: ms = "unknown"; break; 1701 } 1702 1703 switch (msg->tag.vio_subtype) { 1704 #define Q(_s) case _s : ss = #_s; break; 1705 Q(VIO_SUBTYPE_INFO) 1706 Q(VIO_SUBTYPE_ACK) 1707 Q(VIO_SUBTYPE_NACK) 1708 #undef Q 1709 default: ss = "unknown"; break; 1710 } 1711 1712 switch (msg->tag.vio_subtype_env) { 1713 #define Q(_s) case _s : ses = #_s; break; 1714 Q(VIO_VER_INFO) 1715 Q(VIO_ATTR_INFO) 1716 Q(VIO_DRING_REG) 1717 Q(VIO_DRING_UNREG) 1718 Q(VIO_RDX) 1719 Q(VIO_PKT_DATA) 1720 Q(VIO_DESC_DATA) 1721 Q(VIO_DRING_DATA) 1722 #undef Q 1723 default: ses = "unknown"; break; 1724 } 1725 1726 DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n", 1727 msg->tag.vio_msgtype, msg->tag.vio_subtype, 1728 msg->tag.vio_subtype_env, ms, ss, ses); 1729 } 1730 #endif 1731 1732 /* 1733 * Function: 1734 * vdc_send() 1735 * 1736 * Description: 1737 * The function encapsulates the call to write a message using LDC. 1738 * If LDC indicates that the call failed due to the queue being full, 1739 * we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise 1740 * we return the error returned by LDC. 1741 * 1742 * Arguments: 1743 * ldc_handle - LDC handle for the channel this instance of vdc uses 1744 * pkt - address of LDC message to be sent 1745 * msglen - the size of the message being sent. When the function 1746 * returns, this contains the number of bytes written. 
1747 * 1748 * Return Code: 1749 * 0 - Success. 1750 * EINVAL - pkt or msglen were NULL 1751 * ECONNRESET - The connection was not up. 1752 * EWOULDBLOCK - LDC queue is full 1753 * xxx - other error codes returned by ldc_write 1754 */ 1755 static int 1756 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 1757 { 1758 size_t size = 0; 1759 int status = 0; 1760 clock_t delay_ticks; 1761 1762 ASSERT(vdc != NULL); 1763 ASSERT(mutex_owned(&vdc->lock)); 1764 ASSERT(msglen != NULL); 1765 ASSERT(*msglen != 0); 1766 1767 #ifdef DEBUG 1768 vdc_decode_tag(vdc, (vio_msg_t *)pkt); 1769 #endif 1770 /* 1771 * Wait indefinitely to send if channel 1772 * is busy, but bail out if we succeed or 1773 * if the channel closes or is reset. 1774 */ 1775 delay_ticks = vdc_hz_min_ldc_delay; 1776 do { 1777 size = *msglen; 1778 status = ldc_write(vdc->ldc_handle, pkt, &size); 1779 if (status == EWOULDBLOCK) { 1780 delay(delay_ticks); 1781 /* geometric backoff */ 1782 delay_ticks *= 2; 1783 if (delay_ticks > vdc_hz_max_ldc_delay) 1784 delay_ticks = vdc_hz_max_ldc_delay; 1785 } 1786 } while (status == EWOULDBLOCK); 1787 1788 /* if LDC had serious issues --- reset vdc state */ 1789 if (status == EIO || status == ECONNRESET) { 1790 /* LDC had serious issues --- reset vdc state */ 1791 mutex_enter(&vdc->read_lock); 1792 if ((vdc->read_state == VDC_READ_WAITING) || 1793 (vdc->read_state == VDC_READ_RESET)) 1794 cv_signal(&vdc->read_cv); 1795 vdc->read_state = VDC_READ_RESET; 1796 mutex_exit(&vdc->read_lock); 1797 1798 /* wake up any waiters in the reset thread */ 1799 if (vdc->state == VDC_STATE_INIT_WAITING) { 1800 DMSG(vdc, 0, "[%d] write reset - " 1801 "vdc is resetting ..\n", vdc->instance); 1802 vdc->state = VDC_STATE_RESETTING; 1803 cv_signal(&vdc->initwait_cv); 1804 } 1805 1806 return (ECONNRESET); 1807 } 1808 1809 /* return the last size written */ 1810 *msglen = size; 1811 1812 return (status); 1813 } 1814 1815 /* 1816 * Function: 1817 * vdc_get_ldc_id() 1818 * 1819 * Description: 1820 * 
This function gets the 'ldc-id' for this particular instance of vdc. 1821 * The id returned is the guest domain channel endpoint LDC uses for 1822 * communication with vds. 1823 * 1824 * Arguments: 1825 * dip - dev info pointer for this instance of the device driver. 1826 * ldc_id - pointer to variable used to return the 'ldc-id' found. 1827 * 1828 * Return Code: 1829 * 0 - Success. 1830 * ENOENT - Expected node or property did not exist. 1831 * ENXIO - Unexpected error communicating with MD framework 1832 */ 1833 static int 1834 vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id) 1835 { 1836 int status = ENOENT; 1837 char *node_name = NULL; 1838 md_t *mdp = NULL; 1839 int num_nodes; 1840 int num_vdevs; 1841 int num_chans; 1842 mde_cookie_t rootnode; 1843 mde_cookie_t *listp = NULL; 1844 mde_cookie_t *chanp = NULL; 1845 boolean_t found_inst = B_FALSE; 1846 int listsz; 1847 int idx; 1848 uint64_t md_inst; 1849 int obp_inst; 1850 int instance = ddi_get_instance(dip); 1851 1852 ASSERT(ldc_id != NULL); 1853 *ldc_id = 0; 1854 1855 /* 1856 * Get the OBP instance number for comparison with the MD instance 1857 * 1858 * The "cfg-handle" property of a vdc node in an MD contains the MD's 1859 * notion of "instance", or unique identifier, for that node; OBP 1860 * stores the value of the "cfg-handle" MD property as the value of 1861 * the "reg" property on the node in the device tree it builds from 1862 * the MD and passes to Solaris. Thus, we look up the devinfo node's 1863 * "reg" property value to uniquely identify this device instance. 1864 * If the "reg" property cannot be found, the device tree state is 1865 * presumably so broken that there is no point in continuing. 
1866 */ 1867 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 1868 cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 1869 return (ENOENT); 1870 } 1871 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1872 OBP_REG, -1); 1873 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 1874 1875 /* 1876 * We now walk the MD nodes and if an instance of a vdc node matches 1877 * the instance got from OBP we get the ldc-id property. 1878 */ 1879 if ((mdp = md_get_handle()) == NULL) { 1880 cmn_err(CE_WARN, "unable to init machine description"); 1881 return (ENXIO); 1882 } 1883 1884 num_nodes = md_node_count(mdp); 1885 ASSERT(num_nodes > 0); 1886 1887 listsz = num_nodes * sizeof (mde_cookie_t); 1888 1889 /* allocate memory for nodes */ 1890 listp = kmem_zalloc(listsz, KM_SLEEP); 1891 chanp = kmem_zalloc(listsz, KM_SLEEP); 1892 1893 rootnode = md_root_node(mdp); 1894 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 1895 1896 /* 1897 * Search for all the virtual devices, we will then check to see which 1898 * ones are disk nodes. 
1899 */ 1900 num_vdevs = md_scan_dag(mdp, rootnode, 1901 md_find_name(mdp, VDC_MD_VDEV_NAME), 1902 md_find_name(mdp, "fwd"), listp); 1903 1904 if (num_vdevs <= 0) { 1905 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 1906 status = ENOENT; 1907 goto done; 1908 } 1909 1910 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 1911 for (idx = 0; idx < num_vdevs; idx++) { 1912 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 1913 if ((status != 0) || (node_name == NULL)) { 1914 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 1915 ": err %d", VDC_MD_VDEV_NAME, status); 1916 continue; 1917 } 1918 1919 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 1920 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 1921 status = md_get_prop_val(mdp, listp[idx], 1922 VDC_MD_CFG_HDL, &md_inst); 1923 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 1924 instance, md_inst); 1925 if ((status == 0) && (md_inst == obp_inst)) { 1926 found_inst = B_TRUE; 1927 break; 1928 } 1929 } 1930 } 1931 1932 if (!found_inst) { 1933 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 1934 status = ENOENT; 1935 goto done; 1936 } 1937 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 1938 1939 /* get the channels for this node */ 1940 num_chans = md_scan_dag(mdp, listp[idx], 1941 md_find_name(mdp, VDC_MD_CHAN_NAME), 1942 md_find_name(mdp, "fwd"), chanp); 1943 1944 /* expecting at least one channel */ 1945 if (num_chans <= 0) { 1946 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 1947 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 1948 status = ENOENT; 1949 goto done; 1950 1951 } else if (num_chans != 1) { 1952 DMSGX(0, "[%d] Expected 1 '%s' node for '%s' port, found %d\n", 1953 instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 1954 num_chans); 1955 } 1956 1957 /* 1958 * We use the first channel found (index 0), irrespective of how 1959 * many are there in total. 
1960 */ 1961 if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) { 1962 cmn_err(CE_NOTE, "Channel '%s' property not found", 1963 VDC_ID_PROP); 1964 status = ENOENT; 1965 } 1966 1967 DMSGX(0, "[%d] LDC id is 0x%lx\n", instance, *ldc_id); 1968 1969 done: 1970 if (chanp) 1971 kmem_free(chanp, listsz); 1972 if (listp) 1973 kmem_free(listp, listsz); 1974 1975 (void) md_fini_handle(mdp); 1976 1977 return (status); 1978 } 1979 1980 static int 1981 vdc_do_ldc_up(vdc_t *vdc) 1982 { 1983 int status; 1984 ldc_status_t ldc_state; 1985 1986 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 1987 vdc->instance, vdc->ldc_id); 1988 1989 if (vdc->lifecycle == VDC_LC_DETACHING) 1990 return (EINVAL); 1991 1992 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 1993 switch (status) { 1994 case ECONNREFUSED: /* listener not ready at other end */ 1995 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 1996 vdc->instance, vdc->ldc_id, status); 1997 status = 0; 1998 break; 1999 default: 2000 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 2001 "channel=%ld, err=%d", vdc->instance, vdc->ldc_id, 2002 status); 2003 break; 2004 } 2005 } 2006 2007 if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) { 2008 vdc->ldc_state = ldc_state; 2009 if (ldc_state == LDC_UP) { 2010 DMSG(vdc, 0, "[%d] LDC channel already up\n", 2011 vdc->instance); 2012 vdc->seq_num = 1; 2013 vdc->seq_num_reply = 0; 2014 } 2015 } 2016 2017 return (status); 2018 } 2019 2020 /* 2021 * Function: 2022 * vdc_terminate_ldc() 2023 * 2024 * Description: 2025 * 2026 * Arguments: 2027 * vdc - soft state pointer for this instance of the device driver. 
2028 * 2029 * Return Code: 2030 * None 2031 */ 2032 static void 2033 vdc_terminate_ldc(vdc_t *vdc) 2034 { 2035 int instance = ddi_get_instance(vdc->dip); 2036 2037 ASSERT(vdc != NULL); 2038 ASSERT(mutex_owned(&vdc->lock)); 2039 2040 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 2041 2042 if (vdc->initialized & VDC_LDC_OPEN) { 2043 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 2044 (void) ldc_close(vdc->ldc_handle); 2045 } 2046 if (vdc->initialized & VDC_LDC_CB) { 2047 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 2048 (void) ldc_unreg_callback(vdc->ldc_handle); 2049 } 2050 if (vdc->initialized & VDC_LDC) { 2051 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 2052 (void) ldc_fini(vdc->ldc_handle); 2053 vdc->ldc_handle = NULL; 2054 } 2055 2056 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 2057 } 2058 2059 /* -------------------------------------------------------------------------- */ 2060 2061 /* 2062 * Descriptor Ring helper routines 2063 */ 2064 2065 /* 2066 * Function: 2067 * vdc_init_descriptor_ring() 2068 * 2069 * Description: 2070 * 2071 * Arguments: 2072 * vdc - soft state pointer for this instance of the device driver. 2073 * 2074 * Return Code: 2075 * 0 - Success 2076 */ 2077 static int 2078 vdc_init_descriptor_ring(vdc_t *vdc) 2079 { 2080 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 2081 int status = 0; 2082 int i; 2083 2084 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 2085 2086 ASSERT(vdc != NULL); 2087 ASSERT(mutex_owned(&vdc->lock)); 2088 ASSERT(vdc->ldc_handle != NULL); 2089 2090 /* ensure we have enough room to store max sized block */ 2091 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2092 2093 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 2094 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2095 /* 2096 * Calculate the maximum block size we can transmit using one 2097 * Descriptor Ring entry from the attributes returned by the 2098 * vDisk server. 
This is subject to a minimum of 'maxphys' 2099 * as we do not have the capability to split requests over 2100 * multiple DRing entries. 2101 */ 2102 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 2103 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2104 vdc->instance); 2105 vdc->dring_max_cookies = maxphys / PAGESIZE; 2106 } else { 2107 vdc->dring_max_cookies = 2108 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2109 } 2110 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2111 (sizeof (ldc_mem_cookie_t) * 2112 (vdc->dring_max_cookies - 1))); 2113 vdc->dring_len = VD_DRING_LEN; 2114 2115 status = ldc_mem_dring_create(vdc->dring_len, 2116 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 2117 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 2118 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2119 vdc->instance); 2120 return (status); 2121 } 2122 vdc->initialized |= VDC_DRING_INIT; 2123 } 2124 2125 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 2126 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 2127 vdc->dring_cookie = 2128 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 2129 2130 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 2131 LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 2132 &vdc->dring_cookie[0], 2133 &vdc->dring_cookie_count); 2134 if (status != 0) { 2135 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 2136 "(%lx) to channel (%lx) status=%d\n", 2137 vdc->instance, vdc->ldc_dring_hdl, 2138 vdc->ldc_handle, status); 2139 return (status); 2140 } 2141 ASSERT(vdc->dring_cookie_count == 1); 2142 vdc->initialized |= VDC_DRING_BOUND; 2143 } 2144 2145 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 2146 if (status != 0) { 2147 DMSG(vdc, 0, 2148 "[%d] Failed to get info for descriptor ring (%lx)\n", 2149 vdc->instance, vdc->ldc_dring_hdl); 2150 return (status); 2151 } 2152 2153 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 2154 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 2155 2156 /* 
Allocate the local copy of this dring */ 2157 vdc->local_dring = 2158 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 2159 KM_SLEEP); 2160 vdc->initialized |= VDC_DRING_LOCAL; 2161 } 2162 2163 /* 2164 * Mark all DRing entries as free and initialize the private 2165 * descriptor's memory handles. If any entry is initialized, 2166 * we need to free it later so we set the bit in 'initialized' 2167 * at the start. 2168 */ 2169 vdc->initialized |= VDC_DRING_ENTRY; 2170 for (i = 0; i < vdc->dring_len; i++) { 2171 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 2172 dep->hdr.dstate = VIO_DESC_FREE; 2173 2174 status = ldc_mem_alloc_handle(vdc->ldc_handle, 2175 &vdc->local_dring[i].desc_mhdl); 2176 if (status != 0) { 2177 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 2178 " descriptor %d", vdc->instance, i); 2179 return (status); 2180 } 2181 vdc->local_dring[i].is_free = B_TRUE; 2182 vdc->local_dring[i].dep = dep; 2183 } 2184 2185 /* Initialize the starting index */ 2186 vdc->dring_curr_idx = 0; 2187 2188 return (status); 2189 } 2190 2191 /* 2192 * Function: 2193 * vdc_destroy_descriptor_ring() 2194 * 2195 * Description: 2196 * 2197 * Arguments: 2198 * vdc - soft state pointer for this instance of the device driver. 
2199 * 2200 * Return Code: 2201 * None 2202 */ 2203 static void 2204 vdc_destroy_descriptor_ring(vdc_t *vdc) 2205 { 2206 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2207 ldc_mem_handle_t mhdl = NULL; 2208 ldc_mem_info_t minfo; 2209 int status = -1; 2210 int i; /* loop */ 2211 2212 ASSERT(vdc != NULL); 2213 ASSERT(mutex_owned(&vdc->lock)); 2214 2215 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 2216 2217 if (vdc->initialized & VDC_DRING_ENTRY) { 2218 DMSG(vdc, 0, 2219 "[%d] Removing Local DRing entries\n", vdc->instance); 2220 for (i = 0; i < vdc->dring_len; i++) { 2221 ldep = &vdc->local_dring[i]; 2222 mhdl = ldep->desc_mhdl; 2223 2224 if (mhdl == NULL) 2225 continue; 2226 2227 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 2228 DMSG(vdc, 0, 2229 "ldc_mem_info returned an error: %d\n", 2230 status); 2231 2232 /* 2233 * This must mean that the mem handle 2234 * is not valid. Clear it out so that 2235 * no one tries to use it. 2236 */ 2237 ldep->desc_mhdl = NULL; 2238 continue; 2239 } 2240 2241 if (minfo.status == LDC_BOUND) { 2242 (void) ldc_mem_unbind_handle(mhdl); 2243 } 2244 2245 (void) ldc_mem_free_handle(mhdl); 2246 2247 ldep->desc_mhdl = NULL; 2248 } 2249 vdc->initialized &= ~VDC_DRING_ENTRY; 2250 } 2251 2252 if (vdc->initialized & VDC_DRING_LOCAL) { 2253 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 2254 kmem_free(vdc->local_dring, 2255 vdc->dring_len * sizeof (vdc_local_desc_t)); 2256 vdc->initialized &= ~VDC_DRING_LOCAL; 2257 } 2258 2259 if (vdc->initialized & VDC_DRING_BOUND) { 2260 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 2261 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 2262 if (status == 0) { 2263 vdc->initialized &= ~VDC_DRING_BOUND; 2264 } else { 2265 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2266 vdc->instance, status, vdc->ldc_dring_hdl); 2267 } 2268 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 2269 } 2270 2271 if (vdc->initialized & VDC_DRING_INIT) { 2272 DMSG(vdc, 0, "[%d] 
Destroying DRing\n", vdc->instance); 2273 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 2274 if (status == 0) { 2275 vdc->ldc_dring_hdl = NULL; 2276 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 2277 vdc->initialized &= ~VDC_DRING_INIT; 2278 } else { 2279 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 2280 vdc->instance, status, vdc->ldc_dring_hdl); 2281 } 2282 } 2283 } 2284 2285 /* 2286 * Function: 2287 * vdc_map_to_shared_ring() 2288 * 2289 * Description: 2290 * Copy contents of the local descriptor to the shared 2291 * memory descriptor. 2292 * 2293 * Arguments: 2294 * vdcp - soft state pointer for this instance of the device driver. 2295 * idx - descriptor ring index 2296 * 2297 * Return Code: 2298 * None 2299 */ 2300 static int 2301 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 2302 { 2303 vdc_local_desc_t *ldep; 2304 vd_dring_entry_t *dep; 2305 int rv; 2306 2307 ldep = &(vdcp->local_dring[idx]); 2308 2309 /* for now leave in the old pop_mem_hdl stuff */ 2310 if (ldep->nbytes > 0) { 2311 rv = vdc_populate_mem_hdl(vdcp, ldep); 2312 if (rv) { 2313 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 2314 vdcp->instance); 2315 return (rv); 2316 } 2317 } 2318 2319 /* 2320 * fill in the data details into the DRing 2321 */ 2322 dep = ldep->dep; 2323 ASSERT(dep != NULL); 2324 2325 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 2326 dep->payload.operation = ldep->operation; 2327 dep->payload.addr = ldep->offset; 2328 dep->payload.nbytes = ldep->nbytes; 2329 dep->payload.status = (uint32_t)-1; /* vds will set valid value */ 2330 dep->payload.slice = ldep->slice; 2331 dep->hdr.dstate = VIO_DESC_READY; 2332 dep->hdr.ack = 1; /* request an ACK for every message */ 2333 2334 return (0); 2335 } 2336 2337 /* 2338 * Function: 2339 * vdc_send_request 2340 * 2341 * Description: 2342 * This routine writes the data to be transmitted to vds into the 2343 * descriptor, notifies vds that the ring has been updated and 2344 * then waits for the request to be 
processed. 2345 * 2346 * Arguments: 2347 * vdcp - the soft state pointer 2348 * operation - operation we want vds to perform (VD_OP_XXX) 2349 * addr - address of data buf to be read/written. 2350 * nbytes - number of bytes to read/write 2351 * slice - the disk slice this request is for 2352 * offset - relative disk offset 2353 * cb_type - type of call - STRATEGY or SYNC 2354 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2355 * . mode for ioctl(9e) 2356 * . LP64 diskaddr_t (block I/O) 2357 * dir - direction of operation (READ/WRITE/BOTH) 2358 * 2359 * Return Codes: 2360 * 0 2361 * EAGAIN 2362 * EFAULT 2363 * ENXIO 2364 * EIO 2365 */ 2366 static int 2367 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 2368 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2369 void *cb_arg, vio_desc_direction_t dir) 2370 { 2371 ASSERT(vdcp != NULL); 2372 ASSERT(slice < V_NUMPAR); 2373 2374 mutex_enter(&vdcp->lock); 2375 2376 do { 2377 while (vdcp->state != VDC_STATE_RUNNING) 2378 cv_wait(&vdcp->running_cv, &vdcp->lock); 2379 2380 } while (vdc_populate_descriptor(vdcp, operation, addr, 2381 nbytes, slice, offset, cb_type, cb_arg, dir)); 2382 2383 mutex_exit(&vdcp->lock); 2384 return (0); 2385 } 2386 2387 2388 /* 2389 * Function: 2390 * vdc_populate_descriptor 2391 * 2392 * Description: 2393 * This routine writes the data to be transmitted to vds into the 2394 * descriptor, notifies vds that the ring has been updated and 2395 * then waits for the request to be processed. 2396 * 2397 * Arguments: 2398 * vdcp - the soft state pointer 2399 * operation - operation we want vds to perform (VD_OP_XXX) 2400 * addr - address of data buf to be read/written. 2401 * nbytes - number of bytes to read/write 2402 * slice - the disk slice this request is for 2403 * offset - relative disk offset 2404 * cb_type - type of call - STRATEGY or SYNC 2405 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2406 * . mode for ioctl(9e) 2407 * . 
LP64 diskaddr_t (block I/O) 2408 * dir - direction of operation (READ/WRITE/BOTH) 2409 * 2410 * Return Codes: 2411 * 0 2412 * EAGAIN 2413 * EFAULT 2414 * ENXIO 2415 * EIO 2416 */ 2417 static int 2418 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 2419 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2420 void *cb_arg, vio_desc_direction_t dir) 2421 { 2422 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 2423 int idx; /* Index of DRing entry used */ 2424 int next_idx; 2425 vio_dring_msg_t dmsg; 2426 size_t msglen; 2427 int rv; 2428 2429 ASSERT(MUTEX_HELD(&vdcp->lock)); 2430 vdcp->threads_pending++; 2431 loop: 2432 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 2433 2434 /* Get next available D-Ring entry */ 2435 idx = vdcp->dring_curr_idx; 2436 local_dep = &(vdcp->local_dring[idx]); 2437 2438 if (!local_dep->is_free) { 2439 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 2440 vdcp->instance); 2441 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 2442 if (vdcp->state == VDC_STATE_RUNNING || 2443 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2444 goto loop; 2445 } 2446 vdcp->threads_pending--; 2447 return (ECONNRESET); 2448 } 2449 2450 next_idx = idx + 1; 2451 if (next_idx >= vdcp->dring_len) 2452 next_idx = 0; 2453 vdcp->dring_curr_idx = next_idx; 2454 2455 ASSERT(local_dep->is_free); 2456 2457 local_dep->operation = operation; 2458 local_dep->addr = addr; 2459 local_dep->nbytes = nbytes; 2460 local_dep->slice = slice; 2461 local_dep->offset = offset; 2462 local_dep->cb_type = cb_type; 2463 local_dep->cb_arg = cb_arg; 2464 local_dep->dir = dir; 2465 2466 local_dep->is_free = B_FALSE; 2467 2468 rv = vdc_map_to_shared_dring(vdcp, idx); 2469 if (rv) { 2470 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 2471 vdcp->instance); 2472 /* free the descriptor */ 2473 local_dep->is_free = B_TRUE; 2474 vdcp->dring_curr_idx = idx; 2475 cv_wait(&vdcp->membind_cv, &vdcp->lock); 2476 if (vdcp->state == 
VDC_STATE_RUNNING || 2477 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2478 goto loop; 2479 } 2480 vdcp->threads_pending--; 2481 return (ECONNRESET); 2482 } 2483 2484 /* 2485 * Send a msg with the DRing details to vds 2486 */ 2487 VIO_INIT_DRING_DATA_TAG(dmsg); 2488 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 2489 dmsg.dring_ident = vdcp->dring_ident; 2490 dmsg.start_idx = idx; 2491 dmsg.end_idx = idx; 2492 vdcp->seq_num++; 2493 2494 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp); 2495 2496 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 2497 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 2498 2499 /* 2500 * note we're still holding the lock here to 2501 * make sure the message goes out in order !!!... 2502 */ 2503 msglen = sizeof (dmsg); 2504 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 2505 switch (rv) { 2506 case ECONNRESET: 2507 /* 2508 * vdc_send initiates the reset on failure. 2509 * Since the transaction has already been put 2510 * on the local dring, it will automatically get 2511 * retried when the channel is reset. Given that, 2512 * it is ok to just return success even though the 2513 * send failed. 2514 */ 2515 rv = 0; 2516 break; 2517 2518 case 0: /* EOK */ 2519 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 2520 break; 2521 2522 default: 2523 goto cleanup_and_exit; 2524 } 2525 2526 vdcp->threads_pending--; 2527 return (rv); 2528 2529 cleanup_and_exit: 2530 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 2531 return (ENXIO); 2532 } 2533 2534 /* 2535 * Function: 2536 * vdc_do_sync_op 2537 * 2538 * Description: 2539 * Wrapper around vdc_populate_descriptor that blocks until the 2540 * response to the message is available. 2541 * 2542 * Arguments: 2543 * vdcp - the soft state pointer 2544 * operation - operation we want vds to perform (VD_OP_XXX) 2545 * addr - address of data buf to be read/written. 
2546 * nbytes - number of bytes to read/write 2547 * slice - the disk slice this request is for 2548 * offset - relative disk offset 2549 * cb_type - type of call - STRATEGY or SYNC 2550 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2551 * . mode for ioctl(9e) 2552 * . LP64 diskaddr_t (block I/O) 2553 * dir - direction of operation (READ/WRITE/BOTH) 2554 * 2555 * Return Codes: 2556 * 0 2557 * EAGAIN 2558 * EFAULT 2559 * ENXIO 2560 * EIO 2561 */ 2562 static int 2563 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 2564 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 2565 vio_desc_direction_t dir) 2566 { 2567 int status; 2568 2569 ASSERT(cb_type == CB_SYNC); 2570 2571 /* 2572 * Grab the lock, if blocked wait until the server 2573 * response causes us to wake up again. 2574 */ 2575 mutex_enter(&vdcp->lock); 2576 vdcp->sync_op_cnt++; 2577 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 2578 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 2579 2580 if (vdcp->state == VDC_STATE_DETACH) { 2581 cv_broadcast(&vdcp->sync_blocked_cv); 2582 vdcp->sync_op_cnt--; 2583 mutex_exit(&vdcp->lock); 2584 return (ENXIO); 2585 } 2586 2587 /* now block anyone other thread entering after us */ 2588 vdcp->sync_op_blocked = B_TRUE; 2589 vdcp->sync_op_pending = B_TRUE; 2590 mutex_exit(&vdcp->lock); 2591 2592 /* 2593 * No need to check return value - will return error only 2594 * in the DETACH case and we can fall through 2595 */ 2596 (void) vdc_send_request(vdcp, operation, addr, 2597 nbytes, slice, offset, cb_type, cb_arg, dir); 2598 2599 /* 2600 * block until our transaction completes. 2601 * Also anyone else waiting also gets to go next. 
2602 */ 2603 mutex_enter(&vdcp->lock); 2604 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 2605 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 2606 2607 DMSG(vdcp, 2, ": operation returned %d\n", vdcp->sync_op_status); 2608 if (vdcp->state == VDC_STATE_DETACH) 2609 status = ENXIO; 2610 else 2611 status = vdcp->sync_op_status; 2612 vdcp->sync_op_status = 0; 2613 vdcp->sync_op_blocked = B_FALSE; 2614 vdcp->sync_op_cnt--; 2615 2616 /* signal the next waiting thread */ 2617 cv_signal(&vdcp->sync_blocked_cv); 2618 mutex_exit(&vdcp->lock); 2619 2620 return (status); 2621 } 2622 2623 2624 /* 2625 * Function: 2626 * vdc_drain_response() 2627 * 2628 * Description: 2629 * When a guest is panicking, the completion of requests needs to be 2630 * handled differently because interrupts are disabled and vdc 2631 * will not get messages. We have to poll for the messages instead. 2632 * 2633 * Arguments: 2634 * vdc - soft state pointer for this instance of the device driver. 2635 * 2636 * Return Code: 2637 * 0 - Success 2638 */ 2639 static int 2640 vdc_drain_response(vdc_t *vdc) 2641 { 2642 int rv, idx, retries; 2643 size_t msglen; 2644 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2645 vio_dring_msg_t dmsg; 2646 2647 mutex_enter(&vdc->lock); 2648 2649 retries = 0; 2650 for (;;) { 2651 msglen = sizeof (dmsg); 2652 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 2653 if (rv) { 2654 rv = EINVAL; 2655 break; 2656 } 2657 2658 /* 2659 * if there are no packets wait and check again 2660 */ 2661 if ((rv == 0) && (msglen == 0)) { 2662 if (retries++ > vdc_dump_retries) { 2663 rv = EAGAIN; 2664 break; 2665 } 2666 2667 drv_usecwait(vdc_usec_timeout_dump); 2668 continue; 2669 } 2670 2671 /* 2672 * Ignore all messages that are not ACKs/NACKs to 2673 * DRing requests. 
2674 */ 2675 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2676 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2677 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 2678 dmsg.tag.vio_msgtype, 2679 dmsg.tag.vio_subtype, 2680 dmsg.tag.vio_subtype_env); 2681 continue; 2682 } 2683 2684 /* 2685 * set the appropriate return value for the current request. 2686 */ 2687 switch (dmsg.tag.vio_subtype) { 2688 case VIO_SUBTYPE_ACK: 2689 rv = 0; 2690 break; 2691 case VIO_SUBTYPE_NACK: 2692 rv = EAGAIN; 2693 break; 2694 default: 2695 continue; 2696 } 2697 2698 idx = dmsg.start_idx; 2699 if (idx >= vdc->dring_len) { 2700 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 2701 vdc->instance, idx); 2702 continue; 2703 } 2704 ldep = &vdc->local_dring[idx]; 2705 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 2706 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 2707 vdc->instance, idx, ldep->dep->hdr.dstate); 2708 continue; 2709 } 2710 2711 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 2712 vdc->instance, idx, ldep->dep->hdr.dstate); 2713 rv = vdc_depopulate_descriptor(vdc, idx); 2714 if (rv) { 2715 DMSG(vdc, 0, 2716 "[%d] Entry @ %d - depopulate failed ..\n", 2717 vdc->instance, idx); 2718 } 2719 2720 /* if this is the last descriptor - break out of loop */ 2721 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 2722 break; 2723 } 2724 2725 mutex_exit(&vdc->lock); 2726 DMSG(vdc, 0, "End idx=%d\n", idx); 2727 2728 return (rv); 2729 } 2730 2731 2732 /* 2733 * Function: 2734 * vdc_depopulate_descriptor() 2735 * 2736 * Description: 2737 * 2738 * Arguments: 2739 * vdc - soft state pointer for this instance of the device driver. 
2740 * idx - Index of the Descriptor Ring entry being modified 2741 * 2742 * Return Code: 2743 * 0 - Success 2744 */ 2745 static int 2746 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2747 { 2748 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2749 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2750 int status = ENXIO; 2751 int operation; 2752 int rv = 0; 2753 2754 ASSERT(vdc != NULL); 2755 ASSERT(idx < vdc->dring_len); 2756 ldep = &vdc->local_dring[idx]; 2757 ASSERT(ldep != NULL); 2758 ASSERT(MUTEX_HELD(&vdc->lock)); 2759 2760 DMSG(vdc, 2, ": idx = %d\n", idx); 2761 dep = ldep->dep; 2762 ASSERT(dep != NULL); 2763 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 2764 (dep->payload.status == ECANCELED)); 2765 2766 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2767 2768 ldep->is_free = B_TRUE; 2769 DMSG(vdc, 2, ": is_free = %d\n", ldep->is_free); 2770 status = dep->payload.status; 2771 operation = dep->payload.operation; 2772 2773 /* the DKIO FLUSH operation never bind handles so we can return now */ 2774 if (operation == VD_OP_FLUSH) 2775 return (status); 2776 2777 /* 2778 * If the upper layer passed in a misaligned address we copied the 2779 * data into an aligned buffer before sending it to LDC - we now 2780 * copy it back to the original buffer. 2781 */ 2782 if (ldep->align_addr) { 2783 ASSERT(ldep->addr != NULL); 2784 ASSERT(dep->payload.nbytes > 0); 2785 2786 bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes); 2787 kmem_free(ldep->align_addr, 2788 sizeof (caddr_t) * P2ROUNDUP(dep->payload.nbytes, 8)); 2789 ldep->align_addr = NULL; 2790 } 2791 2792 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 2793 if (rv != 0) { 2794 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 2795 vdc->instance, ldep->desc_mhdl, idx, rv); 2796 /* 2797 * The error returned by the vDisk server is more informative 2798 * and thus has a higher priority but if it isn't set we ensure 2799 * that this function returns an error. 
2800 */ 2801 if (status == 0) 2802 status = EINVAL; 2803 } 2804 2805 cv_signal(&vdc->membind_cv); 2806 cv_signal(&vdc->dring_free_cv); 2807 2808 return (status); 2809 } 2810 2811 /* 2812 * Function: 2813 * vdc_populate_mem_hdl() 2814 * 2815 * Description: 2816 * 2817 * Arguments: 2818 * vdc - soft state pointer for this instance of the device driver. 2819 * idx - Index of the Descriptor Ring entry being modified 2820 * addr - virtual address being mapped in 2821 * nybtes - number of bytes in 'addr' 2822 * operation - the vDisk operation being performed (VD_OP_xxx) 2823 * 2824 * Return Code: 2825 * 0 - Success 2826 */ 2827 static int 2828 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 2829 { 2830 vd_dring_entry_t *dep = NULL; 2831 ldc_mem_handle_t mhdl; 2832 caddr_t vaddr; 2833 size_t nbytes; 2834 uint8_t perm = LDC_MEM_RW; 2835 uint8_t maptype; 2836 int rv = 0; 2837 int i; 2838 2839 ASSERT(vdcp != NULL); 2840 2841 dep = ldep->dep; 2842 mhdl = ldep->desc_mhdl; 2843 2844 switch (ldep->dir) { 2845 case VIO_read_dir: 2846 perm = LDC_MEM_W; 2847 break; 2848 2849 case VIO_write_dir: 2850 perm = LDC_MEM_R; 2851 break; 2852 2853 case VIO_both_dir: 2854 perm = LDC_MEM_RW; 2855 break; 2856 2857 default: 2858 ASSERT(0); /* catch bad programming in vdc */ 2859 } 2860 2861 /* 2862 * LDC expects any addresses passed in to be 8-byte aligned. 
We need 2863 * to copy the contents of any misaligned buffers to a newly allocated 2864 * buffer and bind it instead (and copy the the contents back to the 2865 * original buffer passed in when depopulating the descriptor) 2866 */ 2867 vaddr = ldep->addr; 2868 nbytes = ldep->nbytes; 2869 if (((uint64_t)vaddr & 0x7) != 0) { 2870 ASSERT(ldep->align_addr == NULL); 2871 ldep->align_addr = 2872 kmem_alloc(sizeof (caddr_t) * 2873 P2ROUNDUP(nbytes, 8), KM_SLEEP); 2874 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 2875 "(buf=%p nb=%ld op=%d)\n", 2876 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 2877 nbytes, ldep->operation); 2878 if (perm != LDC_MEM_W) 2879 bcopy(vaddr, ldep->align_addr, nbytes); 2880 vaddr = ldep->align_addr; 2881 } 2882 2883 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 2884 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2885 maptype, perm, &dep->payload.cookie[0], 2886 &dep->payload.ncookies); 2887 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 2888 vdcp->instance, dep->payload.ncookies); 2889 if (rv != 0) { 2890 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 2891 "(mhdl=%p, buf=%p, err=%d)\n", 2892 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 2893 if (ldep->align_addr) { 2894 kmem_free(ldep->align_addr, 2895 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 2896 ldep->align_addr = NULL; 2897 } 2898 return (EAGAIN); 2899 } 2900 2901 /* 2902 * Get the other cookies (if any). 
2903 */ 2904 for (i = 1; i < dep->payload.ncookies; i++) { 2905 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2906 if (rv != 0) { 2907 (void) ldc_mem_unbind_handle(mhdl); 2908 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 2909 "(mhdl=%lx cnum=%d), err=%d", 2910 vdcp->instance, mhdl, i, rv); 2911 if (ldep->align_addr) { 2912 kmem_free(ldep->align_addr, 2913 sizeof (caddr_t) * dep->payload.nbytes); 2914 ldep->align_addr = NULL; 2915 } 2916 return (EAGAIN); 2917 } 2918 } 2919 2920 return (rv); 2921 } 2922 2923 /* 2924 * Interrupt handlers for messages from LDC 2925 */ 2926 2927 /* 2928 * Function: 2929 * vdc_handle_cb() 2930 * 2931 * Description: 2932 * 2933 * Arguments: 2934 * event - Type of event (LDC_EVT_xxx) that triggered the callback 2935 * arg - soft state pointer for this instance of the device driver. 2936 * 2937 * Return Code: 2938 * 0 - Success 2939 */ 2940 static uint_t 2941 vdc_handle_cb(uint64_t event, caddr_t arg) 2942 { 2943 ldc_status_t ldc_state; 2944 int rv = 0; 2945 2946 vdc_t *vdc = (vdc_t *)(void *)arg; 2947 2948 ASSERT(vdc != NULL); 2949 2950 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 2951 2952 /* 2953 * Depending on the type of event that triggered this callback, 2954 * we modify the handshake state or read the data. 2955 * 2956 * NOTE: not done as a switch() as event could be triggered by 2957 * a state change and a read request. Also the ordering of the 2958 * check for the event types is deliberate. 2959 */ 2960 if (event & LDC_EVT_UP) { 2961 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 2962 2963 mutex_enter(&vdc->lock); 2964 2965 /* get LDC state */ 2966 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2967 if (rv != 0) { 2968 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 2969 vdc->instance, rv); 2970 return (LDC_SUCCESS); 2971 } 2972 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 2973 /* 2974 * Reset the transaction sequence numbers when 2975 * LDC comes up. 
We then kick off the handshake 2976 * negotiation with the vDisk server. 2977 */ 2978 vdc->seq_num = 1; 2979 vdc->seq_num_reply = 0; 2980 vdc->ldc_state = ldc_state; 2981 cv_signal(&vdc->initwait_cv); 2982 } 2983 2984 mutex_exit(&vdc->lock); 2985 } 2986 2987 if (event & LDC_EVT_READ) { 2988 DMSG(vdc, 0, "[%d] Received LDC_EVT_READ\n", vdc->instance); 2989 mutex_enter(&vdc->read_lock); 2990 cv_signal(&vdc->read_cv); 2991 vdc->read_state = VDC_READ_PENDING; 2992 mutex_exit(&vdc->read_lock); 2993 2994 /* that's all we have to do - no need to handle DOWN/RESET */ 2995 return (LDC_SUCCESS); 2996 } 2997 2998 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 2999 3000 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 3001 3002 mutex_enter(&vdc->lock); 3003 /* 3004 * Need to wake up any readers so they will 3005 * detect that a reset has occurred. 3006 */ 3007 mutex_enter(&vdc->read_lock); 3008 if ((vdc->read_state == VDC_READ_WAITING) || 3009 (vdc->read_state == VDC_READ_RESET)) 3010 cv_signal(&vdc->read_cv); 3011 vdc->read_state = VDC_READ_RESET; 3012 mutex_exit(&vdc->read_lock); 3013 3014 /* wake up any threads waiting for connection to come up */ 3015 if (vdc->state == VDC_STATE_INIT_WAITING) { 3016 vdc->state = VDC_STATE_RESETTING; 3017 cv_signal(&vdc->initwait_cv); 3018 } 3019 3020 mutex_exit(&vdc->lock); 3021 } 3022 3023 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 3024 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3025 vdc->instance, event); 3026 3027 return (LDC_SUCCESS); 3028 } 3029 3030 /* 3031 * Function: 3032 * vdc_wait_for_response() 3033 * 3034 * Description: 3035 * Block waiting for a response from the server. If there is 3036 * no data the thread block on the read_cv that is signalled 3037 * by the callback when an EVT_READ occurs. 3038 * 3039 * Arguments: 3040 * vdcp - soft state pointer for this instance of the device driver. 
3041 * 3042 * Return Code: 3043 * 0 - Success 3044 */ 3045 static int 3046 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3047 { 3048 size_t nbytes = sizeof (*msgp); 3049 int status; 3050 3051 ASSERT(vdcp != NULL); 3052 3053 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3054 3055 status = vdc_recv(vdcp, msgp, &nbytes); 3056 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 3057 status, (int)nbytes); 3058 if (status) { 3059 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3060 vdcp->instance, status); 3061 return (status); 3062 } 3063 3064 if (nbytes < sizeof (vio_msg_tag_t)) { 3065 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3066 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3067 return (ENOMSG); 3068 } 3069 3070 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3071 msgp->tag.vio_msgtype, 3072 msgp->tag.vio_subtype, 3073 msgp->tag.vio_subtype_env); 3074 3075 /* 3076 * Verify the Session ID of the message 3077 * 3078 * Every message after the Version has been negotiated should 3079 * have the correct session ID set. 3080 */ 3081 if ((msgp->tag.vio_sid != vdcp->session_id) && 3082 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3083 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3084 "expected 0x%lx [seq num %lx @ %d]", 3085 vdcp->instance, msgp->tag.vio_sid, 3086 vdcp->session_id, 3087 ((vio_dring_msg_t *)msgp)->seq_num, 3088 ((vio_dring_msg_t *)msgp)->start_idx); 3089 return (ENOMSG); 3090 } 3091 return (0); 3092 } 3093 3094 3095 /* 3096 * Function: 3097 * vdc_resubmit_backup_dring() 3098 * 3099 * Description: 3100 * Resubmit each descriptor in the backed up dring to 3101 * vDisk server. The Dring was backed up during connection 3102 * reset. 3103 * 3104 * Arguments: 3105 * vdcp - soft state pointer for this instance of the device driver. 
3106 * 3107 * Return Code: 3108 * 0 - Success 3109 */ 3110 static int 3111 vdc_resubmit_backup_dring(vdc_t *vdcp) 3112 { 3113 int count; 3114 int b_idx; 3115 int rv; 3116 int dring_size; 3117 int status; 3118 vio_msg_t vio_msg; 3119 vdc_local_desc_t *curr_ldep; 3120 3121 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3122 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3123 3124 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3125 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3126 3127 /* 3128 * Walk the backup copy of the local descriptor ring and 3129 * resubmit all the outstanding transactions. 3130 */ 3131 b_idx = vdcp->local_dring_backup_tail; 3132 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3133 3134 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3135 3136 /* only resubmit oustanding transactions */ 3137 if (!curr_ldep->is_free) { 3138 3139 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3140 mutex_enter(&vdcp->lock); 3141 rv = vdc_populate_descriptor(vdcp, curr_ldep->operation, 3142 curr_ldep->addr, curr_ldep->nbytes, 3143 curr_ldep->slice, curr_ldep->offset, 3144 curr_ldep->cb_type, curr_ldep->cb_arg, 3145 curr_ldep->dir); 3146 mutex_exit(&vdcp->lock); 3147 if (rv) { 3148 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3149 vdcp->instance, b_idx); 3150 return (rv); 3151 } 3152 3153 /* Wait for the response message. 
*/ 3154 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 3155 b_idx); 3156 status = vdc_wait_for_response(vdcp, &vio_msg); 3157 if (status) { 3158 DMSG(vdcp, 1, "[%d] wait_for_response " 3159 "returned err=%d\n", vdcp->instance, 3160 status); 3161 return (status); 3162 } 3163 3164 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 3165 status = vdc_process_data_msg(vdcp, &vio_msg); 3166 if (status) { 3167 DMSG(vdcp, 1, "[%d] process_data_msg " 3168 "returned err=%d\n", vdcp->instance, 3169 status); 3170 return (status); 3171 } 3172 } 3173 3174 /* get the next element to submit */ 3175 if (++b_idx >= vdcp->local_dring_backup_len) 3176 b_idx = 0; 3177 } 3178 3179 /* all done - now clear up pending dring copy */ 3180 dring_size = vdcp->local_dring_backup_len * 3181 sizeof (vdcp->local_dring_backup[0]); 3182 3183 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3184 3185 vdcp->local_dring_backup = NULL; 3186 3187 return (0); 3188 } 3189 3190 /* 3191 * Function: 3192 * vdc_backup_local_dring() 3193 * 3194 * Description: 3195 * Backup the current dring in the event of a reset. The Dring 3196 * transactions will be resubmitted to the server when the 3197 * connection is restored. 3198 * 3199 * Arguments: 3200 * vdcp - soft state pointer for this instance of the device driver. 3201 * 3202 * Return Code: 3203 * NONE 3204 */ 3205 static void 3206 vdc_backup_local_dring(vdc_t *vdcp) 3207 { 3208 int dring_size; 3209 3210 ASSERT(vdcp->state == VDC_STATE_RESETTING); 3211 3212 /* 3213 * If the backup dring is stil around, it means 3214 * that the last restore did not complete. However, 3215 * since we never got back into the running state, 3216 * the backup copy we have is still valid. 
3217 */ 3218 if (vdcp->local_dring_backup != NULL) { 3219 DMSG(vdcp, 1, "reusing local descriptor ring backup " 3220 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 3221 vdcp->local_dring_backup_tail); 3222 return; 3223 } 3224 3225 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 3226 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 3227 3228 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 3229 3230 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 3231 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 3232 3233 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 3234 vdcp->local_dring_backup_len = vdcp->dring_len; 3235 } 3236 3237 /* -------------------------------------------------------------------------- */ 3238 3239 /* 3240 * The following functions process the incoming messages from vds 3241 */ 3242 3243 /* 3244 * Function: 3245 * vdc_process_msg_thread() 3246 * 3247 * Description: 3248 * 3249 * Main VDC message processing thread. Each vDisk instance 3250 * consists of a copy of this thread. This thread triggers 3251 * all the handshakes and data exchange with the server. It 3252 * also handles all channel resets 3253 * 3254 * Arguments: 3255 * vdc - soft state pointer for this instance of the device driver. 3256 * 3257 * Return Code: 3258 * None 3259 */ 3260 static void 3261 vdc_process_msg_thread(vdc_t *vdcp) 3262 { 3263 int status; 3264 3265 mutex_enter(&vdcp->lock); 3266 3267 for (;;) { 3268 3269 #define Q(_s) (vdcp->state == _s) ? 
#_s : 3270 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 3271 Q(VDC_STATE_INIT) 3272 Q(VDC_STATE_INIT_WAITING) 3273 Q(VDC_STATE_NEGOTIATE) 3274 Q(VDC_STATE_HANDLE_PENDING) 3275 Q(VDC_STATE_RUNNING) 3276 Q(VDC_STATE_RESETTING) 3277 Q(VDC_STATE_DETACH) 3278 "UNKNOWN"); 3279 3280 switch (vdcp->state) { 3281 case VDC_STATE_INIT: 3282 3283 /* Check if have re-initializing repeatedly */ 3284 if (vdcp->hshake_cnt++ > VDC_RETRIES) { 3285 vdcp->state = VDC_STATE_DETACH; 3286 break; 3287 } 3288 3289 /* Bring up connection with vds via LDC */ 3290 status = vdc_start_ldc_connection(vdcp); 3291 switch (status) { 3292 case EINVAL: 3293 DMSG(vdcp, 0, "[%d] Could not start LDC", 3294 vdcp->instance); 3295 vdcp->state = VDC_STATE_DETACH; 3296 break; 3297 case 0: 3298 vdcp->state = VDC_STATE_INIT_WAITING; 3299 break; 3300 default: 3301 vdcp->state = VDC_STATE_INIT_WAITING; 3302 break; 3303 } 3304 break; 3305 3306 case VDC_STATE_INIT_WAITING: 3307 3308 /* 3309 * Let the callback event move us on 3310 * when channel is open to server 3311 */ 3312 while (vdcp->ldc_state != LDC_UP) { 3313 cv_wait(&vdcp->initwait_cv, &vdcp->lock); 3314 if (vdcp->state != VDC_STATE_INIT_WAITING) { 3315 DMSG(vdcp, 0, 3316 "state moved to %d out from under us...\n", 3317 vdcp->state); 3318 3319 break; 3320 } 3321 } 3322 if (vdcp->state == VDC_STATE_INIT_WAITING && 3323 vdcp->ldc_state == LDC_UP) { 3324 vdcp->state = VDC_STATE_NEGOTIATE; 3325 } 3326 break; 3327 3328 case VDC_STATE_NEGOTIATE: 3329 switch (status = vdc_ver_negotiation(vdcp)) { 3330 case 0: 3331 break; 3332 default: 3333 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 3334 status); 3335 goto reset; 3336 } 3337 3338 switch (status = vdc_attr_negotiation(vdcp)) { 3339 case 0: 3340 break; 3341 default: 3342 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 3343 status); 3344 goto reset; 3345 } 3346 3347 switch (status = vdc_dring_negotiation(vdcp)) { 3348 case 0: 3349 break; 3350 default: 3351 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 3352 
status); 3353 goto reset; 3354 } 3355 3356 switch (status = vdc_rdx_exchange(vdcp)) { 3357 case 0: 3358 vdcp->state = VDC_STATE_HANDLE_PENDING; 3359 goto done; 3360 default: 3361 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 3362 status); 3363 goto reset; 3364 } 3365 reset: 3366 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 3367 status); 3368 vdcp->state = VDC_STATE_RESETTING; 3369 done: 3370 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 3371 vdcp->state); 3372 break; 3373 3374 case VDC_STATE_HANDLE_PENDING: 3375 3376 mutex_exit(&vdcp->lock); 3377 status = vdc_resubmit_backup_dring(vdcp); 3378 mutex_enter(&vdcp->lock); 3379 3380 if (status) 3381 vdcp->state = VDC_STATE_RESETTING; 3382 else 3383 vdcp->state = VDC_STATE_RUNNING; 3384 3385 break; 3386 3387 /* enter running state */ 3388 case VDC_STATE_RUNNING: 3389 /* 3390 * Signal anyone waiting for the connection 3391 * to come on line. 3392 */ 3393 vdcp->hshake_cnt = 0; 3394 cv_broadcast(&vdcp->running_cv); 3395 mutex_exit(&vdcp->lock); 3396 3397 for (;;) { 3398 vio_msg_t msg; 3399 status = vdc_wait_for_response(vdcp, &msg); 3400 if (status) break; 3401 3402 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 3403 vdcp->instance); 3404 status = vdc_process_data_msg(vdcp, &msg); 3405 if (status) { 3406 DMSG(vdcp, 1, "[%d] process_data_msg " 3407 "returned err=%d\n", vdcp->instance, 3408 status); 3409 break; 3410 } 3411 3412 } 3413 3414 mutex_enter(&vdcp->lock); 3415 3416 vdcp->state = VDC_STATE_RESETTING; 3417 break; 3418 3419 case VDC_STATE_RESETTING: 3420 DMSG(vdcp, 0, "Initiating channel reset " 3421 "(pending = %d)\n", (int)vdcp->threads_pending); 3422 3423 if (vdcp->self_reset) { 3424 DMSG(vdcp, 0, 3425 "[%d] calling stop_ldc_connection.\n", 3426 vdcp->instance); 3427 status = vdc_stop_ldc_connection(vdcp); 3428 vdcp->self_reset = B_FALSE; 3429 } 3430 3431 /* 3432 * Wait for all threads currently waiting 3433 * for a free dring entry to use. 
3434 */ 3435 while (vdcp->threads_pending) { 3436 cv_broadcast(&vdcp->membind_cv); 3437 cv_broadcast(&vdcp->dring_free_cv); 3438 mutex_exit(&vdcp->lock); 3439 /* let them wake up */ 3440 drv_usecwait(vdc_min_timeout_ldc); 3441 mutex_enter(&vdcp->lock); 3442 } 3443 3444 ASSERT(vdcp->threads_pending == 0); 3445 3446 /* Sanity check that no thread is receiving */ 3447 ASSERT(vdcp->read_state != VDC_READ_WAITING); 3448 3449 vdcp->read_state = VDC_READ_IDLE; 3450 3451 vdc_backup_local_dring(vdcp); 3452 3453 /* cleanup the old d-ring */ 3454 vdc_destroy_descriptor_ring(vdcp); 3455 3456 /* go and start again */ 3457 vdcp->state = VDC_STATE_INIT; 3458 3459 break; 3460 3461 case VDC_STATE_DETACH: 3462 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 3463 vdcp->instance); 3464 3465 while (vdcp->sync_op_pending) { 3466 cv_signal(&vdcp->sync_pending_cv); 3467 cv_signal(&vdcp->sync_blocked_cv); 3468 mutex_exit(&vdcp->lock); 3469 drv_usecwait(vdc_min_timeout_ldc); 3470 mutex_enter(&vdcp->lock); 3471 } 3472 3473 cv_signal(&vdcp->running_cv); 3474 mutex_exit(&vdcp->lock); 3475 3476 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 3477 vdcp->instance); 3478 thread_exit(); 3479 break; 3480 } 3481 } 3482 } 3483 3484 3485 /* 3486 * Function: 3487 * vdc_process_data_msg() 3488 * 3489 * Description: 3490 * This function is called by the message processing thread each time 3491 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 3492 * be an ACK or NACK from vds[1] which vdc handles as follows. 3493 * ACK - wake up the waiting thread 3494 * NACK - resend any messages necessary 3495 * 3496 * [1] Although the message format allows it, vds should not send a 3497 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 3498 * some bizarre reason it does, vdc will reset the connection. 3499 * 3500 * Arguments: 3501 * vdc - soft state pointer for this instance of the device driver. 
3502 * msg - the LDC message sent by vds 3503 * 3504 * Return Code: 3505 * 0 - Success. 3506 * > 0 - error value returned by LDC 3507 */ 3508 static int 3509 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 3510 { 3511 int status = 0; 3512 vio_dring_msg_t *dring_msg; 3513 vdc_local_desc_t *ldep = NULL; 3514 int start, end; 3515 int idx; 3516 3517 dring_msg = (vio_dring_msg_t *)msg; 3518 3519 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 3520 ASSERT(vdcp != NULL); 3521 3522 mutex_enter(&vdcp->lock); 3523 3524 /* 3525 * Check to see if the message has bogus data 3526 */ 3527 idx = start = dring_msg->start_idx; 3528 end = dring_msg->end_idx; 3529 if ((start >= vdcp->dring_len) || 3530 (end >= vdcp->dring_len) || (end < -1)) { 3531 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 3532 vdcp->instance, start, end); 3533 mutex_exit(&vdcp->lock); 3534 return (EINVAL); 3535 } 3536 3537 /* 3538 * Verify that the sequence number is what vdc expects. 3539 */ 3540 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 3541 case VDC_SEQ_NUM_TODO: 3542 break; /* keep processing this message */ 3543 case VDC_SEQ_NUM_SKIP: 3544 mutex_exit(&vdcp->lock); 3545 return (0); 3546 case VDC_SEQ_NUM_INVALID: 3547 mutex_exit(&vdcp->lock); 3548 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 3549 return (ENXIO); 3550 } 3551 3552 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 3553 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 3554 VDC_DUMP_DRING_MSG(dring_msg); 3555 mutex_exit(&vdcp->lock); 3556 return (EIO); 3557 3558 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 3559 mutex_exit(&vdcp->lock); 3560 return (EPROTO); 3561 } 3562 3563 DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp); 3564 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 3565 ASSERT(start == end); 3566 3567 ldep = &vdcp->local_dring[idx]; 3568 3569 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 3570 ldep->dep->hdr.dstate, ldep->cb_type); 3571 3572 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 
3573 struct buf *bufp; 3574 3575 switch (ldep->cb_type) { 3576 case CB_SYNC: 3577 ASSERT(vdcp->sync_op_pending); 3578 3579 status = vdc_depopulate_descriptor(vdcp, idx); 3580 vdcp->sync_op_status = status; 3581 vdcp->sync_op_pending = B_FALSE; 3582 cv_signal(&vdcp->sync_pending_cv); 3583 break; 3584 3585 case CB_STRATEGY: 3586 bufp = ldep->cb_arg; 3587 ASSERT(bufp != NULL); 3588 status = ldep->dep->payload.status; /* Future:ntoh */ 3589 if (status != 0) { 3590 DMSG(vdcp, 1, "strategy status=%d\n", status); 3591 bioerror(bufp, status); 3592 } 3593 status = vdc_depopulate_descriptor(vdcp, idx); 3594 biodone(bufp); 3595 break; 3596 3597 default: 3598 ASSERT(0); 3599 } 3600 } 3601 3602 /* let the arrival signal propogate */ 3603 mutex_exit(&vdcp->lock); 3604 3605 /* probe gives the count of how many entries were processed */ 3606 DTRACE_IO2(processed, int, 1, vdc_t *, vdcp); 3607 3608 return (0); 3609 } 3610 3611 /* 3612 * Function: 3613 * vdc_process_err_msg() 3614 * 3615 * NOTE: No error messages are used as part of the vDisk protocol 3616 */ 3617 static int 3618 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 3619 { 3620 _NOTE(ARGUNUSED(vdc)) 3621 _NOTE(ARGUNUSED(msg)) 3622 3623 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 3624 DMSG(vdc, 1, "[%d] Got an ERR msg", vdc->instance); 3625 3626 return (ENOTSUP); 3627 } 3628 3629 /* 3630 * Function: 3631 * vdc_handle_ver_msg() 3632 * 3633 * Description: 3634 * 3635 * Arguments: 3636 * vdc - soft state pointer for this instance of the device driver. 
3637 * ver_msg - LDC message sent by vDisk server 3638 * 3639 * Return Code: 3640 * 0 - Success 3641 */ 3642 static int 3643 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 3644 { 3645 int status = 0; 3646 3647 ASSERT(vdc != NULL); 3648 ASSERT(mutex_owned(&vdc->lock)); 3649 3650 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 3651 return (EPROTO); 3652 } 3653 3654 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 3655 return (EINVAL); 3656 } 3657 3658 switch (ver_msg->tag.vio_subtype) { 3659 case VIO_SUBTYPE_ACK: 3660 /* 3661 * We check to see if the version returned is indeed supported 3662 * (The server may have also adjusted the minor number downwards 3663 * and if so 'ver_msg' will contain the actual version agreed) 3664 */ 3665 if (vdc_is_supported_version(ver_msg)) { 3666 vdc->ver.major = ver_msg->ver_major; 3667 vdc->ver.minor = ver_msg->ver_minor; 3668 ASSERT(vdc->ver.major > 0); 3669 } else { 3670 status = EPROTO; 3671 } 3672 break; 3673 3674 case VIO_SUBTYPE_NACK: 3675 /* 3676 * call vdc_is_supported_version() which will return the next 3677 * supported version (if any) in 'ver_msg' 3678 */ 3679 (void) vdc_is_supported_version(ver_msg); 3680 if (ver_msg->ver_major > 0) { 3681 size_t len = sizeof (*ver_msg); 3682 3683 ASSERT(vdc->ver.major > 0); 3684 3685 /* reset the necessary fields and resend */ 3686 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 3687 ver_msg->dev_class = VDEV_DISK; 3688 3689 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 3690 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 3691 vdc->instance, status); 3692 if (len != sizeof (*ver_msg)) 3693 status = EBADMSG; 3694 } else { 3695 DMSG(vdc, 0, "[%d] No common version with " 3696 "vDisk server", vdc->instance); 3697 status = ENOTSUP; 3698 } 3699 3700 break; 3701 case VIO_SUBTYPE_INFO: 3702 /* 3703 * Handle the case where vds starts handshake 3704 * (for now only vdc is the instigatior) 3705 */ 3706 status = ENOTSUP; 3707 break; 3708 3709 default: 3710 status = EINVAL; 3711 
break; 3712 } 3713 3714 return (status); 3715 } 3716 3717 /* 3718 * Function: 3719 * vdc_handle_attr_msg() 3720 * 3721 * Description: 3722 * 3723 * Arguments: 3724 * vdc - soft state pointer for this instance of the device driver. 3725 * attr_msg - LDC message sent by vDisk server 3726 * 3727 * Return Code: 3728 * 0 - Success 3729 */ 3730 static int 3731 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 3732 { 3733 int status = 0; 3734 3735 ASSERT(vdc != NULL); 3736 ASSERT(mutex_owned(&vdc->lock)); 3737 3738 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 3739 return (EPROTO); 3740 } 3741 3742 switch (attr_msg->tag.vio_subtype) { 3743 case VIO_SUBTYPE_ACK: 3744 /* 3745 * We now verify the attributes sent by vds. 3746 */ 3747 vdc->vdisk_size = attr_msg->vdisk_size; 3748 vdc->vdisk_type = attr_msg->vdisk_type; 3749 3750 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 3751 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 3752 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 3753 vdc->instance, vdc->block_size, 3754 attr_msg->vdisk_block_size); 3755 3756 /* 3757 * We don't know at compile time what the vDisk server will 3758 * think are good values but we apply an large (arbitrary) 3759 * upper bound to prevent memory exhaustion in vdc if it was 3760 * allocating a DRing based of huge values sent by the server. 3761 * We probably will never exceed this except if the message 3762 * was garbage. 
3763 */ 3764 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 3765 (PAGESIZE * DEV_BSIZE)) { 3766 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 3767 vdc->block_size = attr_msg->vdisk_block_size; 3768 } else { 3769 DMSG(vdc, 0, "[%d] vds block transfer size too big;" 3770 " using max supported by vdc", vdc->instance); 3771 } 3772 3773 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 3774 (attr_msg->vdisk_size > INT64_MAX) || 3775 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 3776 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 3777 vdc->instance); 3778 status = EINVAL; 3779 break; 3780 } 3781 3782 break; 3783 3784 case VIO_SUBTYPE_NACK: 3785 /* 3786 * vds could not handle the attributes we sent so we 3787 * stop negotiating. 3788 */ 3789 status = EPROTO; 3790 break; 3791 3792 case VIO_SUBTYPE_INFO: 3793 /* 3794 * Handle the case where vds starts the handshake 3795 * (for now; vdc is the only supported instigatior) 3796 */ 3797 status = ENOTSUP; 3798 break; 3799 3800 default: 3801 status = ENOTSUP; 3802 break; 3803 } 3804 3805 return (status); 3806 } 3807 3808 /* 3809 * Function: 3810 * vdc_handle_dring_reg_msg() 3811 * 3812 * Description: 3813 * 3814 * Arguments: 3815 * vdc - soft state pointer for this instance of the driver. 
3816 * dring_msg - LDC message sent by vDisk server 3817 * 3818 * Return Code: 3819 * 0 - Success 3820 */ 3821 static int 3822 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 3823 { 3824 int status = 0; 3825 3826 ASSERT(vdc != NULL); 3827 ASSERT(mutex_owned(&vdc->lock)); 3828 3829 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 3830 return (EPROTO); 3831 } 3832 3833 switch (dring_msg->tag.vio_subtype) { 3834 case VIO_SUBTYPE_ACK: 3835 /* save the received dring_ident */ 3836 vdc->dring_ident = dring_msg->dring_ident; 3837 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 3838 vdc->instance, vdc->dring_ident); 3839 break; 3840 3841 case VIO_SUBTYPE_NACK: 3842 /* 3843 * vds could not handle the DRing info we sent so we 3844 * stop negotiating. 3845 */ 3846 DMSG(vdc, 0, "[%d] server could not register DRing\n", 3847 vdc->instance); 3848 status = EPROTO; 3849 break; 3850 3851 case VIO_SUBTYPE_INFO: 3852 /* 3853 * Handle the case where vds starts handshake 3854 * (for now only vdc is the instigatior) 3855 */ 3856 status = ENOTSUP; 3857 break; 3858 default: 3859 status = ENOTSUP; 3860 } 3861 3862 return (status); 3863 } 3864 3865 /* 3866 * Function: 3867 * vdc_verify_seq_num() 3868 * 3869 * Description: 3870 * This functions verifies that the sequence number sent back by the vDisk 3871 * server with the latest message is what is expected (i.e. it is greater 3872 * than the last seq num sent by the vDisk server and less than or equal 3873 * to the last seq num generated by vdc). 3874 * 3875 * It then checks the request ID to see if any requests need processing 3876 * in the DRing. 3877 * 3878 * Arguments: 3879 * vdc - soft state pointer for this instance of the driver. 
3880 * dring_msg - pointer to the LDC message sent by vds 3881 * 3882 * Return Code: 3883 * VDC_SEQ_NUM_TODO - Message needs to be processed 3884 * VDC_SEQ_NUM_SKIP - Message has already been processed 3885 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 3886 * vdc cannot deal with them 3887 */ 3888 static int 3889 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 3890 { 3891 ASSERT(vdc != NULL); 3892 ASSERT(dring_msg != NULL); 3893 ASSERT(mutex_owned(&vdc->lock)); 3894 3895 /* 3896 * Check to see if the messages were responded to in the correct 3897 * order by vds. 3898 */ 3899 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 3900 (dring_msg->seq_num > vdc->seq_num)) { 3901 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 3902 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 3903 vdc->instance, dring_msg->seq_num, 3904 vdc->seq_num_reply, vdc->seq_num, 3905 vdc->req_id_proc, vdc->req_id); 3906 return (VDC_SEQ_NUM_INVALID); 3907 } 3908 vdc->seq_num_reply = dring_msg->seq_num; 3909 3910 if (vdc->req_id_proc < vdc->req_id) 3911 return (VDC_SEQ_NUM_TODO); 3912 else 3913 return (VDC_SEQ_NUM_SKIP); 3914 } 3915 3916 3917 /* 3918 * Function: 3919 * vdc_is_supported_version() 3920 * 3921 * Description: 3922 * This routine checks if the major/minor version numbers specified in 3923 * 'ver_msg' are supported. 
If not it finds the next version that is 3924 * in the supported version list 'vdc_version[]' and sets the fields in 3925 * 'ver_msg' to those values 3926 * 3927 * Arguments: 3928 * ver_msg - LDC message sent by vDisk server 3929 * 3930 * Return Code: 3931 * B_TRUE - Success 3932 * B_FALSE - Version not supported 3933 */ 3934 static boolean_t 3935 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 3936 { 3937 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 3938 3939 for (int i = 0; i < vdc_num_versions; i++) { 3940 ASSERT(vdc_version[i].major > 0); 3941 ASSERT((i == 0) || 3942 (vdc_version[i].major < vdc_version[i-1].major)); 3943 3944 /* 3945 * If the major versions match, adjust the minor version, if 3946 * necessary, down to the highest value supported by this 3947 * client. The server should support all minor versions lower 3948 * than the value it sent 3949 */ 3950 if (ver_msg->ver_major == vdc_version[i].major) { 3951 if (ver_msg->ver_minor > vdc_version[i].minor) { 3952 DMSGX(0, 3953 "Adjusting minor version from %u to %u", 3954 ver_msg->ver_minor, vdc_version[i].minor); 3955 ver_msg->ver_minor = vdc_version[i].minor; 3956 } 3957 return (B_TRUE); 3958 } 3959 3960 /* 3961 * If the message contains a higher major version number, set 3962 * the message's major/minor versions to the current values 3963 * and return false, so this message will get resent with 3964 * these values, and the server will potentially try again 3965 * with the same or a lower version 3966 */ 3967 if (ver_msg->ver_major > vdc_version[i].major) { 3968 ver_msg->ver_major = vdc_version[i].major; 3969 ver_msg->ver_minor = vdc_version[i].minor; 3970 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 3971 ver_msg->ver_major, ver_msg->ver_minor); 3972 3973 return (B_FALSE); 3974 } 3975 3976 /* 3977 * Otherwise, the message's major version is less than the 3978 * current major version, so continue the loop to the next 3979 * (lower) supported version 3980 */ 3981 } 3982 3983 
/* 3984 * No common version was found; "ground" the version pair in the 3985 * message to terminate negotiation 3986 */ 3987 ver_msg->ver_major = 0; 3988 ver_msg->ver_minor = 0; 3989 3990 return (B_FALSE); 3991 } 3992 /* -------------------------------------------------------------------------- */ 3993 3994 /* 3995 * DKIO(7) support 3996 */ 3997 3998 typedef struct vdc_dk_arg { 3999 struct dk_callback dkc; 4000 int mode; 4001 dev_t dev; 4002 vdc_t *vdc; 4003 } vdc_dk_arg_t; 4004 4005 /* 4006 * Function: 4007 * vdc_dkio_flush_cb() 4008 * 4009 * Description: 4010 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 4011 * by kernel code. 4012 * 4013 * Arguments: 4014 * arg - a pointer to a vdc_dk_arg_t structure. 4015 */ 4016 void 4017 vdc_dkio_flush_cb(void *arg) 4018 { 4019 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 4020 struct dk_callback *dkc = NULL; 4021 vdc_t *vdc = NULL; 4022 int rv; 4023 4024 if (dk_arg == NULL) { 4025 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 4026 return; 4027 } 4028 dkc = &dk_arg->dkc; 4029 vdc = dk_arg->vdc; 4030 ASSERT(vdc != NULL); 4031 4032 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 4033 VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir); 4034 if (rv != 0) { 4035 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 4036 vdc->instance, rv, 4037 ddi_model_convert_from(dk_arg->mode & FMODELS)); 4038 } 4039 4040 /* 4041 * Trigger the call back to notify the caller the the ioctl call has 4042 * been completed. 
4043 */ 4044 if ((dk_arg->mode & FKIOCTL) && 4045 (dkc != NULL) && 4046 (dkc->dkc_callback != NULL)) { 4047 ASSERT(dkc->dkc_cookie != NULL); 4048 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 4049 } 4050 4051 /* Indicate that one less DKIO write flush is outstanding */ 4052 mutex_enter(&vdc->lock); 4053 vdc->dkio_flush_pending--; 4054 ASSERT(vdc->dkio_flush_pending >= 0); 4055 mutex_exit(&vdc->lock); 4056 4057 /* free the mem that was allocated when the callback was dispatched */ 4058 kmem_free(arg, sizeof (vdc_dk_arg_t)); 4059 } 4060 4061 /* 4062 * This structure is used in the DKIO(7I) array below. 4063 */ 4064 typedef struct vdc_dk_ioctl { 4065 uint8_t op; /* VD_OP_XXX value */ 4066 int cmd; /* Solaris ioctl operation number */ 4067 size_t nbytes; /* size of structure to be copied */ 4068 4069 /* function to convert between vDisk and Solaris structure formats */ 4070 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 4071 int mode, int dir); 4072 } vdc_dk_ioctl_t; 4073 4074 /* 4075 * Subset of DKIO(7I) operations currently supported 4076 */ 4077 static vdc_dk_ioctl_t dk_ioctl[] = { 4078 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, sizeof (int), 4079 vdc_null_copy_func}, 4080 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 4081 vdc_get_wce_convert}, 4082 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 4083 vdc_set_wce_convert}, 4084 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 4085 vdc_get_vtoc_convert}, 4086 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 4087 vdc_set_vtoc_convert}, 4088 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 4089 vdc_get_geom_convert}, 4090 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 4091 vdc_get_geom_convert}, 4092 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 4093 vdc_get_geom_convert}, 4094 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 4095 vdc_set_geom_convert}, 4096 {VD_OP_GET_EFI, DKIOCGETEFI, 0, 4097 vdc_get_efi_convert}, 4098 {VD_OP_SET_EFI, DKIOCSETEFI, 0, 4099 vdc_set_efi_convert}, 4100 4101 /* 4102 
* These particular ioctls are not sent to the server - vdc fakes up 4103 * the necessary info. 4104 */ 4105 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 4106 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 4107 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 4108 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 4109 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 4110 }; 4111 4112 /* 4113 * Function: 4114 * vd_process_ioctl() 4115 * 4116 * Description: 4117 * This routine processes disk specific ioctl calls 4118 * 4119 * Arguments: 4120 * dev - the device number 4121 * cmd - the operation [dkio(7I)] to be processed 4122 * arg - pointer to user provided structure 4123 * (contains data to be set or reference parameter for get) 4124 * mode - bit flag, indicating open settings, 32/64 bit type, etc 4125 * 4126 * Return Code: 4127 * 0 4128 * EFAULT 4129 * ENXIO 4130 * EIO 4131 * ENOTSUP 4132 */ 4133 static int 4134 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 4135 { 4136 int instance = VDCUNIT(dev); 4137 vdc_t *vdc = NULL; 4138 int rv = -1; 4139 int idx = 0; /* index into dk_ioctl[] */ 4140 size_t len = 0; /* #bytes to send to vds */ 4141 size_t alloc_len = 0; /* #bytes to allocate mem for */ 4142 caddr_t mem_p = NULL; 4143 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 4144 struct vtoc vtoc_saved; 4145 vdc_dk_ioctl_t *iop; 4146 4147 vdc = ddi_get_soft_state(vdc_state, instance); 4148 if (vdc == NULL) { 4149 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 4150 instance); 4151 return (ENXIO); 4152 } 4153 4154 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 4155 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 4156 4157 /* 4158 * Validate the ioctl operation to be performed. 4159 * 4160 * If we have looped through the array without finding a match then we 4161 * don't support this ioctl. 
4162 */ 4163 for (idx = 0; idx < nioctls; idx++) { 4164 if (cmd == dk_ioctl[idx].cmd) 4165 break; 4166 } 4167 4168 if (idx >= nioctls) { 4169 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 4170 vdc->instance, cmd); 4171 return (ENOTSUP); 4172 } 4173 4174 iop = &(dk_ioctl[idx]); 4175 4176 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 4177 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 4178 dk_efi_t dk_efi; 4179 4180 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 4181 if (rv != 0) 4182 return (EFAULT); 4183 4184 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 4185 } else { 4186 len = iop->nbytes; 4187 } 4188 4189 /* 4190 * Deal with the ioctls which the server does not provide. vdc can 4191 * fake these up and return immediately 4192 */ 4193 switch (cmd) { 4194 case CDROMREADOFFSET: 4195 case DKIOCREMOVABLE: 4196 case USCSICMD: 4197 return (ENOTTY); 4198 4199 case DKIOCINFO: 4200 { 4201 struct dk_cinfo cinfo; 4202 if (vdc->cinfo == NULL) 4203 return (ENXIO); 4204 4205 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 4206 cinfo.dki_partition = VDCPART(dev); 4207 4208 rv = ddi_copyout(&cinfo, (void *)arg, 4209 sizeof (struct dk_cinfo), mode); 4210 if (rv != 0) 4211 return (EFAULT); 4212 4213 return (0); 4214 } 4215 4216 case DKIOCGMEDIAINFO: 4217 { 4218 if (vdc->minfo == NULL) 4219 return (ENXIO); 4220 4221 rv = ddi_copyout(vdc->minfo, (void *)arg, 4222 sizeof (struct dk_minfo), mode); 4223 if (rv != 0) 4224 return (EFAULT); 4225 4226 return (0); 4227 } 4228 4229 case DKIOCFLUSHWRITECACHE: 4230 { 4231 struct dk_callback *dkc = (struct dk_callback *)arg; 4232 vdc_dk_arg_t *dkarg = NULL; 4233 4234 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 4235 instance, mode); 4236 4237 /* 4238 * If the backing device is not a 'real' disk then the 4239 * W$ operation request to the vDisk server will fail 4240 * so we might as well save the cycles and return now. 
4241 */ 4242 if (vdc->vdisk_type != VD_DISK_TYPE_DISK) 4243 return (ENOTTY); 4244 4245 /* 4246 * If arg is NULL, then there is no callback function 4247 * registered and the call operates synchronously; we 4248 * break and continue with the rest of the function and 4249 * wait for vds to return (i.e. after the request to 4250 * vds returns successfully, all writes completed prior 4251 * to the ioctl will have been flushed from the disk 4252 * write cache to persistent media. 4253 * 4254 * If a callback function is registered, we dispatch 4255 * the request on a task queue and return immediately. 4256 * The callback will deal with informing the calling 4257 * thread that the flush request is completed. 4258 */ 4259 if (dkc == NULL) 4260 break; 4261 4262 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 4263 4264 dkarg->mode = mode; 4265 dkarg->dev = dev; 4266 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 4267 4268 mutex_enter(&vdc->lock); 4269 vdc->dkio_flush_pending++; 4270 dkarg->vdc = vdc; 4271 mutex_exit(&vdc->lock); 4272 4273 /* put the request on a task queue */ 4274 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 4275 (void *)dkarg, DDI_SLEEP); 4276 if (rv == NULL) { 4277 /* clean up if dispatch fails */ 4278 mutex_enter(&vdc->lock); 4279 vdc->dkio_flush_pending--; 4280 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 4281 } 4282 4283 return (rv == NULL ? ENOMEM : 0); 4284 } 4285 } 4286 4287 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 4288 ASSERT(iop->op != 0); 4289 4290 /* LDC requires that the memory being mapped is 8-byte aligned */ 4291 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 4292 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 4293 instance, len, alloc_len); 4294 4295 ASSERT(alloc_len != 0); /* sanity check */ 4296 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 4297 4298 if (cmd == DKIOCSVTOC) { 4299 /* 4300 * Save a copy of the current VTOC so that we can roll back 4301 * if the setting of the new VTOC fails. 
4302 */ 4303 bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc)); 4304 } 4305 4306 /* 4307 * Call the conversion function for this ioctl whhich if necessary 4308 * converts from the Solaris format to the format ARC'ed 4309 * as part of the vDisk protocol (FWARC 2006/195) 4310 */ 4311 ASSERT(iop->convert != NULL); 4312 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 4313 if (rv != 0) { 4314 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4315 instance, rv, cmd); 4316 if (mem_p != NULL) 4317 kmem_free(mem_p, alloc_len); 4318 return (rv); 4319 } 4320 4321 /* 4322 * send request to vds to service the ioctl. 4323 */ 4324 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 4325 VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 4326 VIO_both_dir); 4327 4328 if (rv != 0) { 4329 /* 4330 * This is not necessarily an error. The ioctl could 4331 * be returning a value such as ENOTTY to indicate 4332 * that the ioctl is not applicable. 4333 */ 4334 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 4335 instance, rv, cmd); 4336 if (mem_p != NULL) 4337 kmem_free(mem_p, alloc_len); 4338 4339 if (cmd == DKIOCSVTOC) { 4340 /* update of the VTOC has failed, roll back */ 4341 bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc)); 4342 } 4343 4344 return (rv); 4345 } 4346 4347 if (cmd == DKIOCSVTOC) { 4348 /* 4349 * The VTOC has been changed. We need to update the device 4350 * nodes to handle the case where an EFI label has been 4351 * changed to a VTOC label. We also try and update the device 4352 * node properties. Failing to set the properties should 4353 * not cause an error to be return the caller though. 4354 */ 4355 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4356 (void) vdc_create_device_nodes_vtoc(vdc); 4357 4358 if (vdc_create_device_nodes_props(vdc)) { 4359 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4360 " properties", vdc->instance); 4361 } 4362 4363 } else if (cmd == DKIOCSETEFI) { 4364 /* 4365 * The EFI has been changed. 
We need to update the device 4366 * nodes to handle the case where a VTOC label has been 4367 * changed to an EFI label. We also try and update the device 4368 * node properties. Failing to set the properties should 4369 * not cause an error to be return the caller though. 4370 */ 4371 struct dk_gpt *efi; 4372 size_t efi_len; 4373 4374 vdc->vdisk_label = VD_DISK_LABEL_EFI; 4375 (void) vdc_create_device_nodes_efi(vdc); 4376 4377 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 4378 4379 if (rv == 0) { 4380 vdc_store_efi(vdc, efi); 4381 rv = vdc_create_device_nodes_props(vdc); 4382 vd_efi_free(efi, efi_len); 4383 } 4384 4385 if (rv) { 4386 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4387 " properties", vdc->instance); 4388 } 4389 } 4390 4391 /* 4392 * Call the conversion function (if it exists) for this ioctl 4393 * which converts from the format ARC'ed as part of the vDisk 4394 * protocol (FWARC 2006/195) back to a format understood by 4395 * the rest of Solaris. 4396 */ 4397 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 4398 if (rv != 0) { 4399 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4400 instance, rv, cmd); 4401 if (mem_p != NULL) 4402 kmem_free(mem_p, alloc_len); 4403 return (rv); 4404 } 4405 4406 if (mem_p != NULL) 4407 kmem_free(mem_p, alloc_len); 4408 4409 return (rv); 4410 } 4411 4412 /* 4413 * Function: 4414 * 4415 * Description: 4416 * This is an empty conversion function used by ioctl calls which 4417 * do not need to convert the data being passed in/out to userland 4418 */ 4419 static int 4420 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 4421 { 4422 _NOTE(ARGUNUSED(vdc)) 4423 _NOTE(ARGUNUSED(from)) 4424 _NOTE(ARGUNUSED(to)) 4425 _NOTE(ARGUNUSED(mode)) 4426 _NOTE(ARGUNUSED(dir)) 4427 4428 return (0); 4429 } 4430 4431 static int 4432 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 4433 int mode, int dir) 4434 { 4435 _NOTE(ARGUNUSED(vdc)) 4436 4437 if (dir == VD_COPYIN) 4438 return 
(0); /* nothing to do */ 4439 4440 if (ddi_copyout(from, to, sizeof (int), mode) != 0) 4441 return (EFAULT); 4442 4443 return (0); 4444 } 4445 4446 static int 4447 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 4448 int mode, int dir) 4449 { 4450 _NOTE(ARGUNUSED(vdc)) 4451 4452 if (dir == VD_COPYOUT) 4453 return (0); /* nothing to do */ 4454 4455 if (ddi_copyin(from, to, sizeof (int), mode) != 0) 4456 return (EFAULT); 4457 4458 return (0); 4459 } 4460 4461 /* 4462 * Function: 4463 * vdc_get_vtoc_convert() 4464 * 4465 * Description: 4466 * This routine performs the necessary convertions from the DKIOCGVTOC 4467 * Solaris structure to the format defined in FWARC 2006/195. 4468 * 4469 * In the struct vtoc definition, the timestamp field is marked as not 4470 * supported so it is not part of vDisk protocol (FWARC 2006/195). 4471 * However SVM uses that field to check it can write into the VTOC, 4472 * so we fake up the info of that field. 4473 * 4474 * Arguments: 4475 * vdc - the vDisk client 4476 * from - the buffer containing the data to be copied from 4477 * to - the buffer to be copied to 4478 * mode - flags passed to ioctl() call 4479 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 4480 * 4481 * Return Code: 4482 * 0 - Success 4483 * ENXIO - incorrect buffer passed in. 4484 * EFAULT - ddi_copyout routine encountered an error. 
4485 */ 4486 static int 4487 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4488 { 4489 int i; 4490 void *tmp_mem = NULL; 4491 void *tmp_memp; 4492 struct vtoc vt; 4493 struct vtoc32 vt32; 4494 int copy_len = 0; 4495 int rv = 0; 4496 4497 if (dir != VD_COPYOUT) 4498 return (0); /* nothing to do */ 4499 4500 if ((from == NULL) || (to == NULL)) 4501 return (ENXIO); 4502 4503 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4504 copy_len = sizeof (struct vtoc32); 4505 else 4506 copy_len = sizeof (struct vtoc); 4507 4508 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4509 4510 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 4511 4512 /* fake the VTOC timestamp field */ 4513 for (i = 0; i < V_NUMPAR; i++) { 4514 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 4515 } 4516 4517 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4518 vtoctovtoc32(vt, vt32); 4519 tmp_memp = &vt32; 4520 } else { 4521 tmp_memp = &vt; 4522 } 4523 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 4524 if (rv != 0) 4525 rv = EFAULT; 4526 4527 kmem_free(tmp_mem, copy_len); 4528 return (rv); 4529 } 4530 4531 /* 4532 * Function: 4533 * vdc_set_vtoc_convert() 4534 * 4535 * Description: 4536 * This routine performs the necessary convertions from the DKIOCSVTOC 4537 * Solaris structure to the format defined in FWARC 2006/195. 
4538 * 4539 * Arguments: 4540 * vdc - the vDisk client 4541 * from - Buffer with data 4542 * to - Buffer where data is to be copied to 4543 * mode - flags passed to ioctl 4544 * dir - direction of copy (in or out) 4545 * 4546 * Return Code: 4547 * 0 - Success 4548 * ENXIO - Invalid buffer passed in 4549 * EFAULT - ddi_copyin of data failed 4550 */ 4551 static int 4552 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4553 { 4554 void *tmp_mem = NULL; 4555 struct vtoc vt; 4556 struct vtoc *vtp = &vt; 4557 vd_vtoc_t vtvd; 4558 int copy_len = 0; 4559 int rv = 0; 4560 4561 if (dir != VD_COPYIN) 4562 return (0); /* nothing to do */ 4563 4564 if ((from == NULL) || (to == NULL)) 4565 return (ENXIO); 4566 4567 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4568 copy_len = sizeof (struct vtoc32); 4569 else 4570 copy_len = sizeof (struct vtoc); 4571 4572 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4573 4574 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4575 if (rv != 0) { 4576 kmem_free(tmp_mem, copy_len); 4577 return (EFAULT); 4578 } 4579 4580 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4581 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 4582 } else { 4583 vtp = tmp_mem; 4584 } 4585 4586 /* 4587 * The VTOC is being changed, then vdc needs to update the copy 4588 * it saved in the soft state structure. 
4589 */ 4590 bcopy(vtp, vdc->vtoc, sizeof (struct vtoc)); 4591 4592 VTOC2VD_VTOC(vtp, &vtvd); 4593 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 4594 kmem_free(tmp_mem, copy_len); 4595 4596 return (0); 4597 } 4598 4599 /* 4600 * Function: 4601 * vdc_get_geom_convert() 4602 * 4603 * Description: 4604 * This routine performs the necessary convertions from the DKIOCGGEOM, 4605 * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 4606 * defined in FWARC 2006/195 4607 * 4608 * Arguments: 4609 * vdc - the vDisk client 4610 * from - Buffer with data 4611 * to - Buffer where data is to be copied to 4612 * mode - flags passed to ioctl 4613 * dir - direction of copy (in or out) 4614 * 4615 * Return Code: 4616 * 0 - Success 4617 * ENXIO - Invalid buffer passed in 4618 * EFAULT - ddi_copyout of data failed 4619 */ 4620 static int 4621 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4622 { 4623 _NOTE(ARGUNUSED(vdc)) 4624 4625 struct dk_geom geom; 4626 int copy_len = sizeof (struct dk_geom); 4627 int rv = 0; 4628 4629 if (dir != VD_COPYOUT) 4630 return (0); /* nothing to do */ 4631 4632 if ((from == NULL) || (to == NULL)) 4633 return (ENXIO); 4634 4635 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 4636 rv = ddi_copyout(&geom, to, copy_len, mode); 4637 if (rv != 0) 4638 rv = EFAULT; 4639 4640 return (rv); 4641 } 4642 4643 /* 4644 * Function: 4645 * vdc_set_geom_convert() 4646 * 4647 * Description: 4648 * This routine performs the necessary convertions from the DKIOCSGEOM 4649 * Solaris structure to the format defined in FWARC 2006/195. 
4650 * 4651 * Arguments: 4652 * vdc - the vDisk client 4653 * from - Buffer with data 4654 * to - Buffer where data is to be copied to 4655 * mode - flags passed to ioctl 4656 * dir - direction of copy (in or out) 4657 * 4658 * Return Code: 4659 * 0 - Success 4660 * ENXIO - Invalid buffer passed in 4661 * EFAULT - ddi_copyin of data failed 4662 */ 4663 static int 4664 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4665 { 4666 _NOTE(ARGUNUSED(vdc)) 4667 4668 vd_geom_t vdgeom; 4669 void *tmp_mem = NULL; 4670 int copy_len = sizeof (struct dk_geom); 4671 int rv = 0; 4672 4673 if (dir != VD_COPYIN) 4674 return (0); /* nothing to do */ 4675 4676 if ((from == NULL) || (to == NULL)) 4677 return (ENXIO); 4678 4679 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4680 4681 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4682 if (rv != 0) { 4683 kmem_free(tmp_mem, copy_len); 4684 return (EFAULT); 4685 } 4686 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 4687 bcopy(&vdgeom, to, sizeof (vdgeom)); 4688 kmem_free(tmp_mem, copy_len); 4689 4690 return (0); 4691 } 4692 4693 static int 4694 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4695 { 4696 _NOTE(ARGUNUSED(vdc)) 4697 4698 vd_efi_t *vd_efi; 4699 dk_efi_t dk_efi; 4700 int rv = 0; 4701 void *uaddr; 4702 4703 if ((from == NULL) || (to == NULL)) 4704 return (ENXIO); 4705 4706 if (dir == VD_COPYIN) { 4707 4708 vd_efi = (vd_efi_t *)to; 4709 4710 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 4711 if (rv != 0) 4712 return (EFAULT); 4713 4714 vd_efi->lba = dk_efi.dki_lba; 4715 vd_efi->length = dk_efi.dki_length; 4716 bzero(vd_efi->data, vd_efi->length); 4717 4718 } else { 4719 4720 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 4721 if (rv != 0) 4722 return (EFAULT); 4723 4724 uaddr = dk_efi.dki_data; 4725 4726 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 4727 4728 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 4729 4730 rv = ddi_copyout(dk_efi.dki_data, 
uaddr, dk_efi.dki_length, 4731 mode); 4732 if (rv != 0) 4733 return (EFAULT); 4734 4735 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 4736 } 4737 4738 return (0); 4739 } 4740 4741 static int 4742 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4743 { 4744 _NOTE(ARGUNUSED(vdc)) 4745 4746 dk_efi_t dk_efi; 4747 void *uaddr; 4748 4749 if (dir == VD_COPYOUT) 4750 return (0); /* nothing to do */ 4751 4752 if ((from == NULL) || (to == NULL)) 4753 return (ENXIO); 4754 4755 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 4756 return (EFAULT); 4757 4758 uaddr = dk_efi.dki_data; 4759 4760 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 4761 4762 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 4763 return (EFAULT); 4764 4765 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 4766 4767 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 4768 4769 return (0); 4770 } 4771 4772 /* 4773 * Function: 4774 * vdc_create_fake_geometry() 4775 * 4776 * Description: 4777 * This routine fakes up the disk info needed for some DKIO ioctls. 4778 * - DKIOCINFO 4779 * - DKIOCGMEDIAINFO 4780 * 4781 * [ just like lofi(7D) and ramdisk(7D) ] 4782 * 4783 * Arguments: 4784 * vdc - soft state pointer for this instance of the device driver. 
4785 * 4786 * Return Code: 4787 * 0 - Success 4788 */ 4789 static int 4790 vdc_create_fake_geometry(vdc_t *vdc) 4791 { 4792 ASSERT(vdc != NULL); 4793 4794 /* 4795 * Check if max_xfer_sz and vdisk_size are valid 4796 */ 4797 if (vdc->vdisk_size == 0 || vdc->max_xfer_sz == 0) 4798 return (EIO); 4799 4800 /* 4801 * DKIOCINFO support 4802 */ 4803 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 4804 4805 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 4806 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 4807 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 4808 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 4809 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 4810 vdc->cinfo->dki_flags = DKI_FMTVOL; 4811 vdc->cinfo->dki_cnum = 0; 4812 vdc->cinfo->dki_addr = 0; 4813 vdc->cinfo->dki_space = 0; 4814 vdc->cinfo->dki_prio = 0; 4815 vdc->cinfo->dki_vec = 0; 4816 vdc->cinfo->dki_unit = vdc->instance; 4817 vdc->cinfo->dki_slave = 0; 4818 /* 4819 * The partition number will be created on the fly depending on the 4820 * actual slice (i.e. minor node) that is used to request the data. 4821 */ 4822 vdc->cinfo->dki_partition = 0; 4823 4824 /* 4825 * DKIOCGMEDIAINFO support 4826 */ 4827 if (vdc->minfo == NULL) 4828 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 4829 vdc->minfo->dki_media_type = DK_FIXED_DISK; 4830 vdc->minfo->dki_capacity = vdc->vdisk_size; 4831 vdc->minfo->dki_lbsize = DEV_BSIZE; 4832 4833 return (0); 4834 } 4835 4836 /* 4837 * Function: 4838 * vdc_setup_disk_layout() 4839 * 4840 * Description: 4841 * This routine discovers all the necessary details about the "disk" 4842 * by requesting the data that is available from the vDisk server and by 4843 * faking up the rest of the data. 4844 * 4845 * Arguments: 4846 * vdc - soft state pointer for this instance of the device driver. 
4847 * 4848 * Return Code: 4849 * 0 - Success 4850 */ 4851 static int 4852 vdc_setup_disk_layout(vdc_t *vdc) 4853 { 4854 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 4855 dev_t dev; 4856 int slice = 0; 4857 int rv, error; 4858 4859 ASSERT(vdc != NULL); 4860 4861 if (vdc->vtoc == NULL) 4862 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 4863 4864 dev = makedevice(ddi_driver_major(vdc->dip), 4865 VD_MAKE_DEV(vdc->instance, 0)); 4866 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); 4867 4868 if (rv && rv != ENOTSUP) { 4869 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 4870 vdc->instance, rv); 4871 return (rv); 4872 } 4873 4874 /* 4875 * The process of attempting to read VTOC will initiate 4876 * the handshake and establish a connection. Following 4877 * handshake, go ahead and create geometry. 4878 */ 4879 error = vdc_create_fake_geometry(vdc); 4880 if (error != 0) { 4881 DMSG(vdc, 0, "[%d] Failed to create disk geometry (err%d)", 4882 vdc->instance, error); 4883 return (error); 4884 } 4885 4886 if (rv == ENOTSUP) { 4887 /* 4888 * If the device does not support VTOC then we try 4889 * to read an EFI label. 4890 */ 4891 struct dk_gpt *efi; 4892 size_t efi_len; 4893 4894 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 4895 4896 if (rv) { 4897 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 4898 vdc->instance, rv); 4899 return (rv); 4900 } 4901 4902 vdc->vdisk_label = VD_DISK_LABEL_EFI; 4903 vdc_store_efi(vdc, efi); 4904 vd_efi_free(efi, efi_len); 4905 4906 return (0); 4907 } 4908 4909 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4910 4911 /* 4912 * FUTURE: This could be default way for reading the VTOC 4913 * from the disk as supposed to sending the VD_OP_GET_VTOC 4914 * to the server. Currently this is a sanity check. 4915 * 4916 * find the slice that represents the entire "disk" and use that to 4917 * read the disk label. 
The convention in Solaris is that slice 2 4918 * represents the whole disk so we check that it is, otherwise we 4919 * default to slice 0 4920 */ 4921 if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && 4922 (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { 4923 slice = 2; 4924 } else { 4925 slice = 0; 4926 } 4927 4928 /* 4929 * Read disk label from start of disk 4930 */ 4931 vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); 4932 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4933 bioinit(buf); 4934 buf->b_un.b_addr = (caddr_t)vdc->label; 4935 buf->b_bcount = DK_LABEL_SIZE; 4936 buf->b_flags = B_BUSY | B_READ; 4937 buf->b_dev = dev; 4938 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)vdc->label, 4939 DK_LABEL_SIZE, slice, 0, CB_STRATEGY, buf, VIO_read_dir); 4940 if (rv) { 4941 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 4942 vdc->instance); 4943 kmem_free(buf, sizeof (buf_t)); 4944 return (rv); 4945 } 4946 rv = biowait(buf); 4947 biofini(buf); 4948 kmem_free(buf, sizeof (buf_t)); 4949 4950 return (rv); 4951 } 4952 4953 /* 4954 * Function: 4955 * vdc_setup_devid() 4956 * 4957 * Description: 4958 * This routine discovers the devid of a vDisk. It requests the devid of 4959 * the underlying device from the vDisk server, builds an encapsulated 4960 * devid based on the retrieved devid and registers that new devid to 4961 * the vDisk. 4962 * 4963 * Arguments: 4964 * vdc - soft state pointer for this instance of the device driver. 4965 * 4966 * Return Code: 4967 * 0 - A devid was succesfully registered for the vDisk 4968 */ 4969 static int 4970 vdc_setup_devid(vdc_t *vdc) 4971 { 4972 int rv; 4973 vd_devid_t *vd_devid; 4974 size_t bufsize, bufid_len; 4975 4976 /* 4977 * At first sight, we don't know the size of the devid that the 4978 * server will return but this size will be encoded into the 4979 * reply. So we do a first request using a default size then we 4980 * check if this size was large enough. 
If not then we do a second 4981 * request with the correct size returned by the server. Note that 4982 * ldc requires size to be 8-byte aligned. 4983 */ 4984 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 4985 sizeof (uint64_t)); 4986 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 4987 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 4988 4989 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 4990 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir); 4991 4992 DMSG(vdc, 2, "sync_op returned %d\n", rv); 4993 4994 if (rv) { 4995 kmem_free(vd_devid, bufsize); 4996 return (rv); 4997 } 4998 4999 if (vd_devid->length > bufid_len) { 5000 /* 5001 * The returned devid is larger than the buffer used. Try again 5002 * with a buffer with the right size. 5003 */ 5004 kmem_free(vd_devid, bufsize); 5005 bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 5006 sizeof (uint64_t)); 5007 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5008 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5009 5010 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 5011 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 5012 VIO_both_dir); 5013 5014 if (rv) { 5015 kmem_free(vd_devid, bufsize); 5016 return (rv); 5017 } 5018 } 5019 5020 /* 5021 * The virtual disk should have the same device id as the one associated 5022 * with the physical disk it is mapped on, otherwise sharing a disk 5023 * between a LDom and a non-LDom may not work (for example for a shared 5024 * SVM disk set). 5025 * 5026 * The DDI framework does not allow creating a device id with any 5027 * type so we first create a device id of type DEVID_ENCAP and then 5028 * we restore the orignal type of the physical device. 
5029 */ 5030 5031 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 5032 5033 /* build an encapsulated devid based on the returned devid */ 5034 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 5035 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 5036 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 5037 kmem_free(vd_devid, bufsize); 5038 return (1); 5039 } 5040 5041 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 5042 5043 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 5044 5045 kmem_free(vd_devid, bufsize); 5046 5047 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 5048 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 5049 return (1); 5050 } 5051 5052 return (0); 5053 } 5054 5055 static void 5056 vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi) 5057 { 5058 struct vtoc *vtoc = vdc->vtoc; 5059 5060 vd_efi_to_vtoc(efi, vtoc); 5061 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5062 /* 5063 * vd_efi_to_vtoc() will store information about the EFI Sun 5064 * reserved partition (representing the entire disk) into 5065 * partition 7. However single-slice device will only have 5066 * that single partition and the vdc driver expects to find 5067 * information about that partition in slice 0. So we need 5068 * to copy information from slice 7 to slice 0. 5069 */ 5070 vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag; 5071 vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag; 5072 vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start; 5073 vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; 5074 } 5075 } 5076