1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. 
vdc will copy the data to be written to the descriptor 49 * ring or maps the buffer to store the data read by the vDisk 50 * server into the descriptor ring. It then sends a message to the 51 * vDisk server requesting it to complete the operation. 52 * 53 * 4) Handling responses from vDisk server. 54 * The vDisk server will ACK some or all of the messages vdc sends to it 55 * (this is configured during the handshake). Upon receipt of an ACK 56 * vdc will check the descriptor ring and signal to the upper layer 57 * code waiting on the IO. 58 */ 59 60 #include <sys/atomic.h> 61 #include <sys/conf.h> 62 #include <sys/disp.h> 63 #include <sys/ddi.h> 64 #include <sys/dkio.h> 65 #include <sys/efi_partition.h> 66 #include <sys/fcntl.h> 67 #include <sys/file.h> 68 #include <sys/mach_descrip.h> 69 #include <sys/modctl.h> 70 #include <sys/mdeg.h> 71 #include <sys/note.h> 72 #include <sys/open.h> 73 #include <sys/sdt.h> 74 #include <sys/stat.h> 75 #include <sys/sunddi.h> 76 #include <sys/types.h> 77 #include <sys/promif.h> 78 #include <sys/vtoc.h> 79 #include <sys/archsystm.h> 80 #include <sys/sysmacros.h> 81 82 #include <sys/cdio.h> 83 #include <sys/dktp/cm.h> 84 #include <sys/dktp/fdisk.h> 85 #include <sys/scsi/generic/sense.h> 86 #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ 87 #include <sys/scsi/targets/sddef.h> 88 89 #include <sys/ldoms.h> 90 #include <sys/ldc.h> 91 #include <sys/vio_common.h> 92 #include <sys/vio_mailbox.h> 93 #include <sys/vdsk_common.h> 94 #include <sys/vdsk_mailbox.h> 95 #include <sys/vdc.h> 96 97 /* 98 * function prototypes 99 */ 100 101 /* standard driver functions */ 102 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 103 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 104 static int vdc_strategy(struct buf *buf); 105 static int vdc_print(dev_t dev, char *str); 106 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 107 static int vdc_read(dev_t dev, struct uio 
*uio, cred_t *cred); 108 static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 109 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 110 cred_t *credp, int *rvalp); 111 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 112 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 113 114 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 115 void *arg, void **resultp); 116 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 117 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 118 119 /* setup */ 120 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 121 static int vdc_do_ldc_init(vdc_t *vdc); 122 static int vdc_start_ldc_connection(vdc_t *vdc); 123 static int vdc_create_device_nodes(vdc_t *vdc); 124 static int vdc_create_device_nodes_efi(vdc_t *vdc); 125 static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 126 static int vdc_create_device_nodes_props(vdc_t *vdc); 127 static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id); 128 static int vdc_do_ldc_up(vdc_t *vdc); 129 static void vdc_terminate_ldc(vdc_t *vdc); 130 static int vdc_init_descriptor_ring(vdc_t *vdc); 131 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 132 static int vdc_setup_devid(vdc_t *vdc); 133 static void vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi); 134 135 /* handshake with vds */ 136 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 137 static int vdc_ver_negotiation(vdc_t *vdcp); 138 static int vdc_init_attr_negotiation(vdc_t *vdc); 139 static int vdc_attr_negotiation(vdc_t *vdcp); 140 static int vdc_init_dring_negotiate(vdc_t *vdc); 141 static int vdc_dring_negotiation(vdc_t *vdcp); 142 static int vdc_send_rdx(vdc_t *vdcp); 143 static int vdc_rdx_exchange(vdc_t *vdcp); 144 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 145 146 /* processing incoming messages from vDisk server */ 147 static void vdc_process_msg_thread(vdc_t *vdc); 148 static int 
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 149 150 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 151 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 152 static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg); 153 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 154 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 155 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 156 static int vdc_send_request(vdc_t *vdcp, int operation, 157 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 158 int cb_type, void *cb_arg, vio_desc_direction_t dir); 159 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 160 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 161 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 162 int cb_type, void *cb_arg, vio_desc_direction_t dir); 163 static int vdc_do_sync_op(vdc_t *vdcp, int operation, 164 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 165 int cb_type, void *cb_arg, vio_desc_direction_t dir); 166 167 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 168 static int vdc_drain_response(vdc_t *vdcp); 169 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 170 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 171 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 172 173 /* dkio */ 174 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); 175 static int vdc_create_fake_geometry(vdc_t *vdc); 176 static int vdc_setup_disk_layout(vdc_t *vdc); 177 static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 178 int mode, int dir); 179 static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 180 int mode, int dir); 181 static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 182 int mode, int dir); 183 static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 184 int mode, int dir); 185 static 
int	vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);

/*
 * Module variables
 */

/*
 * Tunable variables to control how long vdc waits before timing out on
 * various operations
 */
static int	vdc_retries = 10;

/*
 * calculated from 'vdc_usec_timeout' during attach; vdc_do_attach() stores
 * the result of drv_usectohz() here, i.e. a count of clock ticks.
 */
static uint64_t	vdc_hz_timeout;				/* units: ticks */
static uint64_t	vdc_usec_timeout = 30 * MICROSEC;	/* 30s units: usec */

/*
 * Min/max LDC delays. The '*_timeout_ldc' values are handed to
 * drv_usectohz() during attach (so they are in microseconds) and the
 * results are stored in the matching 'vdc_hz_*' variables.
 */
static uint64_t	vdc_hz_min_ldc_delay;
static uint64_t	vdc_min_timeout_ldc = 1 * MILLISEC;
static uint64_t	vdc_hz_max_ldc_delay;
static uint64_t	vdc_max_timeout_ldc = 100 * MILLISEC;

/* NOTE(review): units not established here - presumably usec; confirm */
static uint64_t	vdc_ldc_read_init_delay = 1 * MILLISEC;
static uint64_t	vdc_ldc_read_max_delay = 100 * MILLISEC;

/* values for dumping - need to run in a tighter loop */
static uint64_t	vdc_usec_timeout_dump = 100 * MILLISEC;	/* 0.1s units: usec */
static int	vdc_dump_retries = 100;

/* Count of the number of vdc instances attached */
static volatile uint32_t	vdc_instance_count = 0;

/* Soft state pointer */
static void	*vdc_state;

/*
 * Controlling the verbosity of the error/debug messages
 *
 * vdc_msglevel - controls level of messages
 * vdc_matchinst - 64-bit variable where each bit corresponds
 *                 to the vdc instance the vdc_msglevel applies.
 */
int		vdc_msglevel = 0x0;
uint64_t	vdc_matchinst = 0ull;

/*
 * Supported vDisk protocol version pairs.
242 * 243 * The first array entry is the latest and preferred version. 244 */ 245 static const vio_ver_t vdc_version[] = {{1, 0}}; 246 247 static struct cb_ops vdc_cb_ops = { 248 vdc_open, /* cb_open */ 249 vdc_close, /* cb_close */ 250 vdc_strategy, /* cb_strategy */ 251 vdc_print, /* cb_print */ 252 vdc_dump, /* cb_dump */ 253 vdc_read, /* cb_read */ 254 vdc_write, /* cb_write */ 255 vdc_ioctl, /* cb_ioctl */ 256 nodev, /* cb_devmap */ 257 nodev, /* cb_mmap */ 258 nodev, /* cb_segmap */ 259 nochpoll, /* cb_chpoll */ 260 ddi_prop_op, /* cb_prop_op */ 261 NULL, /* cb_str */ 262 D_MP | D_64BIT, /* cb_flag */ 263 CB_REV, /* cb_rev */ 264 vdc_aread, /* cb_aread */ 265 vdc_awrite /* cb_awrite */ 266 }; 267 268 static struct dev_ops vdc_ops = { 269 DEVO_REV, /* devo_rev */ 270 0, /* devo_refcnt */ 271 vdc_getinfo, /* devo_getinfo */ 272 nulldev, /* devo_identify */ 273 nulldev, /* devo_probe */ 274 vdc_attach, /* devo_attach */ 275 vdc_detach, /* devo_detach */ 276 nodev, /* devo_reset */ 277 &vdc_cb_ops, /* devo_cb_ops */ 278 NULL, /* devo_bus_ops */ 279 nulldev /* devo_power */ 280 }; 281 282 static struct modldrv modldrv = { 283 &mod_driverops, 284 "virtual disk client %I%", 285 &vdc_ops, 286 }; 287 288 static struct modlinkage modlinkage = { 289 MODREV_1, 290 &modldrv, 291 NULL 292 }; 293 294 /* -------------------------------------------------------------------------- */ 295 296 /* 297 * Device Driver housekeeping and setup 298 */ 299 300 int 301 _init(void) 302 { 303 int status; 304 305 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 306 return (status); 307 if ((status = mod_install(&modlinkage)) != 0) 308 ddi_soft_state_fini(&vdc_state); 309 vdc_efi_init(vd_process_ioctl); 310 return (status); 311 } 312 313 int 314 _info(struct modinfo *modinfop) 315 { 316 return (mod_info(&modlinkage, modinfop)); 317 } 318 319 int 320 _fini(void) 321 { 322 int status; 323 324 if ((status = mod_remove(&modlinkage)) != 0) 325 return (status); 326 
vdc_efi_fini(); 327 ddi_soft_state_fini(&vdc_state); 328 return (0); 329 } 330 331 static int 332 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 333 { 334 _NOTE(ARGUNUSED(dip)) 335 336 int instance = SDUNIT((dev_t)arg); 337 vdc_t *vdc = NULL; 338 339 switch (cmd) { 340 case DDI_INFO_DEVT2DEVINFO: 341 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 342 *resultp = NULL; 343 return (DDI_FAILURE); 344 } 345 *resultp = vdc->dip; 346 return (DDI_SUCCESS); 347 case DDI_INFO_DEVT2INSTANCE: 348 *resultp = (void *)(uintptr_t)instance; 349 return (DDI_SUCCESS); 350 default: 351 *resultp = NULL; 352 return (DDI_FAILURE); 353 } 354 } 355 356 static int 357 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 358 { 359 int instance; 360 int rv; 361 vdc_t *vdc = NULL; 362 363 switch (cmd) { 364 case DDI_DETACH: 365 /* the real work happens below */ 366 break; 367 case DDI_SUSPEND: 368 /* nothing to do for this non-device */ 369 return (DDI_SUCCESS); 370 default: 371 return (DDI_FAILURE); 372 } 373 374 ASSERT(cmd == DDI_DETACH); 375 instance = ddi_get_instance(dip); 376 DMSGX(1, "[%d] Entered\n", instance); 377 378 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 379 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 380 return (DDI_FAILURE); 381 } 382 383 if (vdc->open_count) { 384 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 385 return (DDI_FAILURE); 386 } 387 388 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 389 390 /* mark instance as detaching */ 391 vdc->lifecycle = VDC_LC_DETACHING; 392 393 /* 394 * try and disable callbacks to prevent another handshake 395 */ 396 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 397 DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv); 398 399 if (vdc->initialized & VDC_THREAD) { 400 mutex_enter(&vdc->read_lock); 401 if ((vdc->read_state == VDC_READ_WAITING) || 402 (vdc->read_state == VDC_READ_RESET)) { 403 vdc->read_state = VDC_READ_RESET; 404 
cv_signal(&vdc->read_cv); 405 } 406 407 mutex_exit(&vdc->read_lock); 408 409 /* wake up any thread waiting for connection to come online */ 410 mutex_enter(&vdc->lock); 411 if (vdc->state == VDC_STATE_INIT_WAITING) { 412 DMSG(vdc, 0, 413 "[%d] write reset - move to resetting state...\n", 414 instance); 415 vdc->state = VDC_STATE_RESETTING; 416 cv_signal(&vdc->initwait_cv); 417 } 418 mutex_exit(&vdc->lock); 419 420 /* now wait until state transitions to VDC_STATE_DETACH */ 421 thread_join(vdc->msg_proc_thr->t_did); 422 ASSERT(vdc->state == VDC_STATE_DETACH); 423 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 424 vdc->instance); 425 } 426 427 mutex_enter(&vdc->lock); 428 429 if (vdc->initialized & VDC_DRING) 430 vdc_destroy_descriptor_ring(vdc); 431 432 if (vdc->initialized & VDC_LDC) 433 vdc_terminate_ldc(vdc); 434 435 mutex_exit(&vdc->lock); 436 437 if (vdc->initialized & VDC_MINOR) { 438 ddi_prop_remove_all(dip); 439 ddi_remove_minor_node(dip, NULL); 440 } 441 442 if (vdc->initialized & VDC_LOCKS) { 443 mutex_destroy(&vdc->lock); 444 mutex_destroy(&vdc->read_lock); 445 cv_destroy(&vdc->initwait_cv); 446 cv_destroy(&vdc->dring_free_cv); 447 cv_destroy(&vdc->membind_cv); 448 cv_destroy(&vdc->sync_pending_cv); 449 cv_destroy(&vdc->sync_blocked_cv); 450 cv_destroy(&vdc->read_cv); 451 cv_destroy(&vdc->running_cv); 452 } 453 454 if (vdc->minfo) 455 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 456 457 if (vdc->cinfo) 458 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 459 460 if (vdc->vtoc) 461 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 462 463 if (vdc->label) 464 kmem_free(vdc->label, DK_LABEL_SIZE); 465 466 if (vdc->devid) { 467 ddi_devid_unregister(dip); 468 ddi_devid_free(vdc->devid); 469 } 470 471 if (vdc->initialized & VDC_SOFT_STATE) 472 ddi_soft_state_free(vdc_state, instance); 473 474 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 475 476 return (DDI_SUCCESS); 477 } 478 479 480 static int 481 vdc_do_attach(dev_info_t *dip) 482 { 483 int 
instance; 484 vdc_t *vdc = NULL; 485 int status; 486 487 ASSERT(dip != NULL); 488 489 instance = ddi_get_instance(dip); 490 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 491 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 492 instance); 493 return (DDI_FAILURE); 494 } 495 496 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 497 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 498 return (DDI_FAILURE); 499 } 500 501 /* 502 * We assign the value to initialized in this case to zero out the 503 * variable and then set bits in it to indicate what has been done 504 */ 505 vdc->initialized = VDC_SOFT_STATE; 506 507 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 508 509 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 510 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 511 512 vdc->dip = dip; 513 vdc->instance = instance; 514 vdc->open_count = 0; 515 vdc->vdisk_type = VD_DISK_TYPE_UNK; 516 vdc->vdisk_label = VD_DISK_LABEL_UNK; 517 vdc->state = VDC_STATE_INIT; 518 vdc->lifecycle = VDC_LC_ATTACHING; 519 vdc->ldc_state = 0; 520 vdc->session_id = 0; 521 vdc->block_size = DEV_BSIZE; 522 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 523 524 vdc->vtoc = NULL; 525 vdc->cinfo = NULL; 526 vdc->minfo = NULL; 527 528 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 529 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 530 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 531 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 532 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 533 534 vdc->threads_pending = 0; 535 vdc->sync_op_pending = B_FALSE; 536 vdc->sync_op_blocked = B_FALSE; 537 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 538 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 539 540 /* init blocking msg read functionality */ 541 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 542 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 543 vdc->read_state = VDC_READ_IDLE; 544 545 
vdc->initialized |= VDC_LOCKS; 546 547 /* initialise LDC channel which will be used to communicate with vds */ 548 if ((status = vdc_do_ldc_init(vdc)) != 0) { 549 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 550 goto return_status; 551 } 552 553 /* initialize the thread responsible for managing state with server */ 554 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 555 vdc, 0, &p0, TS_RUN, minclsyspri); 556 if (vdc->msg_proc_thr == NULL) { 557 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 558 instance); 559 return (DDI_FAILURE); 560 } 561 562 vdc->initialized |= VDC_THREAD; 563 564 atomic_inc_32(&vdc_instance_count); 565 566 /* 567 * Once the handshake is complete, we can use the DRing to send 568 * requests to the vDisk server to calculate the geometry and 569 * VTOC of the "disk" 570 */ 571 status = vdc_setup_disk_layout(vdc); 572 if (status != 0) { 573 DMSG(vdc, 0, "[%d] Failed to discover disk layout (err%d)", 574 vdc->instance, status); 575 goto return_status; 576 } 577 578 /* 579 * Now that we have the device info we can create the 580 * device nodes and properties 581 */ 582 status = vdc_create_device_nodes(vdc); 583 if (status) { 584 DMSG(vdc, 0, "[%d] Failed to create device nodes", 585 instance); 586 goto return_status; 587 } 588 status = vdc_create_device_nodes_props(vdc); 589 if (status) { 590 DMSG(vdc, 0, "[%d] Failed to create device nodes" 591 " properties (%d)", instance, status); 592 goto return_status; 593 } 594 595 /* 596 * Setup devid 597 */ 598 if (vdc_setup_devid(vdc)) { 599 DMSG(vdc, 0, "[%d] No device id available\n", instance); 600 } 601 602 ddi_report_dev(dip); 603 vdc->lifecycle = VDC_LC_ONLINE; 604 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 605 606 return_status: 607 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 608 return (status); 609 } 610 611 static int 612 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 613 { 614 int status; 615 616 switch (cmd) { 617 case 
DDI_ATTACH: 618 if ((status = vdc_do_attach(dip)) != 0) 619 (void) vdc_detach(dip, DDI_DETACH); 620 return (status); 621 case DDI_RESUME: 622 /* nothing to do for this non-device */ 623 return (DDI_SUCCESS); 624 default: 625 return (DDI_FAILURE); 626 } 627 } 628 629 static int 630 vdc_do_ldc_init(vdc_t *vdc) 631 { 632 int status = 0; 633 ldc_status_t ldc_state; 634 ldc_attr_t ldc_attr; 635 uint64_t ldc_id = 0; 636 dev_info_t *dip = NULL; 637 638 ASSERT(vdc != NULL); 639 640 dip = vdc->dip; 641 vdc->initialized |= VDC_LDC; 642 643 if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) { 644 DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property", 645 vdc->instance); 646 return (EIO); 647 } 648 vdc->ldc_id = ldc_id; 649 650 ldc_attr.devclass = LDC_DEV_BLK; 651 ldc_attr.instance = vdc->instance; 652 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 653 ldc_attr.mtu = VD_LDC_MTU; 654 655 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 656 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 657 if (status != 0) { 658 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 659 vdc->instance, ldc_id, status); 660 return (status); 661 } 662 vdc->initialized |= VDC_LDC_INIT; 663 } 664 status = ldc_status(vdc->ldc_handle, &ldc_state); 665 if (status != 0) { 666 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 667 vdc->instance, status); 668 return (status); 669 } 670 vdc->ldc_state = ldc_state; 671 672 if ((vdc->initialized & VDC_LDC_CB) == 0) { 673 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 674 (caddr_t)vdc); 675 if (status != 0) { 676 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 677 vdc->instance, status); 678 return (status); 679 } 680 vdc->initialized |= VDC_LDC_CB; 681 } 682 683 vdc->initialized |= VDC_LDC; 684 685 /* 686 * At this stage we have initialised LDC, we will now try and open 687 * the connection. 
688 */ 689 if (vdc->ldc_state == LDC_INIT) { 690 status = ldc_open(vdc->ldc_handle); 691 if (status != 0) { 692 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 693 vdc->instance, vdc->ldc_id, status); 694 return (status); 695 } 696 vdc->initialized |= VDC_LDC_OPEN; 697 } 698 699 return (status); 700 } 701 702 static int 703 vdc_start_ldc_connection(vdc_t *vdc) 704 { 705 int status = 0; 706 707 ASSERT(vdc != NULL); 708 709 ASSERT(MUTEX_HELD(&vdc->lock)); 710 711 status = vdc_do_ldc_up(vdc); 712 713 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 714 715 return (status); 716 } 717 718 static int 719 vdc_stop_ldc_connection(vdc_t *vdcp) 720 { 721 int status; 722 723 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 724 vdcp->state); 725 726 status = ldc_down(vdcp->ldc_handle); 727 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 728 729 vdcp->initialized &= ~VDC_HANDSHAKE; 730 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 731 732 return (status); 733 } 734 735 static int 736 vdc_create_device_nodes_efi(vdc_t *vdc) 737 { 738 ddi_remove_minor_node(vdc->dip, "h"); 739 ddi_remove_minor_node(vdc->dip, "h,raw"); 740 741 if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 742 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 743 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 744 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 745 vdc->instance); 746 return (EIO); 747 } 748 749 /* if any device node is created we set this flag */ 750 vdc->initialized |= VDC_MINOR; 751 752 if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 753 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 754 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 755 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 756 vdc->instance); 757 return (EIO); 758 } 759 760 return (0); 761 } 762 763 static int 764 vdc_create_device_nodes_vtoc(vdc_t *vdc) 765 { 766 ddi_remove_minor_node(vdc->dip, "wd"); 767 ddi_remove_minor_node(vdc->dip, "wd,raw"); 768 769 if (ddi_create_minor_node(vdc->dip, "h", 
S_IFBLK, 770 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 771 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 772 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 773 vdc->instance); 774 return (EIO); 775 } 776 777 /* if any device node is created we set this flag */ 778 vdc->initialized |= VDC_MINOR; 779 780 if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 781 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 782 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 783 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 784 vdc->instance); 785 return (EIO); 786 } 787 788 return (0); 789 } 790 791 /* 792 * Function: 793 * vdc_create_device_nodes 794 * 795 * Description: 796 * This function creates the block and character device nodes under 797 * /devices along with the node properties. It is called as part of 798 * the attach(9E) of the instance during the handshake with vds after 799 * vds has sent the attributes to vdc. 800 * 801 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 802 * of 2 is used in keeping with the Solaris convention that slice 2 803 * refers to a whole disk. 
Slices start at 'a' 804 * 805 * Parameters: 806 * vdc - soft state pointer 807 * 808 * Return Values 809 * 0 - Success 810 * EIO - Failed to create node 811 * EINVAL - Unknown type of disk exported 812 */ 813 static int 814 vdc_create_device_nodes(vdc_t *vdc) 815 { 816 char name[sizeof ("s,raw")]; 817 dev_info_t *dip = NULL; 818 int instance, status; 819 int num_slices = 1; 820 int i; 821 822 ASSERT(vdc != NULL); 823 824 instance = vdc->instance; 825 dip = vdc->dip; 826 827 switch (vdc->vdisk_type) { 828 case VD_DISK_TYPE_DISK: 829 num_slices = V_NUMPAR; 830 break; 831 case VD_DISK_TYPE_SLICE: 832 num_slices = 1; 833 break; 834 case VD_DISK_TYPE_UNK: 835 default: 836 return (EINVAL); 837 } 838 839 /* 840 * Minor nodes are different for EFI disks: EFI disks do not have 841 * a minor node 'g' for the minor number corresponding to slice 842 * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 843 * representing the whole disk. 844 */ 845 for (i = 0; i < num_slices; i++) { 846 847 if (i == VD_EFI_WD_SLICE) { 848 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 849 status = vdc_create_device_nodes_efi(vdc); 850 else 851 status = vdc_create_device_nodes_vtoc(vdc); 852 if (status != 0) 853 return (status); 854 continue; 855 } 856 857 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 858 if (ddi_create_minor_node(dip, name, S_IFBLK, 859 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 860 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 861 instance, name); 862 return (EIO); 863 } 864 865 /* if any device node is created we set this flag */ 866 vdc->initialized |= VDC_MINOR; 867 868 (void) snprintf(name, sizeof (name), "%c%s", 869 'a' + i, ",raw"); 870 if (ddi_create_minor_node(dip, name, S_IFCHR, 871 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 872 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 873 instance, name); 874 return (EIO); 875 } 876 } 877 878 return (0); 879 } 880 881 /* 882 * Function: 883 * 
vdc_create_device_nodes_props 884 * 885 * Description: 886 * This function creates the block and character device nodes under 887 * /devices along with the node properties. It is called as part of 888 * the attach(9E) of the instance during the handshake with vds after 889 * vds has sent the attributes to vdc. 890 * 891 * Parameters: 892 * vdc - soft state pointer 893 * 894 * Return Values 895 * 0 - Success 896 * EIO - Failed to create device node property 897 * EINVAL - Unknown type of disk exported 898 */ 899 static int 900 vdc_create_device_nodes_props(vdc_t *vdc) 901 { 902 dev_info_t *dip = NULL; 903 int instance; 904 int num_slices = 1; 905 int64_t size = 0; 906 dev_t dev; 907 int rv; 908 int i; 909 910 ASSERT(vdc != NULL); 911 912 instance = vdc->instance; 913 dip = vdc->dip; 914 915 if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { 916 DMSG(vdc, 0, "![%d] Could not create device node property." 917 " No VTOC available", instance); 918 return (ENXIO); 919 } 920 921 switch (vdc->vdisk_type) { 922 case VD_DISK_TYPE_DISK: 923 num_slices = V_NUMPAR; 924 break; 925 case VD_DISK_TYPE_SLICE: 926 num_slices = 1; 927 break; 928 case VD_DISK_TYPE_UNK: 929 default: 930 return (EINVAL); 931 } 932 933 for (i = 0; i < num_slices; i++) { 934 dev = makedevice(ddi_driver_major(dip), 935 VD_MAKE_DEV(instance, i)); 936 937 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 938 DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 939 instance, size, size / (1024 * 1024), 940 vdc->vtoc->v_part[i].p_size); 941 942 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 943 if (rv != DDI_PROP_SUCCESS) { 944 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 945 instance, VDC_SIZE_PROP_NAME, size); 946 return (EIO); 947 } 948 949 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 950 lbtodb(size)); 951 if (rv != DDI_PROP_SUCCESS) { 952 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 953 instance, VDC_NBLOCKS_PROP_NAME, 
lbtodb(size)); 954 return (EIO); 955 } 956 } 957 958 return (0); 959 } 960 961 static int 962 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 963 { 964 _NOTE(ARGUNUSED(cred)) 965 966 int instance; 967 vdc_t *vdc; 968 969 ASSERT(dev != NULL); 970 instance = SDUNIT(*dev); 971 972 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 973 return (EINVAL); 974 975 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 976 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 977 return (ENXIO); 978 } 979 980 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 981 getminor(*dev), flag, otyp); 982 983 mutex_enter(&vdc->lock); 984 vdc->open_count++; 985 mutex_exit(&vdc->lock); 986 987 return (0); 988 } 989 990 static int 991 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 992 { 993 _NOTE(ARGUNUSED(cred)) 994 995 int instance; 996 vdc_t *vdc; 997 998 instance = SDUNIT(dev); 999 1000 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 1001 return (EINVAL); 1002 1003 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1004 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1005 return (ENXIO); 1006 } 1007 1008 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 1009 if (vdc->dkio_flush_pending) { 1010 DMSG(vdc, 0, 1011 "[%d] Cannot detach: %d outstanding DKIO flushes\n", 1012 instance, vdc->dkio_flush_pending); 1013 return (EBUSY); 1014 } 1015 1016 /* 1017 * Should not need the mutex here, since the framework should protect 1018 * against more opens on this device, but just in case. 
1019 */ 1020 mutex_enter(&vdc->lock); 1021 vdc->open_count--; 1022 mutex_exit(&vdc->lock); 1023 1024 return (0); 1025 } 1026 1027 static int 1028 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1029 { 1030 _NOTE(ARGUNUSED(credp)) 1031 _NOTE(ARGUNUSED(rvalp)) 1032 1033 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode)); 1034 } 1035 1036 static int 1037 vdc_print(dev_t dev, char *str) 1038 { 1039 cmn_err(CE_NOTE, "vdc%d: %s", SDUNIT(dev), str); 1040 return (0); 1041 } 1042 1043 static int 1044 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1045 { 1046 int rv; 1047 size_t nbytes = nblk * DEV_BSIZE; 1048 int instance = SDUNIT(dev); 1049 vdc_t *vdc = NULL; 1050 1051 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1052 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1053 return (ENXIO); 1054 } 1055 1056 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 1057 instance, nbytes, blkno, (void *)addr); 1058 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 1059 SDPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir); 1060 if (rv) { 1061 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 1062 return (rv); 1063 } 1064 1065 if (ddi_in_panic()) 1066 (void) vdc_drain_response(vdc); 1067 1068 DMSG(vdc, 0, "[%d] End\n", instance); 1069 1070 return (0); 1071 } 1072 1073 /* -------------------------------------------------------------------------- */ 1074 1075 /* 1076 * Disk access routines 1077 * 1078 */ 1079 1080 /* 1081 * vdc_strategy() 1082 * 1083 * Return Value: 1084 * 0: As per strategy(9E), the strategy() function must return 0 1085 * [ bioerror(9f) sets b_flags to the proper error code ] 1086 */ 1087 static int 1088 vdc_strategy(struct buf *buf) 1089 { 1090 int rv = -1; 1091 vdc_t *vdc = NULL; 1092 int instance = SDUNIT(buf->b_edev); 1093 int op = (buf->b_flags & B_READ) ? 
VD_OP_BREAD : VD_OP_BWRITE; 1094 1095 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1096 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1097 bioerror(buf, ENXIO); 1098 biodone(buf); 1099 return (0); 1100 } 1101 1102 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 1103 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 1104 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1105 DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc); 1106 1107 bp_mapin(buf); 1108 1109 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 1110 buf->b_bcount, SDPART(buf->b_edev), buf->b_lblkno, 1111 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 1112 VIO_write_dir); 1113 1114 ASSERT(rv == 0 || rv == EINVAL); 1115 1116 /* 1117 * If the request was successfully sent, the strategy call returns and 1118 * the ACK handler calls the bioxxx functions when the vDisk server is 1119 * done. 1120 */ 1121 if (rv) { 1122 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 1123 bioerror(buf, rv); 1124 biodone(buf); 1125 } 1126 1127 return (0); 1128 } 1129 1130 1131 static int 1132 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1133 { 1134 _NOTE(ARGUNUSED(cred)) 1135 1136 DMSGX(1, "[%d] Entered", SDUNIT(dev)); 1137 return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio)); 1138 } 1139 1140 static int 1141 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1142 { 1143 _NOTE(ARGUNUSED(cred)) 1144 1145 DMSGX(1, "[%d] Entered", SDUNIT(dev)); 1146 return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio)); 1147 } 1148 1149 static int 1150 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1151 { 1152 _NOTE(ARGUNUSED(cred)) 1153 1154 DMSGX(1, "[%d] Entered", SDUNIT(dev)); 1155 return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio)); 1156 } 1157 1158 static int 1159 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1160 { 1161 _NOTE(ARGUNUSED(cred)) 1162 1163 DMSGX(1, "[%d] Entered", SDUNIT(dev)); 1164 
return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio)); 1165 } 1166 1167 1168 /* -------------------------------------------------------------------------- */ 1169 1170 /* 1171 * Handshake support 1172 */ 1173 1174 1175 /* 1176 * Function: 1177 * vdc_init_ver_negotiation() 1178 * 1179 * Description: 1180 * 1181 * Arguments: 1182 * vdc - soft state pointer for this instance of the device driver. 1183 * 1184 * Return Code: 1185 * 0 - Success 1186 */ 1187 static int 1188 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 1189 { 1190 vio_ver_msg_t pkt; 1191 size_t msglen = sizeof (pkt); 1192 int status = -1; 1193 1194 ASSERT(vdc != NULL); 1195 ASSERT(mutex_owned(&vdc->lock)); 1196 1197 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1198 1199 /* 1200 * set the Session ID to a unique value 1201 * (the lower 32 bits of the clock tick) 1202 */ 1203 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1204 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 1205 1206 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1207 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1208 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1209 pkt.tag.vio_sid = vdc->session_id; 1210 pkt.dev_class = VDEV_DISK; 1211 pkt.ver_major = ver.major; 1212 pkt.ver_minor = ver.minor; 1213 1214 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1215 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 1216 vdc->instance, status); 1217 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1218 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 1219 "id(%lx) rv(%d) size(%ld)", 1220 vdc->instance, vdc->ldc_handle, 1221 status, msglen); 1222 if (msglen != sizeof (vio_ver_msg_t)) 1223 status = ENOMSG; 1224 } 1225 1226 return (status); 1227 } 1228 1229 /* 1230 * Function: 1231 * vdc_ver_negotiation() 1232 * 1233 * Description: 1234 * 1235 * Arguments: 1236 * vdcp - soft state pointer for this instance of the device driver. 
1237 * 1238 * Return Code: 1239 * 0 - Success 1240 */ 1241 static int 1242 vdc_ver_negotiation(vdc_t *vdcp) 1243 { 1244 vio_msg_t vio_msg; 1245 int status; 1246 1247 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 1248 return (status); 1249 1250 /* release lock and wait for response */ 1251 mutex_exit(&vdcp->lock); 1252 status = vdc_wait_for_response(vdcp, &vio_msg); 1253 mutex_enter(&vdcp->lock); 1254 if (status) { 1255 DMSG(vdcp, 0, 1256 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 1257 vdcp->instance, status); 1258 return (status); 1259 } 1260 1261 /* check type and sub_type ... */ 1262 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1263 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1264 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 1265 vdcp->instance); 1266 return (EPROTO); 1267 } 1268 1269 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 1270 } 1271 1272 /* 1273 * Function: 1274 * vdc_init_attr_negotiation() 1275 * 1276 * Description: 1277 * 1278 * Arguments: 1279 * vdc - soft state pointer for this instance of the device driver. 
1280 * 1281 * Return Code: 1282 * 0 - Success 1283 */ 1284 static int 1285 vdc_init_attr_negotiation(vdc_t *vdc) 1286 { 1287 vd_attr_msg_t pkt; 1288 size_t msglen = sizeof (pkt); 1289 int status; 1290 1291 ASSERT(vdc != NULL); 1292 ASSERT(mutex_owned(&vdc->lock)); 1293 1294 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 1295 1296 /* fill in tag */ 1297 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1298 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1299 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1300 pkt.tag.vio_sid = vdc->session_id; 1301 /* fill in payload */ 1302 pkt.max_xfer_sz = vdc->max_xfer_sz; 1303 pkt.vdisk_block_size = vdc->block_size; 1304 pkt.xfer_mode = VIO_DRING_MODE; 1305 pkt.operations = 0; /* server will set bits of valid operations */ 1306 pkt.vdisk_type = 0; /* server will set to valid device type */ 1307 pkt.vdisk_size = 0; /* server will set to valid size */ 1308 1309 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1310 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 1311 1312 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1313 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 1314 "id(%lx) rv(%d) size(%ld)", 1315 vdc->instance, vdc->ldc_handle, 1316 status, msglen); 1317 if (msglen != sizeof (vio_ver_msg_t)) 1318 status = ENOMSG; 1319 } 1320 1321 return (status); 1322 } 1323 1324 /* 1325 * Function: 1326 * vdc_attr_negotiation() 1327 * 1328 * Description: 1329 * 1330 * Arguments: 1331 * vdc - soft state pointer for this instance of the device driver. 
1332 * 1333 * Return Code: 1334 * 0 - Success 1335 */ 1336 static int 1337 vdc_attr_negotiation(vdc_t *vdcp) 1338 { 1339 int status; 1340 vio_msg_t vio_msg; 1341 1342 if (status = vdc_init_attr_negotiation(vdcp)) 1343 return (status); 1344 1345 /* release lock and wait for response */ 1346 mutex_exit(&vdcp->lock); 1347 status = vdc_wait_for_response(vdcp, &vio_msg); 1348 mutex_enter(&vdcp->lock); 1349 if (status) { 1350 DMSG(vdcp, 0, 1351 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 1352 vdcp->instance, status); 1353 return (status); 1354 } 1355 1356 /* check type and sub_type ... */ 1357 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1358 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1359 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 1360 vdcp->instance); 1361 return (EPROTO); 1362 } 1363 1364 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 1365 } 1366 1367 1368 /* 1369 * Function: 1370 * vdc_init_dring_negotiate() 1371 * 1372 * Description: 1373 * 1374 * Arguments: 1375 * vdc - soft state pointer for this instance of the device driver. 
1376 * 1377 * Return Code: 1378 * 0 - Success 1379 */ 1380 static int 1381 vdc_init_dring_negotiate(vdc_t *vdc) 1382 { 1383 vio_dring_reg_msg_t pkt; 1384 size_t msglen = sizeof (pkt); 1385 int status = -1; 1386 int retry; 1387 int nretries = 10; 1388 1389 ASSERT(vdc != NULL); 1390 ASSERT(mutex_owned(&vdc->lock)); 1391 1392 for (retry = 0; retry < nretries; retry++) { 1393 status = vdc_init_descriptor_ring(vdc); 1394 if (status != EAGAIN) 1395 break; 1396 drv_usecwait(vdc_min_timeout_ldc); 1397 } 1398 1399 if (status != 0) { 1400 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 1401 vdc->instance, status); 1402 return (status); 1403 } 1404 1405 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 1406 vdc->instance, status); 1407 1408 /* fill in tag */ 1409 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1410 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1411 pkt.tag.vio_subtype_env = VIO_DRING_REG; 1412 pkt.tag.vio_sid = vdc->session_id; 1413 /* fill in payload */ 1414 pkt.dring_ident = 0; 1415 pkt.num_descriptors = vdc->dring_len; 1416 pkt.descriptor_size = vdc->dring_entry_size; 1417 pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 1418 pkt.ncookies = vdc->dring_cookie_count; 1419 pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 1420 1421 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1422 if (status != 0) { 1423 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1424 vdc->instance, status); 1425 } 1426 1427 return (status); 1428 } 1429 1430 1431 /* 1432 * Function: 1433 * vdc_dring_negotiation() 1434 * 1435 * Description: 1436 * 1437 * Arguments: 1438 * vdc - soft state pointer for this instance of the device driver. 
1439 * 1440 * Return Code: 1441 * 0 - Success 1442 */ 1443 static int 1444 vdc_dring_negotiation(vdc_t *vdcp) 1445 { 1446 int status; 1447 vio_msg_t vio_msg; 1448 1449 if (status = vdc_init_dring_negotiate(vdcp)) 1450 return (status); 1451 1452 /* release lock and wait for response */ 1453 mutex_exit(&vdcp->lock); 1454 status = vdc_wait_for_response(vdcp, &vio_msg); 1455 mutex_enter(&vdcp->lock); 1456 if (status) { 1457 DMSG(vdcp, 0, 1458 "[%d] Failed waiting for Dring negotiation response," 1459 " rv(%d)", vdcp->instance, status); 1460 return (status); 1461 } 1462 1463 /* check type and sub_type ... */ 1464 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1465 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1466 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 1467 vdcp->instance); 1468 return (EPROTO); 1469 } 1470 1471 return (vdc_handle_dring_reg_msg(vdcp, 1472 (vio_dring_reg_msg_t *)&vio_msg)); 1473 } 1474 1475 1476 /* 1477 * Function: 1478 * vdc_send_rdx() 1479 * 1480 * Description: 1481 * 1482 * Arguments: 1483 * vdc - soft state pointer for this instance of the device driver. 1484 * 1485 * Return Code: 1486 * 0 - Success 1487 */ 1488 static int 1489 vdc_send_rdx(vdc_t *vdcp) 1490 { 1491 vio_msg_t msg; 1492 size_t msglen = sizeof (vio_msg_t); 1493 int status; 1494 1495 /* 1496 * Send an RDX message to vds to indicate we are ready 1497 * to send data 1498 */ 1499 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 1500 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 1501 msg.tag.vio_subtype_env = VIO_RDX; 1502 msg.tag.vio_sid = vdcp->session_id; 1503 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 1504 if (status != 0) { 1505 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 1506 vdcp->instance, status); 1507 } 1508 1509 return (status); 1510 } 1511 1512 /* 1513 * Function: 1514 * vdc_handle_rdx() 1515 * 1516 * Description: 1517 * 1518 * Arguments: 1519 * vdc - soft state pointer for this instance of the device driver. 
1520 * msgp - received msg 1521 * 1522 * Return Code: 1523 * 0 - Success 1524 */ 1525 static int 1526 vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp) 1527 { 1528 _NOTE(ARGUNUSED(vdcp)) 1529 _NOTE(ARGUNUSED(msgp)) 1530 1531 ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL); 1532 ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK); 1533 ASSERT(msgp->tag.vio_subtype_env == VIO_RDX); 1534 1535 DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance); 1536 1537 return (0); 1538 } 1539 1540 /* 1541 * Function: 1542 * vdc_rdx_exchange() 1543 * 1544 * Description: 1545 * 1546 * Arguments: 1547 * vdc - soft state pointer for this instance of the device driver. 1548 * 1549 * Return Code: 1550 * 0 - Success 1551 */ 1552 static int 1553 vdc_rdx_exchange(vdc_t *vdcp) 1554 { 1555 int status; 1556 vio_msg_t vio_msg; 1557 1558 if (status = vdc_send_rdx(vdcp)) 1559 return (status); 1560 1561 /* release lock and wait for response */ 1562 mutex_exit(&vdcp->lock); 1563 status = vdc_wait_for_response(vdcp, &vio_msg); 1564 mutex_enter(&vdcp->lock); 1565 if (status) { 1566 DMSG(vdcp, 0, 1567 "[%d] Failed waiting for RDX response," 1568 " rv(%d)", vdcp->instance, status); 1569 return (status); 1570 } 1571 1572 /* check type and sub_type ... 
*/ 1573 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1574 vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) { 1575 DMSG(vdcp, 0, "[%d] Invalid RDX response\n", 1576 vdcp->instance); 1577 return (EPROTO); 1578 } 1579 1580 return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg)); 1581 } 1582 1583 1584 /* -------------------------------------------------------------------------- */ 1585 1586 /* 1587 * LDC helper routines 1588 */ 1589 1590 static int 1591 vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp) 1592 { 1593 int status; 1594 boolean_t q_has_pkts = B_FALSE; 1595 int delay_time; 1596 size_t len; 1597 1598 mutex_enter(&vdc->read_lock); 1599 1600 if (vdc->read_state == VDC_READ_IDLE) 1601 vdc->read_state = VDC_READ_WAITING; 1602 1603 while (vdc->read_state != VDC_READ_PENDING) { 1604 1605 /* detect if the connection has been reset */ 1606 if (vdc->read_state == VDC_READ_RESET) { 1607 status = ECONNRESET; 1608 goto done; 1609 } 1610 1611 cv_wait(&vdc->read_cv, &vdc->read_lock); 1612 } 1613 1614 /* 1615 * Until we get a blocking ldc read we have to retry 1616 * until the entire LDC message has arrived before 1617 * ldc_read() will succeed. Note we also bail out if 1618 * the chanel is reset or goes away. 1619 */ 1620 delay_time = vdc_ldc_read_init_delay; 1621 loop: 1622 len = *nbytesp; 1623 status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len); 1624 switch (status) { 1625 case EAGAIN: 1626 delay_time *= 2; 1627 if (delay_time >= vdc_ldc_read_max_delay) 1628 delay_time = vdc_ldc_read_max_delay; 1629 delay(delay_time); 1630 goto loop; 1631 1632 case 0: 1633 if (len == 0) { 1634 DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with " 1635 "no error!\n", vdc->instance); 1636 goto loop; 1637 } 1638 1639 *nbytesp = len; 1640 1641 /* 1642 * If there are pending messages, leave the 1643 * read state as pending. Otherwise, set the state 1644 * back to idle. 
1645 */ 1646 status = ldc_chkq(vdc->ldc_handle, &q_has_pkts); 1647 if (status == 0 && !q_has_pkts) 1648 vdc->read_state = VDC_READ_IDLE; 1649 1650 break; 1651 default: 1652 DMSG(vdc, 0, "ldc_read returned %d\n", status); 1653 break; 1654 } 1655 1656 done: 1657 mutex_exit(&vdc->read_lock); 1658 1659 return (status); 1660 } 1661 1662 1663 1664 #ifdef DEBUG 1665 void 1666 vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg) 1667 { 1668 char *ms, *ss, *ses; 1669 switch (msg->tag.vio_msgtype) { 1670 #define Q(_s) case _s : ms = #_s; break; 1671 Q(VIO_TYPE_CTRL) 1672 Q(VIO_TYPE_DATA) 1673 Q(VIO_TYPE_ERR) 1674 #undef Q 1675 default: ms = "unknown"; break; 1676 } 1677 1678 switch (msg->tag.vio_subtype) { 1679 #define Q(_s) case _s : ss = #_s; break; 1680 Q(VIO_SUBTYPE_INFO) 1681 Q(VIO_SUBTYPE_ACK) 1682 Q(VIO_SUBTYPE_NACK) 1683 #undef Q 1684 default: ss = "unknown"; break; 1685 } 1686 1687 switch (msg->tag.vio_subtype_env) { 1688 #define Q(_s) case _s : ses = #_s; break; 1689 Q(VIO_VER_INFO) 1690 Q(VIO_ATTR_INFO) 1691 Q(VIO_DRING_REG) 1692 Q(VIO_DRING_UNREG) 1693 Q(VIO_RDX) 1694 Q(VIO_PKT_DATA) 1695 Q(VIO_DESC_DATA) 1696 Q(VIO_DRING_DATA) 1697 #undef Q 1698 default: ses = "unknown"; break; 1699 } 1700 1701 DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n", 1702 msg->tag.vio_msgtype, msg->tag.vio_subtype, 1703 msg->tag.vio_subtype_env, ms, ss, ses); 1704 } 1705 #endif 1706 1707 /* 1708 * Function: 1709 * vdc_send() 1710 * 1711 * Description: 1712 * The function encapsulates the call to write a message using LDC. 1713 * If LDC indicates that the call failed due to the queue being full, 1714 * we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise 1715 * we return the error returned by LDC. 1716 * 1717 * Arguments: 1718 * ldc_handle - LDC handle for the channel this instance of vdc uses 1719 * pkt - address of LDC message to be sent 1720 * msglen - the size of the message being sent. When the function 1721 * returns, this contains the number of bytes written. 
1722 * 1723 * Return Code: 1724 * 0 - Success. 1725 * EINVAL - pkt or msglen were NULL 1726 * ECONNRESET - The connection was not up. 1727 * EWOULDBLOCK - LDC queue is full 1728 * xxx - other error codes returned by ldc_write 1729 */ 1730 static int 1731 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 1732 { 1733 size_t size = 0; 1734 int status = 0; 1735 clock_t delay_ticks; 1736 1737 ASSERT(vdc != NULL); 1738 ASSERT(mutex_owned(&vdc->lock)); 1739 ASSERT(msglen != NULL); 1740 ASSERT(*msglen != 0); 1741 1742 #ifdef DEBUG 1743 vdc_decode_tag(vdc, (vio_msg_t *)pkt); 1744 #endif 1745 /* 1746 * Wait indefinitely to send if channel 1747 * is busy, but bail out if we succeed or 1748 * if the channel closes or is reset. 1749 */ 1750 delay_ticks = vdc_hz_min_ldc_delay; 1751 do { 1752 size = *msglen; 1753 status = ldc_write(vdc->ldc_handle, pkt, &size); 1754 if (status == EWOULDBLOCK) { 1755 delay(delay_ticks); 1756 /* geometric backoff */ 1757 delay_ticks *= 2; 1758 if (delay_ticks > vdc_hz_max_ldc_delay) 1759 delay_ticks = vdc_hz_max_ldc_delay; 1760 } 1761 } while (status == EWOULDBLOCK); 1762 1763 /* if LDC had serious issues --- reset vdc state */ 1764 if (status == EIO || status == ECONNRESET) { 1765 /* LDC had serious issues --- reset vdc state */ 1766 mutex_enter(&vdc->read_lock); 1767 if ((vdc->read_state == VDC_READ_WAITING) || 1768 (vdc->read_state == VDC_READ_RESET)) 1769 cv_signal(&vdc->read_cv); 1770 vdc->read_state = VDC_READ_RESET; 1771 mutex_exit(&vdc->read_lock); 1772 1773 /* wake up any waiters in the reset thread */ 1774 if (vdc->state == VDC_STATE_INIT_WAITING) { 1775 DMSG(vdc, 0, "[%d] write reset - " 1776 "vdc is resetting ..\n", vdc->instance); 1777 vdc->state = VDC_STATE_RESETTING; 1778 cv_signal(&vdc->initwait_cv); 1779 } 1780 1781 return (ECONNRESET); 1782 } 1783 1784 /* return the last size written */ 1785 *msglen = size; 1786 1787 return (status); 1788 } 1789 1790 /* 1791 * Function: 1792 * vdc_get_ldc_id() 1793 * 1794 * Description: 1795 * 
This function gets the 'ldc-id' for this particular instance of vdc. 1796 * The id returned is the guest domain channel endpoint LDC uses for 1797 * communication with vds. 1798 * 1799 * Arguments: 1800 * dip - dev info pointer for this instance of the device driver. 1801 * ldc_id - pointer to variable used to return the 'ldc-id' found. 1802 * 1803 * Return Code: 1804 * 0 - Success. 1805 * ENOENT - Expected node or property did not exist. 1806 * ENXIO - Unexpected error communicating with MD framework 1807 */ 1808 static int 1809 vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id) 1810 { 1811 int status = ENOENT; 1812 char *node_name = NULL; 1813 md_t *mdp = NULL; 1814 int num_nodes; 1815 int num_vdevs; 1816 int num_chans; 1817 mde_cookie_t rootnode; 1818 mde_cookie_t *listp = NULL; 1819 mde_cookie_t *chanp = NULL; 1820 boolean_t found_inst = B_FALSE; 1821 int listsz; 1822 int idx; 1823 uint64_t md_inst; 1824 int obp_inst; 1825 int instance = ddi_get_instance(dip); 1826 1827 ASSERT(ldc_id != NULL); 1828 *ldc_id = 0; 1829 1830 /* 1831 * Get the OBP instance number for comparison with the MD instance 1832 * 1833 * The "cfg-handle" property of a vdc node in an MD contains the MD's 1834 * notion of "instance", or unique identifier, for that node; OBP 1835 * stores the value of the "cfg-handle" MD property as the value of 1836 * the "reg" property on the node in the device tree it builds from 1837 * the MD and passes to Solaris. Thus, we look up the devinfo node's 1838 * "reg" property value to uniquely identify this device instance. 1839 * If the "reg" property cannot be found, the device tree state is 1840 * presumably so broken that there is no point in continuing. 
1841 */ 1842 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 1843 cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 1844 return (ENOENT); 1845 } 1846 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1847 OBP_REG, -1); 1848 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 1849 1850 /* 1851 * We now walk the MD nodes and if an instance of a vdc node matches 1852 * the instance got from OBP we get the ldc-id property. 1853 */ 1854 if ((mdp = md_get_handle()) == NULL) { 1855 cmn_err(CE_WARN, "unable to init machine description"); 1856 return (ENXIO); 1857 } 1858 1859 num_nodes = md_node_count(mdp); 1860 ASSERT(num_nodes > 0); 1861 1862 listsz = num_nodes * sizeof (mde_cookie_t); 1863 1864 /* allocate memory for nodes */ 1865 listp = kmem_zalloc(listsz, KM_SLEEP); 1866 chanp = kmem_zalloc(listsz, KM_SLEEP); 1867 1868 rootnode = md_root_node(mdp); 1869 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 1870 1871 /* 1872 * Search for all the virtual devices, we will then check to see which 1873 * ones are disk nodes. 
1874 */ 1875 num_vdevs = md_scan_dag(mdp, rootnode, 1876 md_find_name(mdp, VDC_MD_VDEV_NAME), 1877 md_find_name(mdp, "fwd"), listp); 1878 1879 if (num_vdevs <= 0) { 1880 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 1881 status = ENOENT; 1882 goto done; 1883 } 1884 1885 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 1886 for (idx = 0; idx < num_vdevs; idx++) { 1887 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 1888 if ((status != 0) || (node_name == NULL)) { 1889 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 1890 ": err %d", VDC_MD_VDEV_NAME, status); 1891 continue; 1892 } 1893 1894 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 1895 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 1896 status = md_get_prop_val(mdp, listp[idx], 1897 VDC_MD_CFG_HDL, &md_inst); 1898 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 1899 instance, md_inst); 1900 if ((status == 0) && (md_inst == obp_inst)) { 1901 found_inst = B_TRUE; 1902 break; 1903 } 1904 } 1905 } 1906 1907 if (!found_inst) { 1908 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 1909 status = ENOENT; 1910 goto done; 1911 } 1912 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 1913 1914 /* get the channels for this node */ 1915 num_chans = md_scan_dag(mdp, listp[idx], 1916 md_find_name(mdp, VDC_MD_CHAN_NAME), 1917 md_find_name(mdp, "fwd"), chanp); 1918 1919 /* expecting at least one channel */ 1920 if (num_chans <= 0) { 1921 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 1922 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 1923 status = ENOENT; 1924 goto done; 1925 1926 } else if (num_chans != 1) { 1927 DMSGX(0, "[%d] Expected 1 '%s' node for '%s' port, found %d\n", 1928 instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 1929 num_chans); 1930 } 1931 1932 /* 1933 * We use the first channel found (index 0), irrespective of how 1934 * many are there in total. 
1935 */ 1936 if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) { 1937 cmn_err(CE_NOTE, "Channel '%s' property not found", 1938 VDC_ID_PROP); 1939 status = ENOENT; 1940 } 1941 1942 DMSGX(0, "[%d] LDC id is 0x%lx\n", instance, *ldc_id); 1943 1944 done: 1945 if (chanp) 1946 kmem_free(chanp, listsz); 1947 if (listp) 1948 kmem_free(listp, listsz); 1949 1950 (void) md_fini_handle(mdp); 1951 1952 return (status); 1953 } 1954 1955 static int 1956 vdc_do_ldc_up(vdc_t *vdc) 1957 { 1958 int status; 1959 ldc_status_t ldc_state; 1960 1961 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 1962 vdc->instance, vdc->ldc_id); 1963 1964 if (vdc->lifecycle == VDC_LC_DETACHING) 1965 return (EINVAL); 1966 1967 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 1968 switch (status) { 1969 case ECONNREFUSED: /* listener not ready at other end */ 1970 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 1971 vdc->instance, vdc->ldc_id, status); 1972 status = 0; 1973 break; 1974 default: 1975 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 1976 "channel=%ld, err=%d", vdc->instance, vdc->ldc_id, 1977 status); 1978 break; 1979 } 1980 } 1981 1982 if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) { 1983 vdc->ldc_state = ldc_state; 1984 if (ldc_state == LDC_UP) { 1985 DMSG(vdc, 0, "[%d] LDC channel already up\n", 1986 vdc->instance); 1987 vdc->seq_num = 1; 1988 vdc->seq_num_reply = 0; 1989 } 1990 } 1991 1992 return (status); 1993 } 1994 1995 /* 1996 * Function: 1997 * vdc_terminate_ldc() 1998 * 1999 * Description: 2000 * 2001 * Arguments: 2002 * vdc - soft state pointer for this instance of the device driver. 
2003 * 2004 * Return Code: 2005 * None 2006 */ 2007 static void 2008 vdc_terminate_ldc(vdc_t *vdc) 2009 { 2010 int instance = ddi_get_instance(vdc->dip); 2011 2012 ASSERT(vdc != NULL); 2013 ASSERT(mutex_owned(&vdc->lock)); 2014 2015 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 2016 2017 if (vdc->initialized & VDC_LDC_OPEN) { 2018 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 2019 (void) ldc_close(vdc->ldc_handle); 2020 } 2021 if (vdc->initialized & VDC_LDC_CB) { 2022 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 2023 (void) ldc_unreg_callback(vdc->ldc_handle); 2024 } 2025 if (vdc->initialized & VDC_LDC) { 2026 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 2027 (void) ldc_fini(vdc->ldc_handle); 2028 vdc->ldc_handle = NULL; 2029 } 2030 2031 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 2032 } 2033 2034 /* -------------------------------------------------------------------------- */ 2035 2036 /* 2037 * Descriptor Ring helper routines 2038 */ 2039 2040 /* 2041 * Function: 2042 * vdc_init_descriptor_ring() 2043 * 2044 * Description: 2045 * 2046 * Arguments: 2047 * vdc - soft state pointer for this instance of the device driver. 2048 * 2049 * Return Code: 2050 * 0 - Success 2051 */ 2052 static int 2053 vdc_init_descriptor_ring(vdc_t *vdc) 2054 { 2055 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 2056 int status = 0; 2057 int i; 2058 2059 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 2060 2061 ASSERT(vdc != NULL); 2062 ASSERT(mutex_owned(&vdc->lock)); 2063 ASSERT(vdc->ldc_handle != NULL); 2064 2065 /* ensure we have enough room to store max sized block */ 2066 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2067 2068 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 2069 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2070 /* 2071 * Calculate the maximum block size we can transmit using one 2072 * Descriptor Ring entry from the attributes returned by the 2073 * vDisk server. 
This is subject to a minimum of 'maxphys' 2074 * as we do not have the capability to split requests over 2075 * multiple DRing entries. 2076 */ 2077 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 2078 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2079 vdc->instance); 2080 vdc->dring_max_cookies = maxphys / PAGESIZE; 2081 } else { 2082 vdc->dring_max_cookies = 2083 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2084 } 2085 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2086 (sizeof (ldc_mem_cookie_t) * 2087 (vdc->dring_max_cookies - 1))); 2088 vdc->dring_len = VD_DRING_LEN; 2089 2090 status = ldc_mem_dring_create(vdc->dring_len, 2091 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 2092 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 2093 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2094 vdc->instance); 2095 return (status); 2096 } 2097 vdc->initialized |= VDC_DRING_INIT; 2098 } 2099 2100 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 2101 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 2102 vdc->dring_cookie = 2103 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 2104 2105 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 2106 LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 2107 &vdc->dring_cookie[0], 2108 &vdc->dring_cookie_count); 2109 if (status != 0) { 2110 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 2111 "(%lx) to channel (%lx) status=%d\n", 2112 vdc->instance, vdc->ldc_dring_hdl, 2113 vdc->ldc_handle, status); 2114 return (status); 2115 } 2116 ASSERT(vdc->dring_cookie_count == 1); 2117 vdc->initialized |= VDC_DRING_BOUND; 2118 } 2119 2120 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 2121 if (status != 0) { 2122 DMSG(vdc, 0, 2123 "[%d] Failed to get info for descriptor ring (%lx)\n", 2124 vdc->instance, vdc->ldc_dring_hdl); 2125 return (status); 2126 } 2127 2128 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 2129 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 2130 2131 /* 
Allocate the local copy of this dring */ 2132 vdc->local_dring = 2133 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 2134 KM_SLEEP); 2135 vdc->initialized |= VDC_DRING_LOCAL; 2136 } 2137 2138 /* 2139 * Mark all DRing entries as free and initialize the private 2140 * descriptor's memory handles. If any entry is initialized, 2141 * we need to free it later so we set the bit in 'initialized' 2142 * at the start. 2143 */ 2144 vdc->initialized |= VDC_DRING_ENTRY; 2145 for (i = 0; i < vdc->dring_len; i++) { 2146 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 2147 dep->hdr.dstate = VIO_DESC_FREE; 2148 2149 status = ldc_mem_alloc_handle(vdc->ldc_handle, 2150 &vdc->local_dring[i].desc_mhdl); 2151 if (status != 0) { 2152 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 2153 " descriptor %d", vdc->instance, i); 2154 return (status); 2155 } 2156 vdc->local_dring[i].is_free = B_TRUE; 2157 vdc->local_dring[i].dep = dep; 2158 } 2159 2160 /* Initialize the starting index */ 2161 vdc->dring_curr_idx = 0; 2162 2163 return (status); 2164 } 2165 2166 /* 2167 * Function: 2168 * vdc_destroy_descriptor_ring() 2169 * 2170 * Description: 2171 * 2172 * Arguments: 2173 * vdc - soft state pointer for this instance of the device driver. 
2174 * 2175 * Return Code: 2176 * None 2177 */ 2178 static void 2179 vdc_destroy_descriptor_ring(vdc_t *vdc) 2180 { 2181 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2182 ldc_mem_handle_t mhdl = NULL; 2183 ldc_mem_info_t minfo; 2184 int status = -1; 2185 int i; /* loop */ 2186 2187 ASSERT(vdc != NULL); 2188 ASSERT(mutex_owned(&vdc->lock)); 2189 2190 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 2191 2192 if (vdc->initialized & VDC_DRING_ENTRY) { 2193 DMSG(vdc, 0, 2194 "[%d] Removing Local DRing entries\n", vdc->instance); 2195 for (i = 0; i < vdc->dring_len; i++) { 2196 ldep = &vdc->local_dring[i]; 2197 mhdl = ldep->desc_mhdl; 2198 2199 if (mhdl == NULL) 2200 continue; 2201 2202 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 2203 DMSG(vdc, 0, 2204 "ldc_mem_info returned an error: %d\n", 2205 status); 2206 2207 /* 2208 * This must mean that the mem handle 2209 * is not valid. Clear it out so that 2210 * no one tries to use it. 2211 */ 2212 ldep->desc_mhdl = NULL; 2213 continue; 2214 } 2215 2216 if (minfo.status == LDC_BOUND) { 2217 (void) ldc_mem_unbind_handle(mhdl); 2218 } 2219 2220 (void) ldc_mem_free_handle(mhdl); 2221 2222 ldep->desc_mhdl = NULL; 2223 } 2224 vdc->initialized &= ~VDC_DRING_ENTRY; 2225 } 2226 2227 if (vdc->initialized & VDC_DRING_LOCAL) { 2228 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 2229 kmem_free(vdc->local_dring, 2230 vdc->dring_len * sizeof (vdc_local_desc_t)); 2231 vdc->initialized &= ~VDC_DRING_LOCAL; 2232 } 2233 2234 if (vdc->initialized & VDC_DRING_BOUND) { 2235 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 2236 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 2237 if (status == 0) { 2238 vdc->initialized &= ~VDC_DRING_BOUND; 2239 } else { 2240 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2241 vdc->instance, status, vdc->ldc_dring_hdl); 2242 } 2243 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 2244 } 2245 2246 if (vdc->initialized & VDC_DRING_INIT) { 2247 DMSG(vdc, 0, "[%d] 
Destroying DRing\n", vdc->instance); 2248 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 2249 if (status == 0) { 2250 vdc->ldc_dring_hdl = NULL; 2251 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 2252 vdc->initialized &= ~VDC_DRING_INIT; 2253 } else { 2254 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 2255 vdc->instance, status, vdc->ldc_dring_hdl); 2256 } 2257 } 2258 } 2259 2260 /* 2261 * Function: 2262 * vdc_map_to_shared_ring() 2263 * 2264 * Description: 2265 * Copy contents of the local descriptor to the shared 2266 * memory descriptor. 2267 * 2268 * Arguments: 2269 * vdcp - soft state pointer for this instance of the device driver. 2270 * idx - descriptor ring index 2271 * 2272 * Return Code: 2273 * None 2274 */ 2275 static int 2276 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 2277 { 2278 vdc_local_desc_t *ldep; 2279 vd_dring_entry_t *dep; 2280 int rv; 2281 2282 ldep = &(vdcp->local_dring[idx]); 2283 2284 /* for now leave in the old pop_mem_hdl stuff */ 2285 if (ldep->nbytes > 0) { 2286 rv = vdc_populate_mem_hdl(vdcp, ldep); 2287 if (rv) { 2288 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 2289 vdcp->instance); 2290 return (rv); 2291 } 2292 } 2293 2294 /* 2295 * fill in the data details into the DRing 2296 */ 2297 dep = ldep->dep; 2298 ASSERT(dep != NULL); 2299 2300 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 2301 dep->payload.operation = ldep->operation; 2302 dep->payload.addr = ldep->offset; 2303 dep->payload.nbytes = ldep->nbytes; 2304 dep->payload.status = (uint32_t)-1; /* vds will set valid value */ 2305 dep->payload.slice = ldep->slice; 2306 dep->hdr.dstate = VIO_DESC_READY; 2307 dep->hdr.ack = 1; /* request an ACK for every message */ 2308 2309 return (0); 2310 } 2311 2312 /* 2313 * Function: 2314 * vdc_send_request 2315 * 2316 * Description: 2317 * This routine writes the data to be transmitted to vds into the 2318 * descriptor, notifies vds that the ring has been updated and 2319 * then waits for the request to be 
processed. 2320 * 2321 * Arguments: 2322 * vdcp - the soft state pointer 2323 * operation - operation we want vds to perform (VD_OP_XXX) 2324 * addr - address of data buf to be read/written. 2325 * nbytes - number of bytes to read/write 2326 * slice - the disk slice this request is for 2327 * offset - relative disk offset 2328 * cb_type - type of call - STRATEGY or SYNC 2329 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2330 * . mode for ioctl(9e) 2331 * . LP64 diskaddr_t (block I/O) 2332 * dir - direction of operation (READ/WRITE/BOTH) 2333 * 2334 * Return Codes: 2335 * 0 2336 * EAGAIN 2337 * EFAULT 2338 * ENXIO 2339 * EIO 2340 */ 2341 static int 2342 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 2343 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2344 void *cb_arg, vio_desc_direction_t dir) 2345 { 2346 ASSERT(vdcp != NULL); 2347 ASSERT(slice < V_NUMPAR); 2348 2349 mutex_enter(&vdcp->lock); 2350 2351 do { 2352 while (vdcp->state != VDC_STATE_RUNNING) 2353 cv_wait(&vdcp->running_cv, &vdcp->lock); 2354 2355 } while (vdc_populate_descriptor(vdcp, operation, addr, 2356 nbytes, slice, offset, cb_type, cb_arg, dir)); 2357 2358 mutex_exit(&vdcp->lock); 2359 return (0); 2360 } 2361 2362 2363 /* 2364 * Function: 2365 * vdc_populate_descriptor 2366 * 2367 * Description: 2368 * This routine writes the data to be transmitted to vds into the 2369 * descriptor, notifies vds that the ring has been updated and 2370 * then waits for the request to be processed. 2371 * 2372 * Arguments: 2373 * vdcp - the soft state pointer 2374 * operation - operation we want vds to perform (VD_OP_XXX) 2375 * addr - address of data buf to be read/written. 2376 * nbytes - number of bytes to read/write 2377 * slice - the disk slice this request is for 2378 * offset - relative disk offset 2379 * cb_type - type of call - STRATEGY or SYNC 2380 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2381 * . mode for ioctl(9e) 2382 * . 
LP64 diskaddr_t (block I/O) 2383 * dir - direction of operation (READ/WRITE/BOTH) 2384 * 2385 * Return Codes: 2386 * 0 2387 * EAGAIN 2388 * EFAULT 2389 * ENXIO 2390 * EIO 2391 */ 2392 static int 2393 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 2394 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2395 void *cb_arg, vio_desc_direction_t dir) 2396 { 2397 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 2398 int idx; /* Index of DRing entry used */ 2399 int next_idx; 2400 vio_dring_msg_t dmsg; 2401 size_t msglen; 2402 int rv; 2403 2404 ASSERT(MUTEX_HELD(&vdcp->lock)); 2405 vdcp->threads_pending++; 2406 loop: 2407 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 2408 2409 /* Get next available D-Ring entry */ 2410 idx = vdcp->dring_curr_idx; 2411 local_dep = &(vdcp->local_dring[idx]); 2412 2413 if (!local_dep->is_free) { 2414 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 2415 vdcp->instance); 2416 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 2417 if (vdcp->state == VDC_STATE_RUNNING || 2418 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2419 goto loop; 2420 } 2421 vdcp->threads_pending--; 2422 return (ECONNRESET); 2423 } 2424 2425 next_idx = idx + 1; 2426 if (next_idx >= vdcp->dring_len) 2427 next_idx = 0; 2428 vdcp->dring_curr_idx = next_idx; 2429 2430 ASSERT(local_dep->is_free); 2431 2432 local_dep->operation = operation; 2433 local_dep->addr = addr; 2434 local_dep->nbytes = nbytes; 2435 local_dep->slice = slice; 2436 local_dep->offset = offset; 2437 local_dep->cb_type = cb_type; 2438 local_dep->cb_arg = cb_arg; 2439 local_dep->dir = dir; 2440 2441 local_dep->is_free = B_FALSE; 2442 2443 rv = vdc_map_to_shared_dring(vdcp, idx); 2444 if (rv) { 2445 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 2446 vdcp->instance); 2447 /* free the descriptor */ 2448 local_dep->is_free = B_TRUE; 2449 vdcp->dring_curr_idx = idx; 2450 cv_wait(&vdcp->membind_cv, &vdcp->lock); 2451 if (vdcp->state == 
VDC_STATE_RUNNING || 2452 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2453 goto loop; 2454 } 2455 vdcp->threads_pending--; 2456 return (ECONNRESET); 2457 } 2458 2459 /* 2460 * Send a msg with the DRing details to vds 2461 */ 2462 VIO_INIT_DRING_DATA_TAG(dmsg); 2463 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 2464 dmsg.dring_ident = vdcp->dring_ident; 2465 dmsg.start_idx = idx; 2466 dmsg.end_idx = idx; 2467 vdcp->seq_num++; 2468 2469 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp); 2470 2471 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 2472 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 2473 2474 /* 2475 * note we're still holding the lock here to 2476 * make sure the message goes out in order !!!... 2477 */ 2478 msglen = sizeof (dmsg); 2479 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 2480 switch (rv) { 2481 case ECONNRESET: 2482 /* 2483 * vdc_send initiates the reset on failure. 2484 * Since the transaction has already been put 2485 * on the local dring, it will automatically get 2486 * retried when the channel is reset. Given that, 2487 * it is ok to just return success even though the 2488 * send failed. 2489 */ 2490 rv = 0; 2491 break; 2492 2493 case 0: /* EOK */ 2494 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 2495 break; 2496 2497 default: 2498 goto cleanup_and_exit; 2499 } 2500 2501 vdcp->threads_pending--; 2502 return (rv); 2503 2504 cleanup_and_exit: 2505 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 2506 return (ENXIO); 2507 } 2508 2509 /* 2510 * Function: 2511 * vdc_do_sync_op 2512 * 2513 * Description: 2514 * Wrapper around vdc_populate_descriptor that blocks until the 2515 * response to the message is available. 2516 * 2517 * Arguments: 2518 * vdcp - the soft state pointer 2519 * operation - operation we want vds to perform (VD_OP_XXX) 2520 * addr - address of data buf to be read/written. 
2521 * nbytes - number of bytes to read/write 2522 * slice - the disk slice this request is for 2523 * offset - relative disk offset 2524 * cb_type - type of call - STRATEGY or SYNC 2525 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2526 * . mode for ioctl(9e) 2527 * . LP64 diskaddr_t (block I/O) 2528 * dir - direction of operation (READ/WRITE/BOTH) 2529 * 2530 * Return Codes: 2531 * 0 2532 * EAGAIN 2533 * EFAULT 2534 * ENXIO 2535 * EIO 2536 */ 2537 static int 2538 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 2539 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 2540 vio_desc_direction_t dir) 2541 { 2542 int status; 2543 2544 ASSERT(cb_type == CB_SYNC); 2545 2546 /* 2547 * Grab the lock, if blocked wait until the server 2548 * response causes us to wake up again. 2549 */ 2550 mutex_enter(&vdcp->lock); 2551 vdcp->sync_op_cnt++; 2552 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 2553 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 2554 2555 if (vdcp->state == VDC_STATE_DETACH) { 2556 cv_broadcast(&vdcp->sync_blocked_cv); 2557 vdcp->sync_op_cnt--; 2558 mutex_exit(&vdcp->lock); 2559 return (ENXIO); 2560 } 2561 2562 /* now block anyone other thread entering after us */ 2563 vdcp->sync_op_blocked = B_TRUE; 2564 vdcp->sync_op_pending = B_TRUE; 2565 mutex_exit(&vdcp->lock); 2566 2567 /* 2568 * No need to check return value - will return error only 2569 * in the DETACH case and we can fall through 2570 */ 2571 (void) vdc_send_request(vdcp, operation, addr, 2572 nbytes, slice, offset, cb_type, cb_arg, dir); 2573 2574 /* 2575 * block until our transaction completes. 2576 * Also anyone else waiting also gets to go next. 
2577 */ 2578 mutex_enter(&vdcp->lock); 2579 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 2580 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 2581 2582 DMSG(vdcp, 2, ": operation returned %d\n", vdcp->sync_op_status); 2583 if (vdcp->state == VDC_STATE_DETACH) 2584 status = ENXIO; 2585 else 2586 status = vdcp->sync_op_status; 2587 vdcp->sync_op_status = 0; 2588 vdcp->sync_op_blocked = B_FALSE; 2589 vdcp->sync_op_cnt--; 2590 2591 /* signal the next waiting thread */ 2592 cv_signal(&vdcp->sync_blocked_cv); 2593 mutex_exit(&vdcp->lock); 2594 2595 return (status); 2596 } 2597 2598 2599 /* 2600 * Function: 2601 * vdc_drain_response() 2602 * 2603 * Description: 2604 * When a guest is panicking, the completion of requests needs to be 2605 * handled differently because interrupts are disabled and vdc 2606 * will not get messages. We have to poll for the messages instead. 2607 * 2608 * Arguments: 2609 * vdc - soft state pointer for this instance of the device driver. 2610 * 2611 * Return Code: 2612 * 0 - Success 2613 */ 2614 static int 2615 vdc_drain_response(vdc_t *vdc) 2616 { 2617 int rv, idx, retries; 2618 size_t msglen; 2619 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2620 vio_dring_msg_t dmsg; 2621 2622 mutex_enter(&vdc->lock); 2623 2624 retries = 0; 2625 for (;;) { 2626 msglen = sizeof (dmsg); 2627 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 2628 if (rv) { 2629 rv = EINVAL; 2630 break; 2631 } 2632 2633 /* 2634 * if there are no packets wait and check again 2635 */ 2636 if ((rv == 0) && (msglen == 0)) { 2637 if (retries++ > vdc_dump_retries) { 2638 rv = EAGAIN; 2639 break; 2640 } 2641 2642 drv_usecwait(vdc_usec_timeout_dump); 2643 continue; 2644 } 2645 2646 /* 2647 * Ignore all messages that are not ACKs/NACKs to 2648 * DRing requests. 
2649 */ 2650 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2651 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2652 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 2653 dmsg.tag.vio_msgtype, 2654 dmsg.tag.vio_subtype, 2655 dmsg.tag.vio_subtype_env); 2656 continue; 2657 } 2658 2659 /* 2660 * set the appropriate return value for the current request. 2661 */ 2662 switch (dmsg.tag.vio_subtype) { 2663 case VIO_SUBTYPE_ACK: 2664 rv = 0; 2665 break; 2666 case VIO_SUBTYPE_NACK: 2667 rv = EAGAIN; 2668 break; 2669 default: 2670 continue; 2671 } 2672 2673 idx = dmsg.start_idx; 2674 if (idx >= vdc->dring_len) { 2675 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 2676 vdc->instance, idx); 2677 continue; 2678 } 2679 ldep = &vdc->local_dring[idx]; 2680 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 2681 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 2682 vdc->instance, idx, ldep->dep->hdr.dstate); 2683 continue; 2684 } 2685 2686 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 2687 vdc->instance, idx, ldep->dep->hdr.dstate); 2688 rv = vdc_depopulate_descriptor(vdc, idx); 2689 if (rv) { 2690 DMSG(vdc, 0, 2691 "[%d] Entry @ %d - depopulate failed ..\n", 2692 vdc->instance, idx); 2693 } 2694 2695 /* if this is the last descriptor - break out of loop */ 2696 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 2697 break; 2698 } 2699 2700 mutex_exit(&vdc->lock); 2701 DMSG(vdc, 0, "End idx=%d\n", idx); 2702 2703 return (rv); 2704 } 2705 2706 2707 /* 2708 * Function: 2709 * vdc_depopulate_descriptor() 2710 * 2711 * Description: 2712 * 2713 * Arguments: 2714 * vdc - soft state pointer for this instance of the device driver. 
2715 * idx - Index of the Descriptor Ring entry being modified 2716 * 2717 * Return Code: 2718 * 0 - Success 2719 */ 2720 static int 2721 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2722 { 2723 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2724 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2725 int status = ENXIO; 2726 int operation; 2727 int rv = 0; 2728 2729 ASSERT(vdc != NULL); 2730 ASSERT(idx < vdc->dring_len); 2731 ldep = &vdc->local_dring[idx]; 2732 ASSERT(ldep != NULL); 2733 ASSERT(MUTEX_HELD(&vdc->lock)); 2734 2735 DMSG(vdc, 2, ": idx = %d\n", idx); 2736 dep = ldep->dep; 2737 ASSERT(dep != NULL); 2738 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 2739 (dep->payload.status == ECANCELED)); 2740 2741 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2742 2743 ldep->is_free = B_TRUE; 2744 DMSG(vdc, 2, ": is_free = %d\n", ldep->is_free); 2745 status = dep->payload.status; 2746 operation = dep->payload.operation; 2747 2748 /* the DKIO FLUSH operation never bind handles so we can return now */ 2749 if (operation == VD_OP_FLUSH) 2750 return (status); 2751 2752 /* 2753 * If the upper layer passed in a misaligned address we copied the 2754 * data into an aligned buffer before sending it to LDC - we now 2755 * copy it back to the original buffer. 2756 */ 2757 if (ldep->align_addr) { 2758 ASSERT(ldep->addr != NULL); 2759 ASSERT(dep->payload.nbytes > 0); 2760 2761 bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes); 2762 kmem_free(ldep->align_addr, 2763 sizeof (caddr_t) * P2ROUNDUP(dep->payload.nbytes, 8)); 2764 ldep->align_addr = NULL; 2765 } 2766 2767 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 2768 if (rv != 0) { 2769 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 2770 vdc->instance, ldep->desc_mhdl, idx, rv); 2771 /* 2772 * The error returned by the vDisk server is more informative 2773 * and thus has a higher priority but if it isn't set we ensure 2774 * that this function returns an error. 
2775 */ 2776 if (status == 0) 2777 status = EINVAL; 2778 } 2779 2780 cv_signal(&vdc->membind_cv); 2781 cv_signal(&vdc->dring_free_cv); 2782 2783 return (status); 2784 } 2785 2786 /* 2787 * Function: 2788 * vdc_populate_mem_hdl() 2789 * 2790 * Description: 2791 * 2792 * Arguments: 2793 * vdc - soft state pointer for this instance of the device driver. 2794 * idx - Index of the Descriptor Ring entry being modified 2795 * addr - virtual address being mapped in 2796 * nybtes - number of bytes in 'addr' 2797 * operation - the vDisk operation being performed (VD_OP_xxx) 2798 * 2799 * Return Code: 2800 * 0 - Success 2801 */ 2802 static int 2803 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 2804 { 2805 vd_dring_entry_t *dep = NULL; 2806 ldc_mem_handle_t mhdl; 2807 caddr_t vaddr; 2808 size_t nbytes; 2809 uint8_t perm = LDC_MEM_RW; 2810 uint8_t maptype; 2811 int rv = 0; 2812 int i; 2813 2814 ASSERT(vdcp != NULL); 2815 2816 dep = ldep->dep; 2817 mhdl = ldep->desc_mhdl; 2818 2819 switch (ldep->dir) { 2820 case VIO_read_dir: 2821 perm = LDC_MEM_W; 2822 break; 2823 2824 case VIO_write_dir: 2825 perm = LDC_MEM_R; 2826 break; 2827 2828 case VIO_both_dir: 2829 perm = LDC_MEM_RW; 2830 break; 2831 2832 default: 2833 ASSERT(0); /* catch bad programming in vdc */ 2834 } 2835 2836 /* 2837 * LDC expects any addresses passed in to be 8-byte aligned. 
We need 2838 * to copy the contents of any misaligned buffers to a newly allocated 2839 * buffer and bind it instead (and copy the the contents back to the 2840 * original buffer passed in when depopulating the descriptor) 2841 */ 2842 vaddr = ldep->addr; 2843 nbytes = ldep->nbytes; 2844 if (((uint64_t)vaddr & 0x7) != 0) { 2845 ASSERT(ldep->align_addr == NULL); 2846 ldep->align_addr = 2847 kmem_alloc(sizeof (caddr_t) * 2848 P2ROUNDUP(nbytes, 8), KM_SLEEP); 2849 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 2850 "(buf=%p nb=%ld op=%d)\n", 2851 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 2852 nbytes, ldep->operation); 2853 if (perm != LDC_MEM_W) 2854 bcopy(vaddr, ldep->align_addr, nbytes); 2855 vaddr = ldep->align_addr; 2856 } 2857 2858 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 2859 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2860 maptype, perm, &dep->payload.cookie[0], 2861 &dep->payload.ncookies); 2862 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 2863 vdcp->instance, dep->payload.ncookies); 2864 if (rv != 0) { 2865 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 2866 "(mhdl=%p, buf=%p, err=%d)\n", 2867 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 2868 if (ldep->align_addr) { 2869 kmem_free(ldep->align_addr, 2870 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 2871 ldep->align_addr = NULL; 2872 } 2873 return (EAGAIN); 2874 } 2875 2876 /* 2877 * Get the other cookies (if any). 
2878 */ 2879 for (i = 1; i < dep->payload.ncookies; i++) { 2880 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2881 if (rv != 0) { 2882 (void) ldc_mem_unbind_handle(mhdl); 2883 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 2884 "(mhdl=%lx cnum=%d), err=%d", 2885 vdcp->instance, mhdl, i, rv); 2886 if (ldep->align_addr) { 2887 kmem_free(ldep->align_addr, 2888 sizeof (caddr_t) * dep->payload.nbytes); 2889 ldep->align_addr = NULL; 2890 } 2891 return (EAGAIN); 2892 } 2893 } 2894 2895 return (rv); 2896 } 2897 2898 /* 2899 * Interrupt handlers for messages from LDC 2900 */ 2901 2902 /* 2903 * Function: 2904 * vdc_handle_cb() 2905 * 2906 * Description: 2907 * 2908 * Arguments: 2909 * event - Type of event (LDC_EVT_xxx) that triggered the callback 2910 * arg - soft state pointer for this instance of the device driver. 2911 * 2912 * Return Code: 2913 * 0 - Success 2914 */ 2915 static uint_t 2916 vdc_handle_cb(uint64_t event, caddr_t arg) 2917 { 2918 ldc_status_t ldc_state; 2919 int rv = 0; 2920 2921 vdc_t *vdc = (vdc_t *)(void *)arg; 2922 2923 ASSERT(vdc != NULL); 2924 2925 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 2926 2927 /* 2928 * Depending on the type of event that triggered this callback, 2929 * we modify the handshake state or read the data. 2930 * 2931 * NOTE: not done as a switch() as event could be triggered by 2932 * a state change and a read request. Also the ordering of the 2933 * check for the event types is deliberate. 2934 */ 2935 if (event & LDC_EVT_UP) { 2936 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 2937 2938 mutex_enter(&vdc->lock); 2939 2940 /* get LDC state */ 2941 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2942 if (rv != 0) { 2943 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 2944 vdc->instance, rv); 2945 return (LDC_SUCCESS); 2946 } 2947 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 2948 /* 2949 * Reset the transaction sequence numbers when 2950 * LDC comes up. 
We then kick off the handshake 2951 * negotiation with the vDisk server. 2952 */ 2953 vdc->seq_num = 1; 2954 vdc->seq_num_reply = 0; 2955 vdc->ldc_state = ldc_state; 2956 cv_signal(&vdc->initwait_cv); 2957 } 2958 2959 mutex_exit(&vdc->lock); 2960 } 2961 2962 if (event & LDC_EVT_READ) { 2963 DMSG(vdc, 0, "[%d] Received LDC_EVT_READ\n", vdc->instance); 2964 mutex_enter(&vdc->read_lock); 2965 cv_signal(&vdc->read_cv); 2966 vdc->read_state = VDC_READ_PENDING; 2967 mutex_exit(&vdc->read_lock); 2968 2969 /* that's all we have to do - no need to handle DOWN/RESET */ 2970 return (LDC_SUCCESS); 2971 } 2972 2973 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 2974 2975 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 2976 2977 mutex_enter(&vdc->lock); 2978 /* 2979 * Need to wake up any readers so they will 2980 * detect that a reset has occurred. 2981 */ 2982 mutex_enter(&vdc->read_lock); 2983 if ((vdc->read_state == VDC_READ_WAITING) || 2984 (vdc->read_state == VDC_READ_RESET)) 2985 cv_signal(&vdc->read_cv); 2986 vdc->read_state = VDC_READ_RESET; 2987 mutex_exit(&vdc->read_lock); 2988 2989 /* wake up any threads waiting for connection to come up */ 2990 if (vdc->state == VDC_STATE_INIT_WAITING) { 2991 vdc->state = VDC_STATE_RESETTING; 2992 cv_signal(&vdc->initwait_cv); 2993 } 2994 2995 mutex_exit(&vdc->lock); 2996 } 2997 2998 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 2999 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3000 vdc->instance, event); 3001 3002 return (LDC_SUCCESS); 3003 } 3004 3005 /* 3006 * Function: 3007 * vdc_wait_for_response() 3008 * 3009 * Description: 3010 * Block waiting for a response from the server. If there is 3011 * no data the thread block on the read_cv that is signalled 3012 * by the callback when an EVT_READ occurs. 3013 * 3014 * Arguments: 3015 * vdcp - soft state pointer for this instance of the device driver. 
3016 * 3017 * Return Code: 3018 * 0 - Success 3019 */ 3020 static int 3021 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3022 { 3023 size_t nbytes = sizeof (*msgp); 3024 int status; 3025 3026 ASSERT(vdcp != NULL); 3027 3028 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3029 3030 status = vdc_recv(vdcp, msgp, &nbytes); 3031 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 3032 status, (int)nbytes); 3033 if (status) { 3034 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3035 vdcp->instance, status); 3036 return (status); 3037 } 3038 3039 if (nbytes < sizeof (vio_msg_tag_t)) { 3040 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3041 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3042 return (ENOMSG); 3043 } 3044 3045 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3046 msgp->tag.vio_msgtype, 3047 msgp->tag.vio_subtype, 3048 msgp->tag.vio_subtype_env); 3049 3050 /* 3051 * Verify the Session ID of the message 3052 * 3053 * Every message after the Version has been negotiated should 3054 * have the correct session ID set. 3055 */ 3056 if ((msgp->tag.vio_sid != vdcp->session_id) && 3057 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3058 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3059 "expected 0x%lx [seq num %lx @ %d]", 3060 vdcp->instance, msgp->tag.vio_sid, 3061 vdcp->session_id, 3062 ((vio_dring_msg_t *)msgp)->seq_num, 3063 ((vio_dring_msg_t *)msgp)->start_idx); 3064 return (ENOMSG); 3065 } 3066 return (0); 3067 } 3068 3069 3070 /* 3071 * Function: 3072 * vdc_resubmit_backup_dring() 3073 * 3074 * Description: 3075 * Resubmit each descriptor in the backed up dring to 3076 * vDisk server. The Dring was backed up during connection 3077 * reset. 3078 * 3079 * Arguments: 3080 * vdcp - soft state pointer for this instance of the device driver. 
3081 * 3082 * Return Code: 3083 * 0 - Success 3084 */ 3085 static int 3086 vdc_resubmit_backup_dring(vdc_t *vdcp) 3087 { 3088 int count; 3089 int b_idx; 3090 int rv; 3091 int dring_size; 3092 int status; 3093 vio_msg_t vio_msg; 3094 vdc_local_desc_t *curr_ldep; 3095 3096 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3097 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3098 3099 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3100 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3101 3102 /* 3103 * Walk the backup copy of the local descriptor ring and 3104 * resubmit all the outstanding transactions. 3105 */ 3106 b_idx = vdcp->local_dring_backup_tail; 3107 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3108 3109 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3110 3111 /* only resubmit oustanding transactions */ 3112 if (!curr_ldep->is_free) { 3113 3114 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3115 mutex_enter(&vdcp->lock); 3116 rv = vdc_populate_descriptor(vdcp, curr_ldep->operation, 3117 curr_ldep->addr, curr_ldep->nbytes, 3118 curr_ldep->slice, curr_ldep->offset, 3119 curr_ldep->cb_type, curr_ldep->cb_arg, 3120 curr_ldep->dir); 3121 mutex_exit(&vdcp->lock); 3122 if (rv) { 3123 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3124 vdcp->instance, b_idx); 3125 return (rv); 3126 } 3127 3128 /* Wait for the response message. 
*/ 3129 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 3130 b_idx); 3131 status = vdc_wait_for_response(vdcp, &vio_msg); 3132 if (status) { 3133 DMSG(vdcp, 1, "[%d] wait_for_response " 3134 "returned err=%d\n", vdcp->instance, 3135 status); 3136 return (status); 3137 } 3138 3139 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 3140 status = vdc_process_data_msg(vdcp, &vio_msg); 3141 if (status) { 3142 DMSG(vdcp, 1, "[%d] process_data_msg " 3143 "returned err=%d\n", vdcp->instance, 3144 status); 3145 return (status); 3146 } 3147 } 3148 3149 /* get the next element to submit */ 3150 if (++b_idx >= vdcp->local_dring_backup_len) 3151 b_idx = 0; 3152 } 3153 3154 /* all done - now clear up pending dring copy */ 3155 dring_size = vdcp->local_dring_backup_len * 3156 sizeof (vdcp->local_dring_backup[0]); 3157 3158 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3159 3160 vdcp->local_dring_backup = NULL; 3161 3162 return (0); 3163 } 3164 3165 /* 3166 * Function: 3167 * vdc_backup_local_dring() 3168 * 3169 * Description: 3170 * Backup the current dring in the event of a reset. The Dring 3171 * transactions will be resubmitted to the server when the 3172 * connection is restored. 3173 * 3174 * Arguments: 3175 * vdcp - soft state pointer for this instance of the device driver. 3176 * 3177 * Return Code: 3178 * NONE 3179 */ 3180 static void 3181 vdc_backup_local_dring(vdc_t *vdcp) 3182 { 3183 int dring_size; 3184 3185 ASSERT(vdcp->state == VDC_STATE_RESETTING); 3186 3187 /* 3188 * If the backup dring is stil around, it means 3189 * that the last restore did not complete. However, 3190 * since we never got back into the running state, 3191 * the backup copy we have is still valid. 
3192 */ 3193 if (vdcp->local_dring_backup != NULL) { 3194 DMSG(vdcp, 1, "reusing local descriptor ring backup " 3195 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 3196 vdcp->local_dring_backup_tail); 3197 return; 3198 } 3199 3200 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 3201 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 3202 3203 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 3204 3205 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 3206 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 3207 3208 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 3209 vdcp->local_dring_backup_len = vdcp->dring_len; 3210 } 3211 3212 /* -------------------------------------------------------------------------- */ 3213 3214 /* 3215 * The following functions process the incoming messages from vds 3216 */ 3217 3218 /* 3219 * Function: 3220 * vdc_process_msg_thread() 3221 * 3222 * Description: 3223 * 3224 * Main VDC message processing thread. Each vDisk instance 3225 * consists of a copy of this thread. This thread triggers 3226 * all the handshakes and data exchange with the server. It 3227 * also handles all channel resets 3228 * 3229 * Arguments: 3230 * vdc - soft state pointer for this instance of the device driver. 3231 * 3232 * Return Code: 3233 * None 3234 */ 3235 static void 3236 vdc_process_msg_thread(vdc_t *vdcp) 3237 { 3238 int status; 3239 3240 mutex_enter(&vdcp->lock); 3241 3242 for (;;) { 3243 3244 #define Q(_s) (vdcp->state == _s) ? 
#_s : 3245 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 3246 Q(VDC_STATE_INIT) 3247 Q(VDC_STATE_INIT_WAITING) 3248 Q(VDC_STATE_NEGOTIATE) 3249 Q(VDC_STATE_HANDLE_PENDING) 3250 Q(VDC_STATE_RUNNING) 3251 Q(VDC_STATE_RESETTING) 3252 Q(VDC_STATE_DETACH) 3253 "UNKNOWN"); 3254 3255 switch (vdcp->state) { 3256 case VDC_STATE_INIT: 3257 3258 /* Check if have re-initializing repeatedly */ 3259 if (vdcp->hshake_cnt++ > VDC_RETRIES) { 3260 vdcp->state = VDC_STATE_DETACH; 3261 break; 3262 } 3263 3264 /* Bring up connection with vds via LDC */ 3265 status = vdc_start_ldc_connection(vdcp); 3266 switch (status) { 3267 case EINVAL: 3268 DMSG(vdcp, 0, "[%d] Could not start LDC", 3269 vdcp->instance); 3270 vdcp->state = VDC_STATE_DETACH; 3271 break; 3272 case 0: 3273 vdcp->state = VDC_STATE_INIT_WAITING; 3274 break; 3275 default: 3276 vdcp->state = VDC_STATE_INIT_WAITING; 3277 break; 3278 } 3279 break; 3280 3281 case VDC_STATE_INIT_WAITING: 3282 3283 /* 3284 * Let the callback event move us on 3285 * when channel is open to server 3286 */ 3287 while (vdcp->ldc_state != LDC_UP) { 3288 cv_wait(&vdcp->initwait_cv, &vdcp->lock); 3289 if (vdcp->state != VDC_STATE_INIT_WAITING) { 3290 DMSG(vdcp, 0, 3291 "state moved to %d out from under us...\n", 3292 vdcp->state); 3293 3294 break; 3295 } 3296 } 3297 if (vdcp->state == VDC_STATE_INIT_WAITING && 3298 vdcp->ldc_state == LDC_UP) { 3299 vdcp->state = VDC_STATE_NEGOTIATE; 3300 } 3301 break; 3302 3303 case VDC_STATE_NEGOTIATE: 3304 switch (status = vdc_ver_negotiation(vdcp)) { 3305 case 0: 3306 break; 3307 default: 3308 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 3309 status); 3310 goto reset; 3311 } 3312 3313 switch (status = vdc_attr_negotiation(vdcp)) { 3314 case 0: 3315 break; 3316 default: 3317 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 3318 status); 3319 goto reset; 3320 } 3321 3322 switch (status = vdc_dring_negotiation(vdcp)) { 3323 case 0: 3324 break; 3325 default: 3326 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 3327 
status); 3328 goto reset; 3329 } 3330 3331 switch (status = vdc_rdx_exchange(vdcp)) { 3332 case 0: 3333 vdcp->state = VDC_STATE_HANDLE_PENDING; 3334 goto done; 3335 default: 3336 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 3337 status); 3338 goto reset; 3339 } 3340 reset: 3341 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 3342 status); 3343 vdcp->state = VDC_STATE_RESETTING; 3344 done: 3345 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 3346 vdcp->state); 3347 break; 3348 3349 case VDC_STATE_HANDLE_PENDING: 3350 3351 mutex_exit(&vdcp->lock); 3352 status = vdc_resubmit_backup_dring(vdcp); 3353 mutex_enter(&vdcp->lock); 3354 3355 if (status) 3356 vdcp->state = VDC_STATE_RESETTING; 3357 else 3358 vdcp->state = VDC_STATE_RUNNING; 3359 3360 break; 3361 3362 /* enter running state */ 3363 case VDC_STATE_RUNNING: 3364 /* 3365 * Signal anyone waiting for the connection 3366 * to come on line. 3367 */ 3368 vdcp->hshake_cnt = 0; 3369 cv_broadcast(&vdcp->running_cv); 3370 mutex_exit(&vdcp->lock); 3371 3372 for (;;) { 3373 vio_msg_t msg; 3374 status = vdc_wait_for_response(vdcp, &msg); 3375 if (status) break; 3376 3377 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 3378 vdcp->instance); 3379 status = vdc_process_data_msg(vdcp, &msg); 3380 if (status) { 3381 DMSG(vdcp, 1, "[%d] process_data_msg " 3382 "returned err=%d\n", vdcp->instance, 3383 status); 3384 break; 3385 } 3386 3387 } 3388 3389 mutex_enter(&vdcp->lock); 3390 3391 vdcp->state = VDC_STATE_RESETTING; 3392 break; 3393 3394 case VDC_STATE_RESETTING: 3395 DMSG(vdcp, 0, "Initiating channel reset " 3396 "(pending = %d)\n", (int)vdcp->threads_pending); 3397 3398 if (vdcp->self_reset) { 3399 DMSG(vdcp, 0, 3400 "[%d] calling stop_ldc_connection.\n", 3401 vdcp->instance); 3402 status = vdc_stop_ldc_connection(vdcp); 3403 vdcp->self_reset = B_FALSE; 3404 } 3405 3406 /* 3407 * Wait for all threads currently waiting 3408 * for a free dring entry to use. 
3409 */ 3410 while (vdcp->threads_pending) { 3411 cv_broadcast(&vdcp->membind_cv); 3412 cv_broadcast(&vdcp->dring_free_cv); 3413 mutex_exit(&vdcp->lock); 3414 /* let them wake up */ 3415 drv_usecwait(vdc_min_timeout_ldc); 3416 mutex_enter(&vdcp->lock); 3417 } 3418 3419 ASSERT(vdcp->threads_pending == 0); 3420 3421 /* Sanity check that no thread is receiving */ 3422 ASSERT(vdcp->read_state != VDC_READ_WAITING); 3423 3424 vdcp->read_state = VDC_READ_IDLE; 3425 3426 vdc_backup_local_dring(vdcp); 3427 3428 /* cleanup the old d-ring */ 3429 vdc_destroy_descriptor_ring(vdcp); 3430 3431 /* go and start again */ 3432 vdcp->state = VDC_STATE_INIT; 3433 3434 break; 3435 3436 case VDC_STATE_DETACH: 3437 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 3438 vdcp->instance); 3439 3440 while (vdcp->sync_op_pending) { 3441 cv_signal(&vdcp->sync_pending_cv); 3442 cv_signal(&vdcp->sync_blocked_cv); 3443 mutex_exit(&vdcp->lock); 3444 drv_usecwait(vdc_min_timeout_ldc); 3445 mutex_enter(&vdcp->lock); 3446 } 3447 3448 cv_signal(&vdcp->running_cv); 3449 mutex_exit(&vdcp->lock); 3450 3451 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 3452 vdcp->instance); 3453 thread_exit(); 3454 break; 3455 } 3456 } 3457 } 3458 3459 3460 /* 3461 * Function: 3462 * vdc_process_data_msg() 3463 * 3464 * Description: 3465 * This function is called by the message processing thread each time 3466 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 3467 * be an ACK or NACK from vds[1] which vdc handles as follows. 3468 * ACK - wake up the waiting thread 3469 * NACK - resend any messages necessary 3470 * 3471 * [1] Although the message format allows it, vds should not send a 3472 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 3473 * some bizarre reason it does, vdc will reset the connection. 3474 * 3475 * Arguments: 3476 * vdc - soft state pointer for this instance of the device driver. 
3477 * msg - the LDC message sent by vds 3478 * 3479 * Return Code: 3480 * 0 - Success. 3481 * > 0 - error value returned by LDC 3482 */ 3483 static int 3484 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 3485 { 3486 int status = 0; 3487 vio_dring_msg_t *dring_msg; 3488 vdc_local_desc_t *ldep = NULL; 3489 int start, end; 3490 int idx; 3491 3492 dring_msg = (vio_dring_msg_t *)msg; 3493 3494 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 3495 ASSERT(vdcp != NULL); 3496 3497 mutex_enter(&vdcp->lock); 3498 3499 /* 3500 * Check to see if the message has bogus data 3501 */ 3502 idx = start = dring_msg->start_idx; 3503 end = dring_msg->end_idx; 3504 if ((start >= vdcp->dring_len) || 3505 (end >= vdcp->dring_len) || (end < -1)) { 3506 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 3507 vdcp->instance, start, end); 3508 mutex_exit(&vdcp->lock); 3509 return (EINVAL); 3510 } 3511 3512 /* 3513 * Verify that the sequence number is what vdc expects. 3514 */ 3515 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 3516 case VDC_SEQ_NUM_TODO: 3517 break; /* keep processing this message */ 3518 case VDC_SEQ_NUM_SKIP: 3519 mutex_exit(&vdcp->lock); 3520 return (0); 3521 case VDC_SEQ_NUM_INVALID: 3522 mutex_exit(&vdcp->lock); 3523 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 3524 return (ENXIO); 3525 } 3526 3527 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 3528 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 3529 VDC_DUMP_DRING_MSG(dring_msg); 3530 mutex_exit(&vdcp->lock); 3531 return (EIO); 3532 3533 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 3534 mutex_exit(&vdcp->lock); 3535 return (EPROTO); 3536 } 3537 3538 DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp); 3539 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 3540 ASSERT(start == end); 3541 3542 ldep = &vdcp->local_dring[idx]; 3543 3544 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 3545 ldep->dep->hdr.dstate, ldep->cb_type); 3546 3547 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 
3548 struct buf *bufp; 3549 3550 switch (ldep->cb_type) { 3551 case CB_SYNC: 3552 ASSERT(vdcp->sync_op_pending); 3553 3554 status = vdc_depopulate_descriptor(vdcp, idx); 3555 vdcp->sync_op_status = status; 3556 vdcp->sync_op_pending = B_FALSE; 3557 cv_signal(&vdcp->sync_pending_cv); 3558 break; 3559 3560 case CB_STRATEGY: 3561 bufp = ldep->cb_arg; 3562 ASSERT(bufp != NULL); 3563 status = ldep->dep->payload.status; /* Future:ntoh */ 3564 if (status != 0) { 3565 DMSG(vdcp, 1, "strategy status=%d\n", status); 3566 bioerror(bufp, status); 3567 } 3568 status = vdc_depopulate_descriptor(vdcp, idx); 3569 biodone(bufp); 3570 break; 3571 3572 default: 3573 ASSERT(0); 3574 } 3575 } 3576 3577 /* let the arrival signal propogate */ 3578 mutex_exit(&vdcp->lock); 3579 3580 /* probe gives the count of how many entries were processed */ 3581 DTRACE_IO2(processed, int, 1, vdc_t *, vdcp); 3582 3583 return (0); 3584 } 3585 3586 /* 3587 * Function: 3588 * vdc_process_err_msg() 3589 * 3590 * NOTE: No error messages are used as part of the vDisk protocol 3591 */ 3592 static int 3593 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 3594 { 3595 _NOTE(ARGUNUSED(vdc)) 3596 _NOTE(ARGUNUSED(msg)) 3597 3598 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 3599 DMSG(vdc, 1, "[%d] Got an ERR msg", vdc->instance); 3600 3601 return (ENOTSUP); 3602 } 3603 3604 /* 3605 * Function: 3606 * vdc_handle_ver_msg() 3607 * 3608 * Description: 3609 * 3610 * Arguments: 3611 * vdc - soft state pointer for this instance of the device driver. 
3612 * ver_msg - LDC message sent by vDisk server 3613 * 3614 * Return Code: 3615 * 0 - Success 3616 */ 3617 static int 3618 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 3619 { 3620 int status = 0; 3621 3622 ASSERT(vdc != NULL); 3623 ASSERT(mutex_owned(&vdc->lock)); 3624 3625 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 3626 return (EPROTO); 3627 } 3628 3629 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 3630 return (EINVAL); 3631 } 3632 3633 switch (ver_msg->tag.vio_subtype) { 3634 case VIO_SUBTYPE_ACK: 3635 /* 3636 * We check to see if the version returned is indeed supported 3637 * (The server may have also adjusted the minor number downwards 3638 * and if so 'ver_msg' will contain the actual version agreed) 3639 */ 3640 if (vdc_is_supported_version(ver_msg)) { 3641 vdc->ver.major = ver_msg->ver_major; 3642 vdc->ver.minor = ver_msg->ver_minor; 3643 ASSERT(vdc->ver.major > 0); 3644 } else { 3645 status = EPROTO; 3646 } 3647 break; 3648 3649 case VIO_SUBTYPE_NACK: 3650 /* 3651 * call vdc_is_supported_version() which will return the next 3652 * supported version (if any) in 'ver_msg' 3653 */ 3654 (void) vdc_is_supported_version(ver_msg); 3655 if (ver_msg->ver_major > 0) { 3656 size_t len = sizeof (*ver_msg); 3657 3658 ASSERT(vdc->ver.major > 0); 3659 3660 /* reset the necessary fields and resend */ 3661 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 3662 ver_msg->dev_class = VDEV_DISK; 3663 3664 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 3665 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 3666 vdc->instance, status); 3667 if (len != sizeof (*ver_msg)) 3668 status = EBADMSG; 3669 } else { 3670 DMSG(vdc, 0, "[%d] No common version with " 3671 "vDisk server", vdc->instance); 3672 status = ENOTSUP; 3673 } 3674 3675 break; 3676 case VIO_SUBTYPE_INFO: 3677 /* 3678 * Handle the case where vds starts handshake 3679 * (for now only vdc is the instigatior) 3680 */ 3681 status = ENOTSUP; 3682 break; 3683 3684 default: 3685 status = EINVAL; 3686 
break; 3687 } 3688 3689 return (status); 3690 } 3691 3692 /* 3693 * Function: 3694 * vdc_handle_attr_msg() 3695 * 3696 * Description: 3697 * 3698 * Arguments: 3699 * vdc - soft state pointer for this instance of the device driver. 3700 * attr_msg - LDC message sent by vDisk server 3701 * 3702 * Return Code: 3703 * 0 - Success 3704 */ 3705 static int 3706 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 3707 { 3708 int status = 0; 3709 3710 ASSERT(vdc != NULL); 3711 ASSERT(mutex_owned(&vdc->lock)); 3712 3713 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 3714 return (EPROTO); 3715 } 3716 3717 switch (attr_msg->tag.vio_subtype) { 3718 case VIO_SUBTYPE_ACK: 3719 /* 3720 * We now verify the attributes sent by vds. 3721 */ 3722 vdc->vdisk_size = attr_msg->vdisk_size; 3723 vdc->vdisk_type = attr_msg->vdisk_type; 3724 3725 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 3726 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 3727 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 3728 vdc->instance, vdc->block_size, 3729 attr_msg->vdisk_block_size); 3730 3731 /* 3732 * We don't know at compile time what the vDisk server will 3733 * think are good values but we apply an large (arbitrary) 3734 * upper bound to prevent memory exhaustion in vdc if it was 3735 * allocating a DRing based of huge values sent by the server. 3736 * We probably will never exceed this except if the message 3737 * was garbage. 
3738 */ 3739 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 3740 (PAGESIZE * DEV_BSIZE)) { 3741 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 3742 vdc->block_size = attr_msg->vdisk_block_size; 3743 } else { 3744 DMSG(vdc, 0, "[%d] vds block transfer size too big;" 3745 " using max supported by vdc", vdc->instance); 3746 } 3747 3748 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 3749 (attr_msg->vdisk_size > INT64_MAX) || 3750 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 3751 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 3752 vdc->instance); 3753 status = EINVAL; 3754 break; 3755 } 3756 3757 break; 3758 3759 case VIO_SUBTYPE_NACK: 3760 /* 3761 * vds could not handle the attributes we sent so we 3762 * stop negotiating. 3763 */ 3764 status = EPROTO; 3765 break; 3766 3767 case VIO_SUBTYPE_INFO: 3768 /* 3769 * Handle the case where vds starts the handshake 3770 * (for now; vdc is the only supported instigatior) 3771 */ 3772 status = ENOTSUP; 3773 break; 3774 3775 default: 3776 status = ENOTSUP; 3777 break; 3778 } 3779 3780 return (status); 3781 } 3782 3783 /* 3784 * Function: 3785 * vdc_handle_dring_reg_msg() 3786 * 3787 * Description: 3788 * 3789 * Arguments: 3790 * vdc - soft state pointer for this instance of the driver. 
3791 * dring_msg - LDC message sent by vDisk server 3792 * 3793 * Return Code: 3794 * 0 - Success 3795 */ 3796 static int 3797 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 3798 { 3799 int status = 0; 3800 3801 ASSERT(vdc != NULL); 3802 ASSERT(mutex_owned(&vdc->lock)); 3803 3804 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 3805 return (EPROTO); 3806 } 3807 3808 switch (dring_msg->tag.vio_subtype) { 3809 case VIO_SUBTYPE_ACK: 3810 /* save the received dring_ident */ 3811 vdc->dring_ident = dring_msg->dring_ident; 3812 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 3813 vdc->instance, vdc->dring_ident); 3814 break; 3815 3816 case VIO_SUBTYPE_NACK: 3817 /* 3818 * vds could not handle the DRing info we sent so we 3819 * stop negotiating. 3820 */ 3821 DMSG(vdc, 0, "[%d] server could not register DRing\n", 3822 vdc->instance); 3823 status = EPROTO; 3824 break; 3825 3826 case VIO_SUBTYPE_INFO: 3827 /* 3828 * Handle the case where vds starts handshake 3829 * (for now only vdc is the instigatior) 3830 */ 3831 status = ENOTSUP; 3832 break; 3833 default: 3834 status = ENOTSUP; 3835 } 3836 3837 return (status); 3838 } 3839 3840 /* 3841 * Function: 3842 * vdc_verify_seq_num() 3843 * 3844 * Description: 3845 * This functions verifies that the sequence number sent back by the vDisk 3846 * server with the latest message is what is expected (i.e. it is greater 3847 * than the last seq num sent by the vDisk server and less than or equal 3848 * to the last seq num generated by vdc). 3849 * 3850 * It then checks the request ID to see if any requests need processing 3851 * in the DRing. 3852 * 3853 * Arguments: 3854 * vdc - soft state pointer for this instance of the driver. 
3855 * dring_msg - pointer to the LDC message sent by vds 3856 * 3857 * Return Code: 3858 * VDC_SEQ_NUM_TODO - Message needs to be processed 3859 * VDC_SEQ_NUM_SKIP - Message has already been processed 3860 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 3861 * vdc cannot deal with them 3862 */ 3863 static int 3864 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 3865 { 3866 ASSERT(vdc != NULL); 3867 ASSERT(dring_msg != NULL); 3868 ASSERT(mutex_owned(&vdc->lock)); 3869 3870 /* 3871 * Check to see if the messages were responded to in the correct 3872 * order by vds. 3873 */ 3874 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 3875 (dring_msg->seq_num > vdc->seq_num)) { 3876 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 3877 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 3878 vdc->instance, dring_msg->seq_num, 3879 vdc->seq_num_reply, vdc->seq_num, 3880 vdc->req_id_proc, vdc->req_id); 3881 return (VDC_SEQ_NUM_INVALID); 3882 } 3883 vdc->seq_num_reply = dring_msg->seq_num; 3884 3885 if (vdc->req_id_proc < vdc->req_id) 3886 return (VDC_SEQ_NUM_TODO); 3887 else 3888 return (VDC_SEQ_NUM_SKIP); 3889 } 3890 3891 3892 /* 3893 * Function: 3894 * vdc_is_supported_version() 3895 * 3896 * Description: 3897 * This routine checks if the major/minor version numbers specified in 3898 * 'ver_msg' are supported. 
If not it finds the next version that is 3899 * in the supported version list 'vdc_version[]' and sets the fields in 3900 * 'ver_msg' to those values 3901 * 3902 * Arguments: 3903 * ver_msg - LDC message sent by vDisk server 3904 * 3905 * Return Code: 3906 * B_TRUE - Success 3907 * B_FALSE - Version not supported 3908 */ 3909 static boolean_t 3910 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 3911 { 3912 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 3913 3914 for (int i = 0; i < vdc_num_versions; i++) { 3915 ASSERT(vdc_version[i].major > 0); 3916 ASSERT((i == 0) || 3917 (vdc_version[i].major < vdc_version[i-1].major)); 3918 3919 /* 3920 * If the major versions match, adjust the minor version, if 3921 * necessary, down to the highest value supported by this 3922 * client. The server should support all minor versions lower 3923 * than the value it sent 3924 */ 3925 if (ver_msg->ver_major == vdc_version[i].major) { 3926 if (ver_msg->ver_minor > vdc_version[i].minor) { 3927 DMSGX(0, 3928 "Adjusting minor version from %u to %u", 3929 ver_msg->ver_minor, vdc_version[i].minor); 3930 ver_msg->ver_minor = vdc_version[i].minor; 3931 } 3932 return (B_TRUE); 3933 } 3934 3935 /* 3936 * If the message contains a higher major version number, set 3937 * the message's major/minor versions to the current values 3938 * and return false, so this message will get resent with 3939 * these values, and the server will potentially try again 3940 * with the same or a lower version 3941 */ 3942 if (ver_msg->ver_major > vdc_version[i].major) { 3943 ver_msg->ver_major = vdc_version[i].major; 3944 ver_msg->ver_minor = vdc_version[i].minor; 3945 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 3946 ver_msg->ver_major, ver_msg->ver_minor); 3947 3948 return (B_FALSE); 3949 } 3950 3951 /* 3952 * Otherwise, the message's major version is less than the 3953 * current major version, so continue the loop to the next 3954 * (lower) supported version 3955 */ 3956 } 3957 3958 
/* 3959 * No common version was found; "ground" the version pair in the 3960 * message to terminate negotiation 3961 */ 3962 ver_msg->ver_major = 0; 3963 ver_msg->ver_minor = 0; 3964 3965 return (B_FALSE); 3966 } 3967 /* -------------------------------------------------------------------------- */ 3968 3969 /* 3970 * DKIO(7) support 3971 */ 3972 3973 typedef struct vdc_dk_arg { 3974 struct dk_callback dkc; 3975 int mode; 3976 dev_t dev; 3977 vdc_t *vdc; 3978 } vdc_dk_arg_t; 3979 3980 /* 3981 * Function: 3982 * vdc_dkio_flush_cb() 3983 * 3984 * Description: 3985 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 3986 * by kernel code. 3987 * 3988 * Arguments: 3989 * arg - a pointer to a vdc_dk_arg_t structure. 3990 */ 3991 void 3992 vdc_dkio_flush_cb(void *arg) 3993 { 3994 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 3995 struct dk_callback *dkc = NULL; 3996 vdc_t *vdc = NULL; 3997 int rv; 3998 3999 if (dk_arg == NULL) { 4000 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 4001 return; 4002 } 4003 dkc = &dk_arg->dkc; 4004 vdc = dk_arg->vdc; 4005 ASSERT(vdc != NULL); 4006 4007 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 4008 SDPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir); 4009 if (rv != 0) { 4010 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 4011 vdc->instance, rv, 4012 ddi_model_convert_from(dk_arg->mode & FMODELS)); 4013 } 4014 4015 /* 4016 * Trigger the call back to notify the caller the the ioctl call has 4017 * been completed. 
4018 */ 4019 if ((dk_arg->mode & FKIOCTL) && 4020 (dkc != NULL) && 4021 (dkc->dkc_callback != NULL)) { 4022 ASSERT(dkc->dkc_cookie != NULL); 4023 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 4024 } 4025 4026 /* Indicate that one less DKIO write flush is outstanding */ 4027 mutex_enter(&vdc->lock); 4028 vdc->dkio_flush_pending--; 4029 ASSERT(vdc->dkio_flush_pending >= 0); 4030 mutex_exit(&vdc->lock); 4031 4032 /* free the mem that was allocated when the callback was dispatched */ 4033 kmem_free(arg, sizeof (vdc_dk_arg_t)); 4034 } 4035 4036 /* 4037 * This structure is used in the DKIO(7I) array below. 4038 */ 4039 typedef struct vdc_dk_ioctl { 4040 uint8_t op; /* VD_OP_XXX value */ 4041 int cmd; /* Solaris ioctl operation number */ 4042 size_t nbytes; /* size of structure to be copied */ 4043 4044 /* function to convert between vDisk and Solaris structure formats */ 4045 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 4046 int mode, int dir); 4047 } vdc_dk_ioctl_t; 4048 4049 /* 4050 * Subset of DKIO(7I) operations currently supported 4051 */ 4052 static vdc_dk_ioctl_t dk_ioctl[] = { 4053 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, sizeof (int), 4054 vdc_null_copy_func}, 4055 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 4056 vdc_get_wce_convert}, 4057 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 4058 vdc_set_wce_convert}, 4059 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 4060 vdc_get_vtoc_convert}, 4061 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 4062 vdc_set_vtoc_convert}, 4063 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 4064 vdc_get_geom_convert}, 4065 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 4066 vdc_get_geom_convert}, 4067 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 4068 vdc_get_geom_convert}, 4069 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 4070 vdc_set_geom_convert}, 4071 {VD_OP_GET_EFI, DKIOCGETEFI, 0, 4072 vdc_get_efi_convert}, 4073 {VD_OP_SET_EFI, DKIOCSETEFI, 0, 4074 vdc_set_efi_convert}, 4075 4076 /* 4077 
* These particular ioctls are not sent to the server - vdc fakes up 4078 * the necessary info. 4079 */ 4080 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 4081 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 4082 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 4083 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 4084 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 4085 }; 4086 4087 /* 4088 * Function: 4089 * vd_process_ioctl() 4090 * 4091 * Description: 4092 * This routine processes disk specific ioctl calls 4093 * 4094 * Arguments: 4095 * dev - the device number 4096 * cmd - the operation [dkio(7I)] to be processed 4097 * arg - pointer to user provided structure 4098 * (contains data to be set or reference parameter for get) 4099 * mode - bit flag, indicating open settings, 32/64 bit type, etc 4100 * 4101 * Return Code: 4102 * 0 4103 * EFAULT 4104 * ENXIO 4105 * EIO 4106 * ENOTSUP 4107 */ 4108 static int 4109 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 4110 { 4111 int instance = SDUNIT(dev); 4112 vdc_t *vdc = NULL; 4113 int rv = -1; 4114 int idx = 0; /* index into dk_ioctl[] */ 4115 size_t len = 0; /* #bytes to send to vds */ 4116 size_t alloc_len = 0; /* #bytes to allocate mem for */ 4117 caddr_t mem_p = NULL; 4118 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 4119 struct vtoc vtoc_saved; 4120 vdc_dk_ioctl_t *iop; 4121 4122 vdc = ddi_get_soft_state(vdc_state, instance); 4123 if (vdc == NULL) { 4124 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 4125 instance); 4126 return (ENXIO); 4127 } 4128 4129 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 4130 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 4131 4132 /* 4133 * Validate the ioctl operation to be performed. 4134 * 4135 * If we have looped through the array without finding a match then we 4136 * don't support this ioctl. 
4137 */ 4138 for (idx = 0; idx < nioctls; idx++) { 4139 if (cmd == dk_ioctl[idx].cmd) 4140 break; 4141 } 4142 4143 if (idx >= nioctls) { 4144 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 4145 vdc->instance, cmd); 4146 return (ENOTSUP); 4147 } 4148 4149 iop = &(dk_ioctl[idx]); 4150 4151 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 4152 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 4153 dk_efi_t dk_efi; 4154 4155 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 4156 if (rv != 0) 4157 return (EFAULT); 4158 4159 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 4160 } else { 4161 len = iop->nbytes; 4162 } 4163 4164 /* 4165 * Deal with the ioctls which the server does not provide. vdc can 4166 * fake these up and return immediately 4167 */ 4168 switch (cmd) { 4169 case CDROMREADOFFSET: 4170 case DKIOCREMOVABLE: 4171 case USCSICMD: 4172 return (ENOTTY); 4173 4174 case DKIOCINFO: 4175 { 4176 struct dk_cinfo cinfo; 4177 if (vdc->cinfo == NULL) 4178 return (ENXIO); 4179 4180 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 4181 cinfo.dki_partition = SDPART(dev); 4182 4183 rv = ddi_copyout(&cinfo, (void *)arg, 4184 sizeof (struct dk_cinfo), mode); 4185 if (rv != 0) 4186 return (EFAULT); 4187 4188 return (0); 4189 } 4190 4191 case DKIOCGMEDIAINFO: 4192 { 4193 if (vdc->minfo == NULL) 4194 return (ENXIO); 4195 4196 rv = ddi_copyout(vdc->minfo, (void *)arg, 4197 sizeof (struct dk_minfo), mode); 4198 if (rv != 0) 4199 return (EFAULT); 4200 4201 return (0); 4202 } 4203 4204 case DKIOCFLUSHWRITECACHE: 4205 { 4206 struct dk_callback *dkc = (struct dk_callback *)arg; 4207 vdc_dk_arg_t *dkarg = NULL; 4208 4209 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 4210 instance, mode); 4211 4212 /* 4213 * If the backing device is not a 'real' disk then the 4214 * W$ operation request to the vDisk server will fail 4215 * so we might as well save the cycles and return now. 
4216 */ 4217 if (vdc->vdisk_type != VD_DISK_TYPE_DISK) 4218 return (ENOTTY); 4219 4220 /* 4221 * If arg is NULL, then there is no callback function 4222 * registered and the call operates synchronously; we 4223 * break and continue with the rest of the function and 4224 * wait for vds to return (i.e. after the request to 4225 * vds returns successfully, all writes completed prior 4226 * to the ioctl will have been flushed from the disk 4227 * write cache to persistent media. 4228 * 4229 * If a callback function is registered, we dispatch 4230 * the request on a task queue and return immediately. 4231 * The callback will deal with informing the calling 4232 * thread that the flush request is completed. 4233 */ 4234 if (dkc == NULL) 4235 break; 4236 4237 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 4238 4239 dkarg->mode = mode; 4240 dkarg->dev = dev; 4241 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 4242 4243 mutex_enter(&vdc->lock); 4244 vdc->dkio_flush_pending++; 4245 dkarg->vdc = vdc; 4246 mutex_exit(&vdc->lock); 4247 4248 /* put the request on a task queue */ 4249 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 4250 (void *)dkarg, DDI_SLEEP); 4251 if (rv == NULL) { 4252 /* clean up if dispatch fails */ 4253 mutex_enter(&vdc->lock); 4254 vdc->dkio_flush_pending--; 4255 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 4256 } 4257 4258 return (rv == NULL ? ENOMEM : 0); 4259 } 4260 } 4261 4262 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 4263 ASSERT(iop->op != 0); 4264 4265 /* LDC requires that the memory being mapped is 8-byte aligned */ 4266 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 4267 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 4268 instance, len, alloc_len); 4269 4270 ASSERT(alloc_len != 0); /* sanity check */ 4271 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 4272 4273 if (cmd == DKIOCSVTOC) { 4274 /* 4275 * Save a copy of the current VTOC so that we can roll back 4276 * if the setting of the new VTOC fails. 
4277 */ 4278 bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc)); 4279 } 4280 4281 /* 4282 * Call the conversion function for this ioctl whhich if necessary 4283 * converts from the Solaris format to the format ARC'ed 4284 * as part of the vDisk protocol (FWARC 2006/195) 4285 */ 4286 ASSERT(iop->convert != NULL); 4287 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 4288 if (rv != 0) { 4289 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4290 instance, rv, cmd); 4291 if (mem_p != NULL) 4292 kmem_free(mem_p, alloc_len); 4293 return (rv); 4294 } 4295 4296 /* 4297 * send request to vds to service the ioctl. 4298 */ 4299 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 4300 SDPART(dev), 0, CB_SYNC, (void*)(uint64_t)mode, 4301 VIO_both_dir); 4302 4303 if (rv != 0) { 4304 /* 4305 * This is not necessarily an error. The ioctl could 4306 * be returning a value such as ENOTTY to indicate 4307 * that the ioctl is not applicable. 4308 */ 4309 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 4310 instance, rv, cmd); 4311 if (mem_p != NULL) 4312 kmem_free(mem_p, alloc_len); 4313 4314 if (cmd == DKIOCSVTOC) { 4315 /* update of the VTOC has failed, roll back */ 4316 bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc)); 4317 } 4318 4319 return (rv); 4320 } 4321 4322 if (cmd == DKIOCSVTOC) { 4323 /* 4324 * The VTOC has been changed. We need to update the device 4325 * nodes to handle the case where an EFI label has been 4326 * changed to a VTOC label. We also try and update the device 4327 * node properties. Failing to set the properties should 4328 * not cause an error to be return the caller though. 4329 */ 4330 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4331 (void) vdc_create_device_nodes_vtoc(vdc); 4332 4333 if (vdc_create_device_nodes_props(vdc)) { 4334 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4335 " properties", vdc->instance); 4336 } 4337 4338 } else if (cmd == DKIOCSETEFI) { 4339 /* 4340 * The EFI has been changed. 
We need to update the device 4341 * nodes to handle the case where a VTOC label has been 4342 * changed to an EFI label. We also try and update the device 4343 * node properties. Failing to set the properties should 4344 * not cause an error to be return the caller though. 4345 */ 4346 struct dk_gpt *efi; 4347 size_t efi_len; 4348 4349 vdc->vdisk_label = VD_DISK_LABEL_EFI; 4350 (void) vdc_create_device_nodes_efi(vdc); 4351 4352 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 4353 4354 if (rv == 0) { 4355 vdc_store_efi(vdc, efi); 4356 rv = vdc_create_device_nodes_props(vdc); 4357 vd_efi_free(efi, efi_len); 4358 } 4359 4360 if (rv) { 4361 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4362 " properties", vdc->instance); 4363 } 4364 } 4365 4366 /* 4367 * Call the conversion function (if it exists) for this ioctl 4368 * which converts from the format ARC'ed as part of the vDisk 4369 * protocol (FWARC 2006/195) back to a format understood by 4370 * the rest of Solaris. 4371 */ 4372 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 4373 if (rv != 0) { 4374 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4375 instance, rv, cmd); 4376 if (mem_p != NULL) 4377 kmem_free(mem_p, alloc_len); 4378 return (rv); 4379 } 4380 4381 if (mem_p != NULL) 4382 kmem_free(mem_p, alloc_len); 4383 4384 return (rv); 4385 } 4386 4387 /* 4388 * Function: 4389 * 4390 * Description: 4391 * This is an empty conversion function used by ioctl calls which 4392 * do not need to convert the data being passed in/out to userland 4393 */ 4394 static int 4395 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 4396 { 4397 _NOTE(ARGUNUSED(vdc)) 4398 _NOTE(ARGUNUSED(from)) 4399 _NOTE(ARGUNUSED(to)) 4400 _NOTE(ARGUNUSED(mode)) 4401 _NOTE(ARGUNUSED(dir)) 4402 4403 return (0); 4404 } 4405 4406 static int 4407 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 4408 int mode, int dir) 4409 { 4410 _NOTE(ARGUNUSED(vdc)) 4411 4412 if (dir == VD_COPYIN) 4413 return 
(0); /* nothing to do */ 4414 4415 if (ddi_copyout(from, to, sizeof (int), mode) != 0) 4416 return (EFAULT); 4417 4418 return (0); 4419 } 4420 4421 static int 4422 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 4423 int mode, int dir) 4424 { 4425 _NOTE(ARGUNUSED(vdc)) 4426 4427 if (dir == VD_COPYOUT) 4428 return (0); /* nothing to do */ 4429 4430 if (ddi_copyin(from, to, sizeof (int), mode) != 0) 4431 return (EFAULT); 4432 4433 return (0); 4434 } 4435 4436 /* 4437 * Function: 4438 * vdc_get_vtoc_convert() 4439 * 4440 * Description: 4441 * This routine performs the necessary convertions from the DKIOCGVTOC 4442 * Solaris structure to the format defined in FWARC 2006/195. 4443 * 4444 * In the struct vtoc definition, the timestamp field is marked as not 4445 * supported so it is not part of vDisk protocol (FWARC 2006/195). 4446 * However SVM uses that field to check it can write into the VTOC, 4447 * so we fake up the info of that field. 4448 * 4449 * Arguments: 4450 * vdc - the vDisk client 4451 * from - the buffer containing the data to be copied from 4452 * to - the buffer to be copied to 4453 * mode - flags passed to ioctl() call 4454 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 4455 * 4456 * Return Code: 4457 * 0 - Success 4458 * ENXIO - incorrect buffer passed in. 4459 * EFAULT - ddi_copyout routine encountered an error. 
4460 */ 4461 static int 4462 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4463 { 4464 int i; 4465 void *tmp_mem = NULL; 4466 void *tmp_memp; 4467 struct vtoc vt; 4468 struct vtoc32 vt32; 4469 int copy_len = 0; 4470 int rv = 0; 4471 4472 if (dir != VD_COPYOUT) 4473 return (0); /* nothing to do */ 4474 4475 if ((from == NULL) || (to == NULL)) 4476 return (ENXIO); 4477 4478 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4479 copy_len = sizeof (struct vtoc32); 4480 else 4481 copy_len = sizeof (struct vtoc); 4482 4483 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4484 4485 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 4486 4487 /* fake the VTOC timestamp field */ 4488 for (i = 0; i < V_NUMPAR; i++) { 4489 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 4490 } 4491 4492 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4493 vtoctovtoc32(vt, vt32); 4494 tmp_memp = &vt32; 4495 } else { 4496 tmp_memp = &vt; 4497 } 4498 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 4499 if (rv != 0) 4500 rv = EFAULT; 4501 4502 kmem_free(tmp_mem, copy_len); 4503 return (rv); 4504 } 4505 4506 /* 4507 * Function: 4508 * vdc_set_vtoc_convert() 4509 * 4510 * Description: 4511 * This routine performs the necessary convertions from the DKIOCSVTOC 4512 * Solaris structure to the format defined in FWARC 2006/195. 
4513 * 4514 * Arguments: 4515 * vdc - the vDisk client 4516 * from - Buffer with data 4517 * to - Buffer where data is to be copied to 4518 * mode - flags passed to ioctl 4519 * dir - direction of copy (in or out) 4520 * 4521 * Return Code: 4522 * 0 - Success 4523 * ENXIO - Invalid buffer passed in 4524 * EFAULT - ddi_copyin of data failed 4525 */ 4526 static int 4527 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4528 { 4529 void *tmp_mem = NULL; 4530 struct vtoc vt; 4531 struct vtoc *vtp = &vt; 4532 vd_vtoc_t vtvd; 4533 int copy_len = 0; 4534 int rv = 0; 4535 4536 if (dir != VD_COPYIN) 4537 return (0); /* nothing to do */ 4538 4539 if ((from == NULL) || (to == NULL)) 4540 return (ENXIO); 4541 4542 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4543 copy_len = sizeof (struct vtoc32); 4544 else 4545 copy_len = sizeof (struct vtoc); 4546 4547 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4548 4549 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4550 if (rv != 0) { 4551 kmem_free(tmp_mem, copy_len); 4552 return (EFAULT); 4553 } 4554 4555 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4556 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 4557 } else { 4558 vtp = tmp_mem; 4559 } 4560 4561 /* 4562 * The VTOC is being changed, then vdc needs to update the copy 4563 * it saved in the soft state structure. 
4564 */ 4565 bcopy(vtp, vdc->vtoc, sizeof (struct vtoc)); 4566 4567 VTOC2VD_VTOC(vtp, &vtvd); 4568 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 4569 kmem_free(tmp_mem, copy_len); 4570 4571 return (0); 4572 } 4573 4574 /* 4575 * Function: 4576 * vdc_get_geom_convert() 4577 * 4578 * Description: 4579 * This routine performs the necessary convertions from the DKIOCGGEOM, 4580 * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 4581 * defined in FWARC 2006/195 4582 * 4583 * Arguments: 4584 * vdc - the vDisk client 4585 * from - Buffer with data 4586 * to - Buffer where data is to be copied to 4587 * mode - flags passed to ioctl 4588 * dir - direction of copy (in or out) 4589 * 4590 * Return Code: 4591 * 0 - Success 4592 * ENXIO - Invalid buffer passed in 4593 * EFAULT - ddi_copyout of data failed 4594 */ 4595 static int 4596 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4597 { 4598 _NOTE(ARGUNUSED(vdc)) 4599 4600 struct dk_geom geom; 4601 int copy_len = sizeof (struct dk_geom); 4602 int rv = 0; 4603 4604 if (dir != VD_COPYOUT) 4605 return (0); /* nothing to do */ 4606 4607 if ((from == NULL) || (to == NULL)) 4608 return (ENXIO); 4609 4610 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 4611 rv = ddi_copyout(&geom, to, copy_len, mode); 4612 if (rv != 0) 4613 rv = EFAULT; 4614 4615 return (rv); 4616 } 4617 4618 /* 4619 * Function: 4620 * vdc_set_geom_convert() 4621 * 4622 * Description: 4623 * This routine performs the necessary convertions from the DKIOCSGEOM 4624 * Solaris structure to the format defined in FWARC 2006/195. 
4625 * 4626 * Arguments: 4627 * vdc - the vDisk client 4628 * from - Buffer with data 4629 * to - Buffer where data is to be copied to 4630 * mode - flags passed to ioctl 4631 * dir - direction of copy (in or out) 4632 * 4633 * Return Code: 4634 * 0 - Success 4635 * ENXIO - Invalid buffer passed in 4636 * EFAULT - ddi_copyin of data failed 4637 */ 4638 static int 4639 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4640 { 4641 _NOTE(ARGUNUSED(vdc)) 4642 4643 vd_geom_t vdgeom; 4644 void *tmp_mem = NULL; 4645 int copy_len = sizeof (struct dk_geom); 4646 int rv = 0; 4647 4648 if (dir != VD_COPYIN) 4649 return (0); /* nothing to do */ 4650 4651 if ((from == NULL) || (to == NULL)) 4652 return (ENXIO); 4653 4654 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4655 4656 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4657 if (rv != 0) { 4658 kmem_free(tmp_mem, copy_len); 4659 return (EFAULT); 4660 } 4661 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 4662 bcopy(&vdgeom, to, sizeof (vdgeom)); 4663 kmem_free(tmp_mem, copy_len); 4664 4665 return (0); 4666 } 4667 4668 static int 4669 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4670 { 4671 _NOTE(ARGUNUSED(vdc)) 4672 4673 vd_efi_t *vd_efi; 4674 dk_efi_t dk_efi; 4675 int rv = 0; 4676 void *uaddr; 4677 4678 if ((from == NULL) || (to == NULL)) 4679 return (ENXIO); 4680 4681 if (dir == VD_COPYIN) { 4682 4683 vd_efi = (vd_efi_t *)to; 4684 4685 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 4686 if (rv != 0) 4687 return (EFAULT); 4688 4689 vd_efi->lba = dk_efi.dki_lba; 4690 vd_efi->length = dk_efi.dki_length; 4691 bzero(vd_efi->data, vd_efi->length); 4692 4693 } else { 4694 4695 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 4696 if (rv != 0) 4697 return (EFAULT); 4698 4699 uaddr = dk_efi.dki_data; 4700 4701 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 4702 4703 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 4704 4705 rv = ddi_copyout(dk_efi.dki_data, 
uaddr, dk_efi.dki_length, 4706 mode); 4707 if (rv != 0) 4708 return (EFAULT); 4709 4710 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 4711 } 4712 4713 return (0); 4714 } 4715 4716 static int 4717 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4718 { 4719 _NOTE(ARGUNUSED(vdc)) 4720 4721 dk_efi_t dk_efi; 4722 void *uaddr; 4723 4724 if (dir == VD_COPYOUT) 4725 return (0); /* nothing to do */ 4726 4727 if ((from == NULL) || (to == NULL)) 4728 return (ENXIO); 4729 4730 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 4731 return (EFAULT); 4732 4733 uaddr = dk_efi.dki_data; 4734 4735 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 4736 4737 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 4738 return (EFAULT); 4739 4740 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 4741 4742 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 4743 4744 return (0); 4745 } 4746 4747 /* 4748 * Function: 4749 * vdc_create_fake_geometry() 4750 * 4751 * Description: 4752 * This routine fakes up the disk info needed for some DKIO ioctls. 4753 * - DKIOCINFO 4754 * - DKIOCGMEDIAINFO 4755 * 4756 * [ just like lofi(7D) and ramdisk(7D) ] 4757 * 4758 * Arguments: 4759 * vdc - soft state pointer for this instance of the device driver. 
4760 * 4761 * Return Code: 4762 * 0 - Success 4763 */ 4764 static int 4765 vdc_create_fake_geometry(vdc_t *vdc) 4766 { 4767 int rv = 0; 4768 4769 ASSERT(vdc != NULL); 4770 4771 /* 4772 * DKIOCINFO support 4773 */ 4774 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 4775 4776 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 4777 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 4778 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 4779 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 4780 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 4781 vdc->cinfo->dki_flags = DKI_FMTVOL; 4782 vdc->cinfo->dki_cnum = 0; 4783 vdc->cinfo->dki_addr = 0; 4784 vdc->cinfo->dki_space = 0; 4785 vdc->cinfo->dki_prio = 0; 4786 vdc->cinfo->dki_vec = 0; 4787 vdc->cinfo->dki_unit = vdc->instance; 4788 vdc->cinfo->dki_slave = 0; 4789 /* 4790 * The partition number will be created on the fly depending on the 4791 * actual slice (i.e. minor node) that is used to request the data. 4792 */ 4793 vdc->cinfo->dki_partition = 0; 4794 4795 /* 4796 * DKIOCGMEDIAINFO support 4797 */ 4798 if (vdc->minfo == NULL) 4799 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 4800 vdc->minfo->dki_media_type = DK_FIXED_DISK; 4801 vdc->minfo->dki_capacity = vdc->vdisk_size; 4802 vdc->minfo->dki_lbsize = DEV_BSIZE; 4803 4804 return (rv); 4805 } 4806 4807 /* 4808 * Function: 4809 * vdc_setup_disk_layout() 4810 * 4811 * Description: 4812 * This routine discovers all the necessary details about the "disk" 4813 * by requesting the data that is available from the vDisk server and by 4814 * faking up the rest of the data. 4815 * 4816 * Arguments: 4817 * vdc - soft state pointer for this instance of the device driver. 
4818 * 4819 * Return Code: 4820 * 0 - Success 4821 */ 4822 static int 4823 vdc_setup_disk_layout(vdc_t *vdc) 4824 { 4825 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 4826 dev_t dev; 4827 int slice = 0; 4828 int rv; 4829 4830 ASSERT(vdc != NULL); 4831 4832 rv = vdc_create_fake_geometry(vdc); 4833 if (rv != 0) { 4834 DMSG(vdc, 0, "[%d] Failed to create disk geometry (err%d)", 4835 vdc->instance, rv); 4836 } 4837 4838 if (vdc->vtoc == NULL) 4839 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 4840 4841 dev = makedevice(ddi_driver_major(vdc->dip), 4842 VD_MAKE_DEV(vdc->instance, 0)); 4843 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); 4844 4845 if (rv && rv != ENOTSUP) { 4846 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 4847 vdc->instance, rv); 4848 return (rv); 4849 } 4850 4851 if (rv == ENOTSUP) { 4852 /* 4853 * If the device does not support VTOC then we try 4854 * to read an EFI label. 4855 */ 4856 struct dk_gpt *efi; 4857 size_t efi_len; 4858 4859 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 4860 4861 if (rv) { 4862 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 4863 vdc->instance, rv); 4864 return (rv); 4865 } 4866 4867 vdc->vdisk_label = VD_DISK_LABEL_EFI; 4868 vdc_store_efi(vdc, efi); 4869 vd_efi_free(efi, efi_len); 4870 4871 return (0); 4872 } 4873 4874 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4875 4876 /* 4877 * FUTURE: This could be default way for reading the VTOC 4878 * from the disk as supposed to sending the VD_OP_GET_VTOC 4879 * to the server. Currently this is a sanity check. 4880 * 4881 * find the slice that represents the entire "disk" and use that to 4882 * read the disk label. 
The convention in Solaris is that slice 2 4883 * represents the whole disk so we check that it is, otherwise we 4884 * default to slice 0 4885 */ 4886 if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && 4887 (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { 4888 slice = 2; 4889 } else { 4890 slice = 0; 4891 } 4892 4893 /* 4894 * Read disk label from start of disk 4895 */ 4896 vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); 4897 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4898 bioinit(buf); 4899 buf->b_un.b_addr = (caddr_t)vdc->label; 4900 buf->b_bcount = DK_LABEL_SIZE; 4901 buf->b_flags = B_BUSY | B_READ; 4902 buf->b_dev = dev; 4903 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)vdc->label, 4904 DK_LABEL_SIZE, slice, 0, CB_STRATEGY, buf, VIO_read_dir); 4905 if (rv) { 4906 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 4907 vdc->instance); 4908 kmem_free(buf, sizeof (buf_t)); 4909 return (rv); 4910 } 4911 rv = biowait(buf); 4912 biofini(buf); 4913 kmem_free(buf, sizeof (buf_t)); 4914 4915 return (rv); 4916 } 4917 4918 /* 4919 * Function: 4920 * vdc_setup_devid() 4921 * 4922 * Description: 4923 * This routine discovers the devid of a vDisk. It requests the devid of 4924 * the underlying device from the vDisk server, builds an encapsulated 4925 * devid based on the retrieved devid and registers that new devid to 4926 * the vDisk. 4927 * 4928 * Arguments: 4929 * vdc - soft state pointer for this instance of the device driver. 4930 * 4931 * Return Code: 4932 * 0 - A devid was succesfully registered for the vDisk 4933 */ 4934 static int 4935 vdc_setup_devid(vdc_t *vdc) 4936 { 4937 int rv; 4938 vd_devid_t *vd_devid; 4939 size_t bufsize, bufid_len; 4940 4941 /* 4942 * At first sight, we don't know the size of the devid that the 4943 * server will return but this size will be encoded into the 4944 * reply. So we do a first request using a default size then we 4945 * check if this size was large enough. 
If not then we do a second 4946 * request with the correct size returned by the server. Note that 4947 * ldc requires size to be 8-byte aligned. 4948 */ 4949 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 4950 sizeof (uint64_t)); 4951 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 4952 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 4953 4954 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 4955 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir); 4956 4957 DMSG(vdc, 2, "sync_op returned %d\n", rv); 4958 4959 if (rv) { 4960 kmem_free(vd_devid, bufsize); 4961 return (rv); 4962 } 4963 4964 if (vd_devid->length > bufid_len) { 4965 /* 4966 * The returned devid is larger than the buffer used. Try again 4967 * with a buffer with the right size. 4968 */ 4969 kmem_free(vd_devid, bufsize); 4970 bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 4971 sizeof (uint64_t)); 4972 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 4973 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 4974 4975 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 4976 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 4977 VIO_both_dir); 4978 4979 if (rv) { 4980 kmem_free(vd_devid, bufsize); 4981 return (rv); 4982 } 4983 } 4984 4985 /* 4986 * The virtual disk should have the same device id as the one associated 4987 * with the physical disk it is mapped on, otherwise sharing a disk 4988 * between a LDom and a non-LDom may not work (for example for a shared 4989 * SVM disk set). 4990 * 4991 * The DDI framework does not allow creating a device id with any 4992 * type so we first create a device id of type DEVID_ENCAP and then 4993 * we restore the orignal type of the physical device. 
4994 */ 4995 4996 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 4997 4998 /* build an encapsulated devid based on the returned devid */ 4999 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 5000 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 5001 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 5002 kmem_free(vd_devid, bufsize); 5003 return (1); 5004 } 5005 5006 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 5007 5008 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 5009 5010 kmem_free(vd_devid, bufsize); 5011 5012 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 5013 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 5014 return (1); 5015 } 5016 5017 return (0); 5018 } 5019 5020 static void 5021 vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi) 5022 { 5023 struct vtoc *vtoc = vdc->vtoc; 5024 5025 vd_efi_to_vtoc(efi, vtoc); 5026 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5027 /* 5028 * vd_efi_to_vtoc() will store information about the EFI Sun 5029 * reserved partition (representing the entire disk) into 5030 * partition 7. However single-slice device will only have 5031 * that single partition and the vdc driver expects to find 5032 * information about that partition in slice 0. So we need 5033 * to copy information from slice 7 to slice 0. 5034 */ 5035 vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag; 5036 vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag; 5037 vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start; 5038 vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; 5039 } 5040 } 5041