/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * LDoms virtual disk client (vdc) device driver
 *
 * This driver runs on a guest logical domain and communicates with the virtual
 * disk server (vds) driver running on the service domain which is exporting
 * virtualized "disks" to the guest logical domain.
 *
 * The driver can be divided into four sections:
 *
 * 1) generic device driver housekeeping
 *	_init, _fini, attach, detach, ops structures, etc.
 *
 * 2) communication channel setup
 *	Setup the communications link over the LDC channel that vdc uses to
 *	talk to the vDisk server. Initialise the descriptor ring which
 *	allows the LDC clients to transfer data via memory mappings.
 *
 * 3) Support exported to upper layers (filesystems, etc)
 *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
 *	ioctl calls. vdc either copies the data to be written into the
 *	descriptor ring, or maps the buffer into which the vDisk server
 *	will store the data it reads. It then sends a message to the
 *	vDisk server requesting it to complete the operation.
 *
 * 4) Handling responses from the vDisk server.
 *	The vDisk server will ACK some or all of the messages vdc sends to it
 *	(this is configured during the handshake). Upon receipt of an ACK
 *	vdc will check the descriptor ring and signal to the upper layer
 *	code waiting on the I/O.
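 *
 *	As a rough illustration (in terms of the helper routines defined
 *	later in this file), a single strategy(9E) write flows as follows:
 *
 *	    vdc_strategy(buf)
 *	      -> vdc_send_request(vdc, VD_OP_BWRITE, ...)
 *	           -> vdc_populate_descriptor()  fill in a free DRing entry
 *	           -> vdc_send()                 post the request to vds
 *	    ... vds performs the write and ACKs ...
 *	    vdc_handle_cb() / vdc_process_msg_thread()
 *	      -> vdc_process_data_msg()          check the ACK, biodone(buf)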
 */

#include <sys/atomic.h>
#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/efi_partition.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mach_descrip.h>
#include <sys/modctl.h>
#include <sys/mdeg.h>
#include <sys/note.h>
#include <sys/open.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/promif.h>
#include <sys/vtoc.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>

#include <sys/cdio.h>
#include <sys/dktp/fdisk.h>
#include <sys/dktp/dadkio.h>
#include <sys/scsi/generic/sense.h>
#include <sys/scsi/impl/uscsi.h>	/* Needed for defn of USCSICMD ioctl */

#include <sys/ldoms.h>
#include <sys/ldc.h>
#include <sys/vio_common.h>
#include <sys/vio_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdc.h>

/*
 * function prototypes
 */

/* standard driver functions */
static int	vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
static int	vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int	vdc_strategy(struct buf *buf);
static int	vdc_print(dev_t dev, char *str);
static int	vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int	vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
			cred_t *credp, int *rvalp);
static int	vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
static int	vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);

static int	vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
			void *arg, void **resultp);
static int	vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int	vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);

/* setup */
static void	vdc_min(struct buf *bufp);
static int	vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
static int	vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node);
static int	vdc_start_ldc_connection(vdc_t *vdc);
static int	vdc_create_device_nodes(vdc_t *vdc);
static int	vdc_create_device_nodes_efi(vdc_t *vdc);
static int	vdc_create_device_nodes_vtoc(vdc_t *vdc);
static int	vdc_create_device_nodes_props(vdc_t *vdc);
static int	vdc_get_md_node(dev_info_t *dip, md_t **mdpp,
			mde_cookie_t *vd_nodep, mde_cookie_t *vd_portp);
static int	vdc_get_ldc_id(md_t *, mde_cookie_t, uint64_t *);
static int	vdc_do_ldc_up(vdc_t *vdc);
static void	vdc_terminate_ldc(vdc_t *vdc);
static int	vdc_init_descriptor_ring(vdc_t *vdc);
static void	vdc_destroy_descriptor_ring(vdc_t *vdc);
static int	vdc_setup_devid(vdc_t *vdc);
static void	vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi);

/* handshake with vds */
static int	vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
static int	vdc_ver_negotiation(vdc_t *vdcp);
static int	vdc_init_attr_negotiation(vdc_t *vdc);
static int	vdc_attr_negotiation(vdc_t *vdcp);
static int	vdc_init_dring_negotiate(vdc_t *vdc);
static int	vdc_dring_negotiation(vdc_t *vdcp);
static int	vdc_send_rdx(vdc_t *vdcp);
static int	vdc_rdx_exchange(vdc_t *vdcp);
static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg);

/* processing incoming messages from vDisk server */
static void	vdc_process_msg_thread(vdc_t *vdc);
static int	vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp);

static uint_t	vdc_handle_cb(uint64_t event, caddr_t arg);
static int	vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg);
static int	vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg);
static int	vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
static int	vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
static int	vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
static int	vdc_send_request(vdc_t *vdcp, int operation,
			caddr_t addr, size_t nbytes, int slice,
			diskaddr_t offset, int cb_type, void *cb_arg,
			vio_desc_direction_t dir);
static int	vdc_map_to_shared_dring(vdc_t *vdcp, int idx);
static int	vdc_populate_descriptor(vdc_t *vdcp, int operation,
			caddr_t addr, size_t nbytes, int slice,
			diskaddr_t offset, int cb_type, void *cb_arg,
			vio_desc_direction_t dir);
static int	vdc_do_sync_op(vdc_t *vdcp, int operation,
			caddr_t addr, size_t nbytes, int slice,
			diskaddr_t offset, int cb_type, void *cb_arg,
			vio_desc_direction_t dir);

static int	vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp);
static int	vdc_drain_response(vdc_t *vdcp);
static int	vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
static int	vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep);
static int	vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg);

/* dkio */
static int	vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
static int	vdc_create_fake_geometry(vdc_t *vdc);
static int	vdc_setup_disk_layout(vdc_t *vdc);
static int	vdc_null_copy_func(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);

/*
 * Module variables
 */

/*
 * Tunable variables to control how long vdc waits before timing out on
 * various operations
 */
static int	vdc_retries = 10;
static int	vdc_hshake_retries = 3;

static int	vdc_timeout = 0;	/* units: seconds */

/* calculated from 'vdc_usec_timeout' during attach */
static uint64_t	vdc_hz_timeout;				/* units: ticks */
static uint64_t	vdc_usec_timeout = 30 * MICROSEC;	/* 30 sec in usec */

static uint64_t vdc_hz_min_ldc_delay;
static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC;
static uint64_t vdc_hz_max_ldc_delay;
static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC;

static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC;
static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC;

/* values for dumping - need to run in a tighter loop */
static uint64_t	vdc_usec_timeout_dump = 100 * MILLISEC;	/* 0.1 sec in usec */
static int	vdc_dump_retries = 100;
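
/*
 * Note: the vdc_hz_* values above are not tunables in their own right;
 * they are derived from the corresponding usec values at attach time
 * via drv_usectohz(9F), e.g.
 *
 *	vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc);
 *
 * so changes to the usec tunables only affect instances attached
 * afterwards.
 */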

/* Count of the number of vdc instances attached */
static volatile uint32_t	vdc_instance_count = 0;

/* Soft state pointer */
static void	*vdc_state;

/*
 * Controlling the verbosity of the error/debug messages
 *
 * vdc_msglevel - controls level of messages
 * vdc_matchinst - 64-bit variable where each bit corresponds
 *                 to the vdc instance to which vdc_msglevel applies.
 */
int		vdc_msglevel = 0x0;
uint64_t	vdc_matchinst = 0ull;

/*
 * Supported vDisk protocol version pairs.
 *
 * The first array entry is the latest and preferred version.
 */
static const vio_ver_t	vdc_version[] = {{1, 0}};

static struct cb_ops vdc_cb_ops = {
	vdc_open,	/* cb_open */
	vdc_close,	/* cb_close */
	vdc_strategy,	/* cb_strategy */
	vdc_print,	/* cb_print */
	vdc_dump,	/* cb_dump */
	vdc_read,	/* cb_read */
	vdc_write,	/* cb_write */
	vdc_ioctl,	/* cb_ioctl */
	nodev,		/* cb_devmap */
	nodev,		/* cb_mmap */
	nodev,		/* cb_segmap */
	nochpoll,	/* cb_chpoll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* cb_str */
	D_MP | D_64BIT,	/* cb_flag */
	CB_REV,		/* cb_rev */
	vdc_aread,	/* cb_aread */
	vdc_awrite	/* cb_awrite */
};

static struct dev_ops vdc_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	vdc_getinfo,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vdc_attach,	/* devo_attach */
	vdc_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	&vdc_cb_ops,	/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk client",
	&vdc_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/* -------------------------------------------------------------------------- */

/*
 * Device Driver housekeeping and setup
 */

int
_init(void)
{
	int	status;

	if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0)
		return (status);
	if ((status = mod_install(&modlinkage)) != 0) {
		ddi_soft_state_fini(&vdc_state);
		return (status);
	}
	vdc_efi_init(vd_process_ioctl);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;

	if ((status = mod_remove(&modlinkage)) != 0)
		return (status);
	vdc_efi_fini();
	ddi_soft_state_fini(&vdc_state);
	return (0);
}

static int
vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	_NOTE(ARGUNUSED(dip))

	int	instance = VDCUNIT((dev_t)arg);
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
			*resultp = NULL;
			return (DDI_FAILURE);
		}
		*resultp = vdc->dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);
	default:
		*resultp = NULL;
		return (DDI_FAILURE);
	}
}

static int
vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	instance;
	int	rv;
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_DETACH:
		/* the real work happens below */
		break;
	case DDI_SUSPEND:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	ASSERT(cmd == DDI_DETACH);
	instance = ddi_get_instance(dip);
	DMSGX(1, "[%d] Entered\n", instance);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	if (vdc->open_count) {
		DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance);
		return (DDI_FAILURE);
	}

	DMSG(vdc, 0, "[%d] proceeding...\n", instance);

	/* mark instance as detaching */
	vdc->lifecycle = VDC_LC_DETACHING;

	/*
	 * try and disable callbacks to prevent another handshake
	 */
	rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
	DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv);

	if (vdc->initialized & VDC_THREAD) {
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET)) {
			vdc->read_state = VDC_READ_RESET;
			cv_signal(&vdc->read_cv);
		}

		mutex_exit(&vdc->read_lock);

		/* wake up any thread waiting for connection to come online */
		mutex_enter(&vdc->lock);
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0,
			    "[%d] write reset - move to resetting state...\n",
			    instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}
		mutex_exit(&vdc->lock);

		/* now wait until state transitions to VDC_STATE_DETACH */
		thread_join(vdc->msg_proc_thr->t_did);
		ASSERT(vdc->state == VDC_STATE_DETACH);
		DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n",
		    vdc->instance);
	}

	mutex_enter(&vdc->lock);

	if (vdc->initialized & VDC_DRING)
		vdc_destroy_descriptor_ring(vdc);

	if (vdc->initialized & VDC_LDC)
		vdc_terminate_ldc(vdc);

	mutex_exit(&vdc->lock);

	if (vdc->initialized & VDC_MINOR) {
		ddi_prop_remove_all(dip);
		ddi_remove_minor_node(dip, NULL);
	}

	if (vdc->initialized & VDC_LOCKS) {
		mutex_destroy(&vdc->lock);
		mutex_destroy(&vdc->read_lock);
		cv_destroy(&vdc->initwait_cv);
		cv_destroy(&vdc->dring_free_cv);
		cv_destroy(&vdc->membind_cv);
		cv_destroy(&vdc->sync_pending_cv);
		cv_destroy(&vdc->sync_blocked_cv);
		cv_destroy(&vdc->read_cv);
		cv_destroy(&vdc->running_cv);
	}

	if (vdc->minfo)
		kmem_free(vdc->minfo, sizeof (struct dk_minfo));

	if (vdc->cinfo)
		kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));

	if (vdc->vtoc)
		kmem_free(vdc->vtoc, sizeof (struct vtoc));

	if (vdc->label)
		kmem_free(vdc->label, DK_LABEL_SIZE);

	if (vdc->devid) {
		ddi_devid_unregister(dip);
		ddi_devid_free(vdc->devid);
	}

	if (vdc->initialized & VDC_SOFT_STATE)
		ddi_soft_state_free(vdc_state, instance);

	DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc);

	return (DDI_SUCCESS);
}


static int
vdc_do_attach(dev_info_t *dip)
{
	int		instance;
	vdc_t		*vdc = NULL;
	int		status;
	md_t		*mdp;
	mde_cookie_t	vd_node, vd_port;

	ASSERT(dip != NULL);

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure",
		    instance);
		return (DDI_FAILURE);
	}

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	/*
	 * Assigning (rather than OR-ing) zeroes out 'initialized'; bits
	 * are then set in it one by one to record what has been done so
	 * far, which tells vdc_detach() what needs to be undone.
	 */
	vdc->initialized = VDC_SOFT_STATE;

	vdc_hz_timeout = drv_usectohz(vdc_usec_timeout);

	vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc);
	vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc);

	vdc->dip	= dip;
	vdc->instance	= instance;
	vdc->open_count	= 0;
	vdc->vdisk_type	= VD_DISK_TYPE_UNK;
	vdc->vdisk_label = VD_DISK_LABEL_UNK;
	vdc->state	= VDC_STATE_INIT;
	vdc->lifecycle	= VDC_LC_ATTACHING;
	vdc->ldc_state	= 0;
	vdc->session_id = 0;
	vdc->block_size = DEV_BSIZE;
	vdc->max_xfer_sz = maxphys / DEV_BSIZE;

	vdc->vtoc = NULL;
	vdc->cinfo = NULL;
	vdc->minfo = NULL;

	mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL);

	vdc->threads_pending = 0;
	vdc->sync_op_pending = B_FALSE;
	vdc->sync_op_blocked = B_FALSE;
	cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL);

	/* init blocking msg read functionality */
	mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL);
	vdc->read_state = VDC_READ_IDLE;

	vdc->initialized |= VDC_LOCKS;

	/* get device and port MD node for this disk instance */
	if (vdc_get_md_node(dip, &mdp, &vd_node, &vd_port) != 0) {
		cmn_err(CE_NOTE, "[%d] Could not get machine description node",
		    instance);
		return (DDI_FAILURE);
	}

	/* set the connection timeout */
	if (vd_port == NULL || (md_get_prop_val(mdp, vd_port,
	    VDC_MD_TIMEOUT, &vdc->ctimeout) != 0)) {
		vdc->ctimeout = 0;
	}

	/* initialise LDC channel which will be used to communicate with vds */
	status = vdc_do_ldc_init(vdc, mdp, vd_node);

	(void) md_fini_handle(mdp);

	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance);
		goto return_status;
	}

	/* initialize the thread responsible for managing state with server */
	vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread,
	    vdc, 0, &p0, TS_RUN, minclsyspri);
	if (vdc->msg_proc_thr == NULL) {
		cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread",
		    instance);
		return (DDI_FAILURE);
	}

	vdc->initialized |= VDC_THREAD;

	atomic_inc_32(&vdc_instance_count);

	/*
	 * Once the handshake is complete, we can use the DRing to send
	 * requests to the vDisk server to calculate the geometry and
	 * VTOC of the "disk"
	 */
	status = vdc_setup_disk_layout(vdc);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to discover disk layout (err=%d)",
		    vdc->instance, status);
		goto return_status;
	}

	/*
	 * Now that we have the device info we can create the
	 * device nodes and properties
	 */
	status = vdc_create_device_nodes(vdc);
	if (status) {
		DMSG(vdc, 0, "[%d] Failed to create device nodes",
		    instance);
		goto return_status;
	}
	status = vdc_create_device_nodes_props(vdc);
	if (status) {
		DMSG(vdc, 0, "[%d] Failed to create device nodes"
		    " properties (%d)", instance, status);
		goto return_status;
	}

	/*
	 * Setup devid
	 */
	if (vdc_setup_devid(vdc)) {
		DMSG(vdc, 0, "[%d] No device id available\n", instance);
	}

	ddi_report_dev(dip);
	vdc->lifecycle = VDC_LC_ONLINE;
	DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance);

return_status:
	DMSG(vdc, 0, "[%d] Attach completed\n", instance);
	return (status);
}

static int
vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	status;

	switch (cmd) {
	case DDI_ATTACH:
		if ((status = vdc_do_attach(dip)) != 0)
			(void) vdc_detach(dip, DDI_DETACH);
		return (status);
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}

static int
vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node)
{
	int		status = 0;
	ldc_status_t	ldc_state;
	ldc_attr_t	ldc_attr;
	uint64_t	ldc_id = 0;

	ASSERT(vdc != NULL);

	vdc->initialized |= VDC_LDC;

	if ((status = vdc_get_ldc_id(mdp, vd_node, &ldc_id)) != 0) {
		DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property",
		    vdc->instance);
		return (EIO);
	}

	DMSGX(0, "[%d] LDC id is 0x%lx\n", vdc->instance, ldc_id);

	vdc->ldc_id = ldc_id;

	ldc_attr.devclass = LDC_DEV_BLK;
	ldc_attr.instance = vdc->instance;
	ldc_attr.mode = LDC_MODE_UNRELIABLE;	/* unreliable transport */
	ldc_attr.mtu = VD_LDC_MTU;

	if ((vdc->initialized & VDC_LDC_INIT) == 0) {
		status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d",
			    vdc->instance, ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_INIT;
	}
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]",
		    vdc->instance, status);
		return (status);
	}
	vdc->ldc_state = ldc_state;

	if ((vdc->initialized & VDC_LDC_CB) == 0) {
		status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
		    (caddr_t)vdc);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)",
			    vdc->instance, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_CB;
	}

	vdc->initialized |= VDC_LDC;

	/*
	 * Now that LDC is initialised, try to open the connection.
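	 *
	 * (As a rough sketch of the channel lifecycle under the usual LDC
	 * semantics: ldc_init() leaves the channel in LDC_INIT, ldc_open()
	 * moves it to an opened state, and the later ldc_up() call issued
	 * via vdc_start_ldc_connection() attempts to bring it to LDC_UP
	 * once the vds end is ready.)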
	 */
	if (vdc->ldc_state == LDC_INIT) {
		status = ldc_open(vdc->ldc_handle);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d",
			    vdc->instance, vdc->ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_OPEN;
	}

	return (status);
}

static int
vdc_start_ldc_connection(vdc_t *vdc)
{
	int	status = 0;

	ASSERT(vdc != NULL);

	ASSERT(MUTEX_HELD(&vdc->lock));

	status = vdc_do_ldc_up(vdc);

	DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance);

	return (status);
}

static int
vdc_stop_ldc_connection(vdc_t *vdcp)
{
	int	status;

	DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n",
	    vdcp->state);

	status = ldc_down(vdcp->ldc_handle);
	DMSG(vdcp, 0, "ldc_down() = %d\n", status);

	vdcp->initialized &= ~VDC_HANDSHAKE;
	DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized);

	return (status);
}

static int
vdc_create_device_nodes_efi(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "h");
	ddi_remove_minor_node(vdc->dip, "h,raw");

	if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add raw node 'wd,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}

static int
vdc_create_device_nodes_vtoc(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "wd");
	ddi_remove_minor_node(vdc->dip, "wd,raw");

	if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add raw node 'h,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}

/*
 * Function:
 *	vdc_create_device_nodes
 *
 * Description:
 *	This function creates the block and character device nodes under
 *	/devices along with the node properties. It is called as part of
 *	the attach(9E) of the instance during the handshake with vds after
 *	vds has sent the attributes to vdc.
 *
 *	If the device is of type VD_DISK_TYPE_SLICE then minor node 2 is
 *	used, in keeping with the Solaris convention that slice 2 refers
 *	to a whole disk. Slices start at 'a'.
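 *
 *	For example, a full virtual disk gets minor nodes "a"/"a,raw"
 *	through "h"/"h,raw", except that an EFI-labelled disk gets
 *	"wd"/"wd,raw" in place of "h"/"h,raw" (see the loop below and
 *	vdc_create_device_nodes_efi()).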
 *
 * Parameters:
 *	vdc - soft state pointer
 *
 * Return Values
 *	0	- Success
 *	EIO	- Failed to create node
 *	EINVAL	- Unknown type of disk exported
 */
static int
vdc_create_device_nodes(vdc_t *vdc)
{
	char		name[sizeof ("s,raw")];
	dev_info_t	*dip = NULL;
	int		instance, status;
	int		num_slices = 1;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	/*
	 * Minor nodes are different for EFI disks: EFI disks do not have
	 * a minor node 'h' for the minor number corresponding to slice
	 * VD_EFI_WD_SLICE (slice 7); instead they have a minor node 'wd'
	 * representing the whole disk.
	 */
	for (i = 0; i < num_slices; i++) {

		if (i == VD_EFI_WD_SLICE) {
			if (vdc->vdisk_label == VD_DISK_LABEL_EFI)
				status = vdc_create_device_nodes_efi(vdc);
			else
				status = vdc_create_device_nodes_vtoc(vdc);
			if (status != 0)
				return (status);
			continue;
		}

		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
		if (ddi_create_minor_node(dip, name, S_IFBLK,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'",
			    instance, name);
			return (EIO);
		}

		/* if any device node is created we set this flag */
		vdc->initialized |= VDC_MINOR;

		(void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw");

		if (ddi_create_minor_node(dip, name, S_IFCHR,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'",
			    instance, name);
			return (EIO);
		}
	}

	return (0);
}

/*
 * Function:
 *	vdc_create_device_nodes_props
 *
 * Description:
 *	This function creates the size and number-of-blocks properties
 *	(VDC_SIZE_PROP_NAME, VDC_NBLOCKS_PROP_NAME) for each minor node of
 *	the instance, using the slice sizes recorded in the VTOC. It is
 *	called as part of the attach(9E) of the instance during the
 *	handshake with vds after vds has sent the attributes to vdc.
 *
 * Parameters:
 *	vdc - soft state pointer
 *
 * Return Values
 *	0	- Success
 *	EIO	- Failed to create device node property
 *	EINVAL	- Unknown type of disk exported
 *	ENXIO	- No VTOC available
 */
static int
vdc_create_device_nodes_props(vdc_t *vdc)
{
	dev_info_t	*dip = NULL;
	int		instance;
	int		num_slices = 1;
	int64_t		size = 0;
	dev_t		dev;
	int		rv;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) {
		DMSG(vdc, 0, "![%d] Could not create device node property."
		    " No VTOC available", instance);
		return (ENXIO);
	}

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	for (i = 0; i < num_slices; i++) {
		dev = makedevice(ddi_driver_major(dip),
		    VD_MAKE_DEV(instance, i));

		size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz;
		DMSG(vdc, 0, "[%d] sz %ld (%ld MB) p_size %lx\n",
		    instance, size, size / (1024 * 1024),
		    vdc->vtoc->v_part[i].p_size);

		rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size);
		if (rv != DDI_PROP_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]",
			    instance, VDC_SIZE_PROP_NAME, size);
			return (EIO);
		}

		rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME,
		    lbtodb(size));
		if (rv != DDI_PROP_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]",
			    instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size));
			return (EIO);
		}
	}

	return (0);
}

static int
vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	ASSERT(dev != NULL);
	instance = VDCUNIT(*dev);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n",
	    getminor(*dev), flag, otyp);

	mutex_enter(&vdc->lock);
	vdc->open_count++;
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	instance = VDCUNIT(dev);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp);
	if (vdc->dkio_flush_pending) {
		DMSG(vdc, 0,
		    "[%d] Cannot detach: %d outstanding DKIO flushes\n",
		    instance, vdc->dkio_flush_pending);
		return (EBUSY);
	}

	/*
	 * Should not need the mutex here, since the framework should protect
	 * against more opens on this device, but just in case.
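	 *
	 * (vdc->lock also keeps open_count consistent with the check made
	 * in vdc_detach(), which refuses to detach while the device is
	 * open.)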
	 */
	mutex_enter(&vdc->lock);
	vdc->open_count--;
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	_NOTE(ARGUNUSED(credp))
	_NOTE(ARGUNUSED(rvalp))

	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
}

static int
vdc_print(dev_t dev, char *str)
{
	cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str);
	return (0);
}

static int
vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int	rv;
	size_t	nbytes = nblk * DEV_BSIZE;
	int	instance = VDCUNIT(dev);
	vdc_t	*vdc = NULL;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n",
	    instance, nbytes, blkno, (void *)addr);
	rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes,
	    VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir);
	if (rv) {
		DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv);
		return (rv);
	}

	if (ddi_in_panic())
		(void) vdc_drain_response(vdc);

	DMSG(vdc, 0, "[%d] End\n", instance);

	return (0);
}

/* -------------------------------------------------------------------------- */

/*
 * Disk access routines
 *
 */

/*
 * vdc_strategy()
 *
 * Return Value:
 *	0:	As per strategy(9E), the strategy() function must return 0
 *		[ bioerror(9F) is used to flag the error on the buf(9S)
 *		  before biodone() is called ]
 */
static int
vdc_strategy(struct buf *buf)
{
	int	rv = -1;
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(buf->b_edev);
	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;
	int	slice;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n",
	    instance, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr);
	DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc);

	bp_mapin(buf);

	if ((long)buf->b_private == VD_SLICE_NONE) {
		/* I/O using an absolute disk offset */
		slice = VD_SLICE_NONE;
	} else {
		slice = VDCPART(buf->b_edev);
	}

	rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr,
	    buf->b_bcount, slice, buf->b_lblkno,
	    CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir :
	    VIO_write_dir);

	/*
	 * If the request was successfully sent, the strategy call returns and
	 * the ACK handler calls the bioxxx functions when the vDisk server is
	 * done.
	 */
	if (rv) {
		DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv);
		bioerror(buf, rv);
		biodone(buf);
	}

	return (0);
}

/*
 * Function:
 *	vdc_min
 *
 * Description:
 *	Routine to limit the size of a data transfer. Used in
 *	conjunction with physio(9F).
 *
 * Arguments:
 *	bufp - pointer to the indicated buf(9S) struct.
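 *
 *	(vdc_min() is handed to physio(9F)/aphysio(9F) in the read/write
 *	entry points below, so that a large request is broken into chunks
 *	of at most max_xfer_sz * block_size bytes, the largest transfer
 *	size this client advertised to the vDisk server.)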
 *
 */
static void
vdc_min(struct buf *bufp)
{
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(bufp->b_edev);

	vdc = ddi_get_soft_state(vdc_state, instance);
	VERIFY(vdc != NULL);

	if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) {
		bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size;
	}
}

static int
vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio));
}

static int
vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio));
}

static int
vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio));
}

static int
vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio));
}


/* -------------------------------------------------------------------------- */

/*
 * Handshake support
 */


/*
 * Function:
 *	vdc_init_ver_negotiation()
 *
 * Description:
 *	Initiate the version negotiation with vds by sending a VIO_VER_INFO
 *	message carrying the proposed protocol version.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *	ver - version to propose to the server.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
{
	vio_ver_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status = -1;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance);

	/*
	 * set the Session ID to a unique value
	 * (the lower 32 bits of the clock tick)
	 */
	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);
	DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id);

	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_VER_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	pkt.dev_class = VDEV_DISK;
	pkt.ver_major = ver.major;
	pkt.ver_minor = ver.minor;

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n",
	    vdc->instance, status);
	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
		DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: "
		    "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vio_ver_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_ver_negotiation()
 *
 * Description:
 *	Send the proposed version to vds and wait for, validate, and
 *	process its response.
 *
 * Arguments:
 *	vdcp - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_ver_negotiation(vdc_t *vdcp)
{
	vio_msg_t vio_msg;
	int status;

	if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0]))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Ver negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg));
}

/*
 * Function:
 *	vdc_init_attr_negotiation()
 *
 * Description:
 *	Initiate the attribute exchange with vds by sending a VIO_ATTR_INFO
 *	message describing this client's transfer parameters.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_attr_negotiation(vdc_t *vdc)
{
	vd_attr_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] entered\n", vdc->instance);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.max_xfer_sz = vdc->max_xfer_sz;
	pkt.vdisk_block_size = vdc->block_size;
	pkt.xfer_mode = VIO_DRING_MODE;
	pkt.operations = 0;	/* server will set bits of valid operations */
	pkt.vdisk_type = 0;	/* server will set to valid device type */
	pkt.vdisk_size = 0;	/* server will set to valid size */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(vdc, 0, "Attr info sent (status = %d)\n", status);

	if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) {
		DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: "
		    "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vd_attr_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_attr_negotiation()
 *
 * Description:
 *	Send this client's attributes to vds and wait for, validate, and
 *	process its response.
 *
 * Arguments:
 *	vdcp - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_attr_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_attr_negotiation(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Attr negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_init_dring_negotiate()
 *
 * Description:
 *	Set up the local descriptor ring and propose it to vds by sending
 *	a VIO_DRING_REG message carrying the ring's LDC memory cookie.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_dring_negotiate(vdc_t *vdc)
{
	vio_dring_reg_msg_t	pkt;
	size_t			msglen = sizeof (pkt);
	int			status = -1;
	int			retry;
	int			nretries = 10;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	for (retry = 0; retry < nretries; retry++) {
		status = vdc_init_descriptor_ring(vdc);
		if (status != EAGAIN)
			break;
		drv_usecwait(vdc_min_timeout_ldc);
	}

	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n",
		    vdc->instance, status);
		return (status);
	}

	DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n",
	    vdc->instance, status);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_DRING_REG;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.dring_ident = 0;
	pkt.num_descriptors = vdc->dring_len;
	pkt.descriptor_size = vdc->dring_entry_size;
	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
	pkt.ncookies = vdc->dring_cookie_count;
	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)",
		    vdc->instance, status);
	}

	return (status);
}


/*
 * Function:
 *	vdc_dring_negotiation()
 *
 * Description:
 *	Register the descriptor ring with vds and wait for, validate, and
 *	process its response.
 *
 * Arguments:
 *	vdcp - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_dring_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_dring_negotiate(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Dring negotiation response,"
		    " rv(%d)", vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_dring_reg_msg(vdcp,
	    (vio_dring_reg_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_send_rdx()
 *
 * Description:
 *	Send an RDX message to vds to indicate that the handshake is
 *	complete and this client is ready to transfer data.
 *
 * Arguments:
 *	vdcp - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_send_rdx(vdc_t *vdcp)
{
	vio_msg_t	msg;
	size_t		msglen = sizeof (vio_msg_t);
	int		status;

	/*
	 * Send an RDX message to vds to indicate we are ready
	 * to send data
	 */
	msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	msg.tag.vio_subtype_env = VIO_RDX;
	msg.tag.vio_sid = vdcp->session_id;
	status = vdc_send(vdcp, (caddr_t)&msg, &msglen);
	if (status != 0) {
		DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)",
		    vdcp->instance, status);
	}

	return (status);
}

/*
 * Function:
 *	vdc_handle_rdx()
 *
 * Description:
 *	Process the RDX ACK from vds; beyond the sanity checks there is
 *	nothing to do, as receipt of the ACK completes the handshake.
 *
 * Arguments:
 *	vdcp - soft state pointer for this instance of the device driver.
 *	msgp - received msg
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
{
	_NOTE(ARGUNUSED(vdcp))
	_NOTE(ARGUNUSED(msgp))

	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);

	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);

	return (0);
}

/*
 * Function:
 *	vdc_rdx_exchange()
 *
 * Description:
 *	Send the RDX message to vds and wait for, validate, and process
 *	its ACK.
 *
 * Arguments:
 *	vdcp - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_rdx_exchange(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_send_rdx(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
		DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	boolean_t	q_has_pkts = B_FALSE;
	int		delay_time;
	size_t		len;

	mutex_enter(&vdc->read_lock);

	if (vdc->read_state == VDC_READ_IDLE)
		vdc->read_state = VDC_READ_WAITING;

	while (vdc->read_state != VDC_READ_PENDING) {

		/* detect if the connection has been reset */
		if (vdc->read_state == VDC_READ_RESET) {
			status = ECONNRESET;
			goto done;
		}

		cv_wait(&vdc->read_cv, &vdc->read_lock);
	}

	/*
	 * Until we get a blocking ldc read we have to retry
	 * until the entire LDC message has arrived before
	 * ldc_read() will succeed. Note we also bail out if
	 * the channel is reset or goes away.
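	 *
	 * (The retry below polls with a geometric backoff: the delay
	 * starts at vdc_ldc_read_init_delay and doubles on each EAGAIN
	 * from ldc_read(), capped at vdc_ldc_read_max_delay.)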
	 */
	delay_time = vdc_ldc_read_init_delay;
loop:
	len = *nbytesp;
	status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len);
	switch (status) {
	case EAGAIN:
		delay_time *= 2;
		if (delay_time >= vdc_ldc_read_max_delay)
			delay_time = vdc_ldc_read_max_delay;
		delay(delay_time);
		goto loop;

	case 0:
		if (len == 0) {
			DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with "
			    "no error!\n", vdc->instance);
			goto loop;
		}

		*nbytesp = len;

		/*
		 * If there are pending messages, leave the
		 * read state as pending. Otherwise, set the state
		 * back to idle.
		 */
		status = ldc_chkq(vdc->ldc_handle, &q_has_pkts);
		if (status == 0 && !q_has_pkts)
			vdc->read_state = VDC_READ_IDLE;

		break;
	default:
		DMSG(vdc, 0, "ldc_read returned %d\n", status);
		break;
	}

done:
	mutex_exit(&vdc->read_lock);

	return (status);
}



#ifdef DEBUG
void
vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg)
{
	char *ms, *ss, *ses;
	switch (msg->tag.vio_msgtype) {
#define	Q(_s)	case _s : ms = #_s; break;
	Q(VIO_TYPE_CTRL)
	Q(VIO_TYPE_DATA)
	Q(VIO_TYPE_ERR)
#undef Q
	default: ms = "unknown"; break;
	}

	switch (msg->tag.vio_subtype) {
#define	Q(_s)	case _s : ss = #_s; break;
	Q(VIO_SUBTYPE_INFO)
	Q(VIO_SUBTYPE_ACK)
	Q(VIO_SUBTYPE_NACK)
#undef Q
	default: ss = "unknown"; break;
	}

	switch (msg->tag.vio_subtype_env) {
#define	Q(_s)	case _s : ses = #_s; break;
	Q(VIO_VER_INFO)
	Q(VIO_ATTR_INFO)
	Q(VIO_DRING_REG)
	Q(VIO_DRING_UNREG)
	Q(VIO_RDX)
	Q(VIO_PKT_DATA)
	Q(VIO_DESC_DATA)
	Q(VIO_DRING_DATA)
#undef Q
	default: ses = "unknown"; break;
	}

	DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, ms, ss, ses);
}
#endif

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write() with a geometric backoff until it succeeds;
 *	any other error is returned to the caller. A serious LDC failure
 *	(EIO or ECONNRESET) additionally triggers a reset of the vdc state.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver;
 *		  the message is sent over this instance's LDC channel.
 *	pkt	- address of LDC message to be sent
 *	msglen	- the size of the message being sent. When the function
 *		  returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0		- Success.
 *	ECONNRESET	- The connection was not up, or LDC reported a
 *			  serious error.
 *	xxx		- other error codes returned by ldc_write
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	status = 0;
	clock_t	delay_ticks;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

#ifdef DEBUG
	vdc_decode_tag(vdc, (vio_msg_t *)pkt);
#endif
	/*
	 * Wait indefinitely to send if channel
	 * is busy, but bail out if we succeed or
	 * if the channel closes or is reset.
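	 *
	 * (As with vdc_recv(), the EWOULDBLOCK retry below backs off
	 * geometrically, from vdc_hz_min_ldc_delay up to
	 * vdc_hz_max_ldc_delay ticks.)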
	 */
	delay_ticks = vdc_hz_min_ldc_delay;
	do {
		size = *msglen;
		status = ldc_write(vdc->ldc_handle, pkt, &size);
		if (status == EWOULDBLOCK) {
			delay(delay_ticks);
			/* geometric backoff */
			delay_ticks *= 2;
			if (delay_ticks > vdc_hz_max_ldc_delay)
				delay_ticks = vdc_hz_max_ldc_delay;
		}
	} while (status == EWOULDBLOCK);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET))
			cv_signal(&vdc->read_cv);
		vdc->read_state = VDC_READ_RESET;
		mutex_exit(&vdc->read_lock);

		/* wake up any waiters in the reset thread */
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0, "[%d] write reset - "
			    "vdc is resetting ..\n", vdc->instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}

		return (ECONNRESET);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}

/*
 * Function:
 *	vdc_get_md_node
 *
 * Description:
 *	Get the MD, the device node and the port node for the given
 *	disk instance. The caller is responsible for cleaning up the
 *	reference to the returned MD (mdpp) by calling md_fini_handle().
 *
 * Arguments:
 *	dip	- dev info pointer for this instance of the device driver.
 *	mdpp	- the returned MD.
 *	vd_nodep - the returned device node.
 *	vd_portp - the returned port node. The returned port node is NULL
 *		   if no port node is found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 *	ENXIO	- Unexpected error communicating with MD framework
 */
static int
vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep,
    mde_cookie_t *vd_portp)
{
	int		status = ENOENT;
	char		*node_name = NULL;
	md_t		*mdp = NULL;
	int		num_nodes;
	int		num_vdevs;
	int		num_vports;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	boolean_t	found_inst = B_FALSE;
	int		listsz;
	int		idx;
	uint64_t	md_inst;
	int		obp_inst;
	int		instance = ddi_get_instance(dip);

	/*
	 * Get the OBP instance number for comparison with the MD instance
	 *
	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris. Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance.
	 * If the "reg" property cannot be found, the device tree state is
	 * presumably so broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
		return (ENOENT);
	}
	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    OBP_REG, -1);
	DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst);

	/*
	 * We now walk the MD nodes to find the node for this vdisk.
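	 *
	 * (Roughly, the scan below and vdc_get_ldc_id() look for, in
	 * terms of the VDC_MD_* names: a VDC_MD_VDEV_NAME node whose
	 * VDC_MD_CFG_HDL matches the OBP "reg" value, a VDC_MD_PORT_NAME
	 * node beneath it (the source of the optional VDC_MD_TIMEOUT
	 * property), and a VDC_MD_CHAN_NAME node whose VDC_MD_ID property
	 * is the LDC channel id.)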
	 */
	if ((mdp = md_get_handle()) == NULL) {
		cmn_err(CE_WARN, "unable to init machine description");
		return (ENXIO);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);

	/* allocate memory for nodes */
	listp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Search for all the virtual devices; we will then check to see
	 * which ones are disk nodes.
	 */
	num_vdevs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, VDC_MD_VDEV_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdevs <= 0) {
		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;
	}

	DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs);
	for (idx = 0; idx < num_vdevs; idx++) {
		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
		if ((status != 0) || (node_name == NULL)) {
			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
			    ": err %d", VDC_MD_VDEV_NAME, status);
			continue;
		}

		DMSGX(1, "[%d] Found node '%s'\n", instance, node_name);
		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
			status = md_get_prop_val(mdp, listp[idx],
			    VDC_MD_CFG_HDL, &md_inst);
			DMSGX(1, "[%d] vdc inst in MD=%lx\n",
			    instance, md_inst);
			if ((status == 0) && (md_inst == obp_inst)) {
				found_inst = B_TRUE;
				break;
			}
		}
	}

	if (!found_inst) {
		DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME);
		status = ENOENT;
		goto done;
	}
	DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst);

	*vd_nodep = listp[idx];
	*mdpp = mdp;

	num_vports = md_scan_dag(mdp, *vd_nodep,
	    md_find_name(mdp, VDC_MD_PORT_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vports != 1) {
		DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n",
		    VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME, num_vports);
	}

	*vd_portp = (num_vports == 0) ? NULL : listp[0];

done:
	kmem_free(listp, listsz);
	return (status);
}

/*
 * Function:
 *	vdc_get_ldc_id()
 *
 * Description:
 *	This function gets the 'ldc-id' for this particular instance of vdc.
 *	The id returned is the guest domain channel endpoint LDC uses for
 *	communication with vds.
 *
 * Arguments:
 *	mdp	- pointer to the machine description.
 *	vd_node	- the vdisk element from the MD.
 *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
2000 */ 2001 static int 2002 vdc_get_ldc_id(md_t *mdp, mde_cookie_t vd_node, uint64_t *ldc_id) 2003 { 2004 mde_cookie_t *chanp = NULL; 2005 int listsz; 2006 int num_chans; 2007 int num_nodes; 2008 int status = 0; 2009 2010 num_nodes = md_node_count(mdp); 2011 ASSERT(num_nodes > 0); 2012 2013 listsz = num_nodes * sizeof (mde_cookie_t); 2014 2015 /* allocate memory for nodes */ 2016 chanp = kmem_zalloc(listsz, KM_SLEEP); 2017 2018 /* get the channels for this node */ 2019 num_chans = md_scan_dag(mdp, vd_node, 2020 md_find_name(mdp, VDC_MD_CHAN_NAME), 2021 md_find_name(mdp, "fwd"), chanp); 2022 2023 /* expecting at least one channel */ 2024 if (num_chans <= 0) { 2025 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 2026 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 2027 status = ENOENT; 2028 goto done; 2029 2030 } else if (num_chans != 1) { 2031 DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2032 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, num_chans); 2033 } 2034 2035 /* 2036 * We use the first channel found (index 0), irrespective of how 2037 * many there are in total. 2038 */ 2039 if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, ldc_id) != 0) { 2040 cmn_err(CE_NOTE, "Channel '%s' property not found", VDC_MD_ID); 2041 status = ENOENT; 2042 } 2043 2044 done: 2045 kmem_free(chanp, listsz); 2046 return (status); 2047 } 2048 2049 static int 2050 vdc_do_ldc_up(vdc_t *vdc) 2051 { 2052 int status; 2053 ldc_status_t ldc_state; 2054 2055 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 2056 vdc->instance, vdc->ldc_id); 2057 2058 if (vdc->lifecycle == VDC_LC_DETACHING) 2059 return (EINVAL); 2060 2061 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 2062 switch (status) { 2063 case ECONNREFUSED: /* listener not ready at other end */ 2064 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 2065 vdc->instance, vdc->ldc_id, status); 2066 status = 0; 2067 break; 2068 default: 2069 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 2070 "channel=%ld, err=%d", vdc->instance, vdc->ldc_id, 2071 status); 2072 break; 2073 } 2074 } 2075 2076 if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) { 2077 vdc->ldc_state = ldc_state; 2078 if (ldc_state == LDC_UP) { 2079 DMSG(vdc, 0, "[%d] LDC channel already up\n", 2080 vdc->instance); 2081 vdc->seq_num = 1; 2082 vdc->seq_num_reply = 0; 2083 } 2084 } 2085 2086 return (status); 2087 } 2088 2089 /* 2090 * Function: 2091 * vdc_terminate_ldc() 2092 * 2093 * Description: 2094 * Close the LDC channel and release all LDC resources held by this instance. 2095 * Arguments: 2096 * vdc - soft state pointer for this instance of the device driver.
2097 * 2098 * Return Code: 2099 * None 2100 */ 2101 static void 2102 vdc_terminate_ldc(vdc_t *vdc) 2103 { 2104 int instance = ddi_get_instance(vdc->dip); 2105 2106 ASSERT(vdc != NULL); 2107 ASSERT(mutex_owned(&vdc->lock)); 2108 2109 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 2110 2111 if (vdc->initialized & VDC_LDC_OPEN) { 2112 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 2113 (void) ldc_close(vdc->ldc_handle); 2114 } 2115 if (vdc->initialized & VDC_LDC_CB) { 2116 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 2117 (void) ldc_unreg_callback(vdc->ldc_handle); 2118 } 2119 if (vdc->initialized & VDC_LDC) { 2120 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 2121 (void) ldc_fini(vdc->ldc_handle); 2122 vdc->ldc_handle = NULL; 2123 } 2124 2125 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 2126 } 2127 2128 /* -------------------------------------------------------------------------- */ 2129 2130 /* 2131 * Descriptor Ring helper routines 2132 */ 2133 2134 /* 2135 * Function: 2136 * vdc_init_descriptor_ring() 2137 * 2138 * Description: 2139 * 2140 * Arguments: 2141 * vdc - soft state pointer for this instance of the device driver. 2142 * 2143 * Return Code: 2144 * 0 - Success 2145 */ 2146 static int 2147 vdc_init_descriptor_ring(vdc_t *vdc) 2148 { 2149 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 2150 int status = 0; 2151 int i; 2152 2153 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 2154 2155 ASSERT(vdc != NULL); 2156 ASSERT(mutex_owned(&vdc->lock)); 2157 ASSERT(vdc->ldc_handle != NULL); 2158 2159 /* ensure we have enough room to store max sized block */ 2160 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2161 2162 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 2163 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2164 /* 2165 * Calculate the maximum block size we can transmit using one 2166 * Descriptor Ring entry from the attributes returned by the 2167 * vDisk server. This is subject to a minimum of 'maxphys' 2168 * as we do not have the capability to split requests over 2169 * multiple DRing entries. 
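* * As a worked example (hypothetical attribute values): with * max_xfer_sz = 256 blocks of block_size = 512 bytes, a single request * can span 128K; on a machine with an 8K PAGESIZE that requires * 128K / 8K = 16 cookies in one DRing entry. If the product fell below * maxphys, the maxphys / PAGESIZE fallback below would be used instead.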
2170 */ 2171 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 2172 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2173 vdc->instance); 2174 vdc->dring_max_cookies = maxphys / PAGESIZE; 2175 } else { 2176 vdc->dring_max_cookies = 2177 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2178 } 2179 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2180 (sizeof (ldc_mem_cookie_t) * 2181 (vdc->dring_max_cookies - 1))); 2182 vdc->dring_len = VD_DRING_LEN; 2183 2184 status = ldc_mem_dring_create(vdc->dring_len, 2185 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 2186 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 2187 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2188 vdc->instance); 2189 return (status); 2190 } 2191 vdc->initialized |= VDC_DRING_INIT; 2192 } 2193 2194 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 2195 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 2196 vdc->dring_cookie = 2197 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 2198 2199 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 2200 LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 2201 &vdc->dring_cookie[0], 2202 &vdc->dring_cookie_count); 2203 if (status != 0) { 2204 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 2205 "(%lx) to channel (%lx) status=%d\n", 2206 vdc->instance, vdc->ldc_dring_hdl, 2207 vdc->ldc_handle, status); 2208 return (status); 2209 } 2210 ASSERT(vdc->dring_cookie_count == 1); 2211 vdc->initialized |= VDC_DRING_BOUND; 2212 } 2213 2214 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 2215 if (status != 0) { 2216 DMSG(vdc, 0, 2217 "[%d] Failed to get info for descriptor ring (%lx)\n", 2218 vdc->instance, vdc->ldc_dring_hdl); 2219 return (status); 2220 } 2221 2222 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 2223 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 2224 2225 /* Allocate the local copy of this dring */ 2226 vdc->local_dring = 2227 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 2228 KM_SLEEP); 2229 vdc->initialized |= VDC_DRING_LOCAL; 2230 } 2231 2232 /* 2233 * Mark all DRing entries as free and initialize the private 2234 * descriptor's memory handles. If any entry is initialized, 2235 * we need to free it later so we set the bit in 'initialized' 2236 * at the start. 2237 */ 2238 vdc->initialized |= VDC_DRING_ENTRY; 2239 for (i = 0; i < vdc->dring_len; i++) { 2240 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 2241 dep->hdr.dstate = VIO_DESC_FREE; 2242 2243 status = ldc_mem_alloc_handle(vdc->ldc_handle, 2244 &vdc->local_dring[i].desc_mhdl); 2245 if (status != 0) { 2246 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 2247 " descriptor %d", vdc->instance, i); 2248 return (status); 2249 } 2250 vdc->local_dring[i].is_free = B_TRUE; 2251 vdc->local_dring[i].dep = dep; 2252 } 2253 2254 /* Initialize the starting index */ 2255 vdc->dring_curr_idx = 0; 2256 2257 return (status); 2258 } 2259 2260 /* 2261 * Function: 2262 * vdc_destroy_descriptor_ring() 2263 * 2264 * Description: 2265 * 2266 * Arguments: 2267 * vdc - soft state pointer for this instance of the device driver. 
2268 * 2269 * Return Code: 2270 * None 2271 */ 2272 static void 2273 vdc_destroy_descriptor_ring(vdc_t *vdc) 2274 { 2275 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2276 ldc_mem_handle_t mhdl = NULL; 2277 ldc_mem_info_t minfo; 2278 int status = -1; 2279 int i; /* loop */ 2280 2281 ASSERT(vdc != NULL); 2282 ASSERT(mutex_owned(&vdc->lock)); 2283 2284 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 2285 2286 if (vdc->initialized & VDC_DRING_ENTRY) { 2287 DMSG(vdc, 0, 2288 "[%d] Removing Local DRing entries\n", vdc->instance); 2289 for (i = 0; i < vdc->dring_len; i++) { 2290 ldep = &vdc->local_dring[i]; 2291 mhdl = ldep->desc_mhdl; 2292 2293 if (mhdl == NULL) 2294 continue; 2295 2296 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 2297 DMSG(vdc, 0, 2298 "ldc_mem_info returned an error: %d\n", 2299 status); 2300 2301 /* 2302 * This must mean that the mem handle 2303 * is not valid. Clear it out so that 2304 * no one tries to use it. 2305 */ 2306 ldep->desc_mhdl = NULL; 2307 continue; 2308 } 2309 2310 if (minfo.status == LDC_BOUND) { 2311 (void) ldc_mem_unbind_handle(mhdl); 2312 } 2313 2314 (void) ldc_mem_free_handle(mhdl); 2315 2316 ldep->desc_mhdl = NULL; 2317 } 2318 vdc->initialized &= ~VDC_DRING_ENTRY; 2319 } 2320 2321 if (vdc->initialized & VDC_DRING_LOCAL) { 2322 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 2323 kmem_free(vdc->local_dring, 2324 vdc->dring_len * sizeof (vdc_local_desc_t)); 2325 vdc->initialized &= ~VDC_DRING_LOCAL; 2326 } 2327 2328 if (vdc->initialized & VDC_DRING_BOUND) { 2329 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 2330 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 2331 if (status == 0) { 2332 vdc->initialized &= ~VDC_DRING_BOUND; 2333 } else { 2334 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2335 vdc->instance, status, vdc->ldc_dring_hdl); 2336 } 2337 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 2338 } 2339 2340 if (vdc->initialized & VDC_DRING_INIT) { 2341 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 2342 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 2343 if (status == 0) { 2344 vdc->ldc_dring_hdl = NULL; 2345 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 2346 vdc->initialized &= ~VDC_DRING_INIT; 2347 } else { 2348 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 2349 vdc->instance, status, vdc->ldc_dring_hdl); 2350 } 2351 } 2352 } 2353 2354 /* 2355 * Function: 2356 * vdc_map_to_shared_dring() 2357 * 2358 * Description: 2359 * Copy contents of the local descriptor to the shared 2360 * memory descriptor. 2361 * 2362 * Arguments: 2363 * vdcp - soft state pointer for this instance of the device driver.
2364 * idx - descriptor ring index 2365 * 2366 * Return Code: 2367 * 0 - Success; otherwise an error code from vdc_populate_mem_hdl() 2368 */ 2369 static int 2370 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 2371 { 2372 vdc_local_desc_t *ldep; 2373 vd_dring_entry_t *dep; 2374 int rv; 2375 2376 ldep = &(vdcp->local_dring[idx]); 2377 2378 /* bind the data buffer into LDC memory if there is data to transfer */ 2379 if (ldep->nbytes > 0) { 2380 rv = vdc_populate_mem_hdl(vdcp, ldep); 2381 if (rv) { 2382 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 2383 vdcp->instance); 2384 return (rv); 2385 } 2386 } 2387 2388 /* 2389 * fill in the data details into the DRing 2390 */ 2391 dep = ldep->dep; 2392 ASSERT(dep != NULL); 2393 2394 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 2395 dep->payload.operation = ldep->operation; 2396 dep->payload.addr = ldep->offset; 2397 dep->payload.nbytes = ldep->nbytes; 2398 dep->payload.status = (uint32_t)-1; /* vds will set valid value */ 2399 dep->payload.slice = ldep->slice; 2400 dep->hdr.dstate = VIO_DESC_READY; 2401 dep->hdr.ack = 1; /* request an ACK for every message */ 2402 2403 return (0); 2404 } 2405 2406 /* 2407 * Function: 2408 * vdc_send_request 2409 * 2410 * Description: 2411 * This routine waits until the driver is in the running state, then 2412 * writes the request into a free descriptor and notifies vds that a 2413 * new entry is available, retrying if the connection is reset first. 2414 * 2415 * Arguments: 2416 * vdcp - the soft state pointer 2417 * operation - operation we want vds to perform (VD_OP_XXX) 2418 * addr - address of data buf to be read/written. 2419 * nbytes - number of bytes to read/write 2420 * slice - the disk slice this request is for 2421 * offset - relative disk offset 2422 * cb_type - type of call - STRATEGY or SYNC 2423 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2424 * . mode for ioctl(9e) 2425 * . LP64 diskaddr_t (block I/O) 2426 * dir - direction of operation (READ/WRITE/BOTH) 2427 * 2428 * Return Codes: 2429 * 0 2430 * ENXIO, or EIO if the connection timeout was reached 2431 */ 2432 static int 2433 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 2434 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2435 void *cb_arg, vio_desc_direction_t dir) 2436 { 2437 ASSERT(vdcp != NULL); 2438 ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 2439 2440 mutex_enter(&vdcp->lock); 2441 2442 do { 2443 while (vdcp->state != VDC_STATE_RUNNING) { 2444 2445 /* return error if detaching */ 2446 if (vdcp->state == VDC_STATE_DETACH) { 2447 mutex_exit(&vdcp->lock); 2448 return (ENXIO); 2449 } 2450 2451 /* fail request if connection timeout is reached */ 2452 if (vdcp->ctimeout_reached) { 2453 mutex_exit(&vdcp->lock); 2454 return (EIO); 2455 } 2456 2457 cv_wait(&vdcp->running_cv, &vdcp->lock); 2458 } 2459 2460 } while (vdc_populate_descriptor(vdcp, operation, addr, 2461 nbytes, slice, offset, cb_type, cb_arg, dir)); 2462 2463 mutex_exit(&vdcp->lock); 2464 return (0); 2465 } 2466 2467 2468 /* 2469 * Function: 2470 * vdc_populate_descriptor 2471 * 2472 * Description: 2473 * This routine obtains a free descriptor, writes the request to be 2474 * transmitted to vds into it and notifies vds that the ring has been 2475 * updated; it does not wait for the request to complete. 2476 * 2477 * Arguments: 2478 * vdcp - the soft state pointer 2479 * operation - operation we want vds to perform (VD_OP_XXX) 2480 * addr - address of data buf to be read/written.
2481 * nbytes - number of bytes to read/write 2482 * slice - the disk slice this request is for 2483 * offset - relative disk offset 2484 * cb_type - type of call - STRATEGY or SYNC 2485 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2486 * . mode for ioctl(9e) 2487 * . LP64 diskaddr_t (block I/O) 2488 * dir - direction of operation (READ/WRITE/BOTH) 2489 * 2490 * Return Codes: 2491 * 0 2492 * EAGAIN 2493 * EFAULT 2494 * ENXIO 2495 * EIO 2496 */ 2497 static int 2498 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 2499 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2500 void *cb_arg, vio_desc_direction_t dir) 2501 { 2502 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 2503 int idx; /* Index of DRing entry used */ 2504 int next_idx; 2505 vio_dring_msg_t dmsg; 2506 size_t msglen; 2507 int rv; 2508 2509 ASSERT(MUTEX_HELD(&vdcp->lock)); 2510 vdcp->threads_pending++; 2511 loop: 2512 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 2513 2514 /* Get next available D-Ring entry */ 2515 idx = vdcp->dring_curr_idx; 2516 local_dep = &(vdcp->local_dring[idx]); 2517 2518 if (!local_dep->is_free) { 2519 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 2520 vdcp->instance); 2521 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 2522 if (vdcp->state == VDC_STATE_RUNNING || 2523 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2524 goto loop; 2525 } 2526 vdcp->threads_pending--; 2527 return (ECONNRESET); 2528 } 2529 2530 next_idx = idx + 1; 2531 if (next_idx >= vdcp->dring_len) 2532 next_idx = 0; 2533 vdcp->dring_curr_idx = next_idx; 2534 2535 ASSERT(local_dep->is_free); 2536 2537 local_dep->operation = operation; 2538 local_dep->addr = addr; 2539 local_dep->nbytes = nbytes; 2540 local_dep->slice = slice; 2541 local_dep->offset = offset; 2542 local_dep->cb_type = cb_type; 2543 local_dep->cb_arg = cb_arg; 2544 local_dep->dir = dir; 2545 2546 local_dep->is_free = B_FALSE; 2547 2548 rv = vdc_map_to_shared_dring(vdcp, idx); 2549 if (rv) { 2550 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 2551 vdcp->instance); 2552 /* free the descriptor */ 2553 local_dep->is_free = B_TRUE; 2554 vdcp->dring_curr_idx = idx; 2555 cv_wait(&vdcp->membind_cv, &vdcp->lock); 2556 if (vdcp->state == VDC_STATE_RUNNING || 2557 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2558 goto loop; 2559 } 2560 vdcp->threads_pending--; 2561 return (ECONNRESET); 2562 } 2563 2564 /* 2565 * Send a msg with the DRing details to vds 2566 */ 2567 VIO_INIT_DRING_DATA_TAG(dmsg); 2568 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 2569 dmsg.dring_ident = vdcp->dring_ident; 2570 dmsg.start_idx = idx; 2571 dmsg.end_idx = idx; 2572 vdcp->seq_num++; 2573 2574 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp); 2575 2576 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 2577 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 2578 2579 /* 2580 * note we're still holding the lock here to 2581 * make sure the message goes out in order !!!... 2582 */ 2583 msglen = sizeof (dmsg); 2584 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 2585 switch (rv) { 2586 case ECONNRESET: 2587 /* 2588 * vdc_send initiates the reset on failure. 2589 * Since the transaction has already been put 2590 * on the local dring, it will automatically get 2591 * retried when the channel is reset. Given that, 2592 * it is ok to just return success even though the 2593 * send failed. 
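* * (The replay itself is handled by vdc_backup_local_dring() and * vdc_resubmit_backup_dring() once the connection has been * re-established; see below.)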
2594 */ 2595 rv = 0; 2596 break; 2597 2598 case 0: /* EOK */ 2599 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 2600 break; 2601 2602 default: 2603 goto cleanup_and_exit; 2604 } 2605 2606 vdcp->threads_pending--; 2607 return (rv); 2608 2609 cleanup_and_exit: 2610 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 2611 return (ENXIO); 2612 } 2613 2614 /* 2615 * Function: 2616 * vdc_do_sync_op 2617 * 2618 * Description: 2619 * Wrapper around vdc_send_request that blocks until the 2620 * response to the message is available. 2621 * 2622 * Arguments: 2623 * vdcp - the soft state pointer 2624 * operation - operation we want vds to perform (VD_OP_XXX) 2625 * addr - address of data buf to be read/written. 2626 * nbytes - number of bytes to read/write 2627 * slice - the disk slice this request is for 2628 * offset - relative disk offset 2629 * cb_type - type of call - STRATEGY or SYNC 2630 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2631 * . mode for ioctl(9e) 2632 * . LP64 diskaddr_t (block I/O) 2633 * dir - direction of operation (READ/WRITE/BOTH) 2634 * 2635 * Return Codes: 2636 * 0 2637 * EAGAIN 2638 * EFAULT 2639 * ENXIO 2640 * EIO 2641 */ 2642 static int 2643 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 2644 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 2645 vio_desc_direction_t dir) 2646 { 2647 int status; 2648 2649 ASSERT(cb_type == CB_SYNC); 2650 2651 /* 2652 * Grab the lock; if blocked, wait until the server 2653 * response causes us to wake up again. 2654 */ 2655 mutex_enter(&vdcp->lock); 2656 vdcp->sync_op_cnt++; 2657 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 2658 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 2659 2660 if (vdcp->state == VDC_STATE_DETACH) { 2661 cv_broadcast(&vdcp->sync_blocked_cv); 2662 vdcp->sync_op_cnt--; 2663 mutex_exit(&vdcp->lock); 2664 return (ENXIO); 2665 } 2666 2667 /* now block any other thread entering after us */ 2668 vdcp->sync_op_blocked = B_TRUE; 2669 vdcp->sync_op_pending = B_TRUE; 2670 mutex_exit(&vdcp->lock); 2671 2672 status = vdc_send_request(vdcp, operation, addr, 2673 nbytes, slice, offset, cb_type, cb_arg, dir); 2674 2675 mutex_enter(&vdcp->lock); 2676 2677 if (status != 0) { 2678 vdcp->sync_op_pending = B_FALSE; 2679 } else { 2680 /* 2681 * Block until our transaction completes. 2682 * Anyone else waiting then gets to go next. 2683 */ 2684 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 2685 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 2686 2687 DMSG(vdcp, 2, ": operation returned %d\n", 2688 vdcp->sync_op_status); 2689 if (vdcp->state == VDC_STATE_DETACH) { 2690 vdcp->sync_op_pending = B_FALSE; 2691 status = ENXIO; 2692 } else { 2693 status = vdcp->sync_op_status; 2694 } 2695 } 2696 2697 vdcp->sync_op_status = 0; 2698 vdcp->sync_op_blocked = B_FALSE; 2699 vdcp->sync_op_cnt--; 2700 2701 /* signal the next waiting thread */ 2702 cv_signal(&vdcp->sync_blocked_cv); 2703 mutex_exit(&vdcp->lock); 2704 2705 return (status); 2706 } 2707 2708 2709 /* 2710 * Function: 2711 * vdc_drain_response() 2712 * 2713 * Description: 2714 * When a guest is panicking, the completion of requests needs to be 2715 * handled differently because interrupts are disabled and vdc 2716 * will not get messages. We have to poll for the messages instead. 2717 * 2718 * Arguments: 2719 * vdc - soft state pointer for this instance of the device driver.
2720 * 2721 * Return Code: 2722 * 0 - Success 2723 */ 2724 static int 2725 vdc_drain_response(vdc_t *vdc) 2726 { 2727 int rv, idx, retries; 2728 size_t msglen; 2729 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2730 vio_dring_msg_t dmsg; 2731 2732 mutex_enter(&vdc->lock); 2733 2734 retries = 0; 2735 for (;;) { 2736 msglen = sizeof (dmsg); 2737 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 2738 if (rv) { 2739 rv = EINVAL; 2740 break; 2741 } 2742 2743 /* 2744 * if there are no packets, wait and check again 2745 */ 2746 if ((rv == 0) && (msglen == 0)) { 2747 if (retries++ > vdc_dump_retries) { 2748 rv = EAGAIN; 2749 break; 2750 } 2751 2752 drv_usecwait(vdc_usec_timeout_dump); 2753 continue; 2754 } 2755 2756 /* 2757 * Ignore all messages that are not ACKs/NACKs to 2758 * DRing requests. 2759 */ 2760 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2761 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2762 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 2763 dmsg.tag.vio_msgtype, 2764 dmsg.tag.vio_subtype, 2765 dmsg.tag.vio_subtype_env); 2766 continue; 2767 } 2768 2769 /* 2770 * set the appropriate return value for the current request. 2771 */ 2772 switch (dmsg.tag.vio_subtype) { 2773 case VIO_SUBTYPE_ACK: 2774 rv = 0; 2775 break; 2776 case VIO_SUBTYPE_NACK: 2777 rv = EAGAIN; 2778 break; 2779 default: 2780 continue; 2781 } 2782 2783 idx = dmsg.start_idx; 2784 if (idx >= vdc->dring_len) { 2785 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 2786 vdc->instance, idx); 2787 continue; 2788 } 2789 ldep = &vdc->local_dring[idx]; 2790 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 2791 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 2792 vdc->instance, idx, ldep->dep->hdr.dstate); 2793 continue; 2794 } 2795 2796 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 2797 vdc->instance, idx, ldep->dep->hdr.dstate); 2798 rv = vdc_depopulate_descriptor(vdc, idx); 2799 if (rv) { 2800 DMSG(vdc, 0, 2801 "[%d] Entry @ %d - depopulate failed ..\n", 2802 vdc->instance, idx); 2803 } 2804 2805 /* if this is the last descriptor - break out of loop */ 2806 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 2807 break; 2808 } 2809 2810 mutex_exit(&vdc->lock); 2811 DMSG(vdc, 0, "End idx=%d\n", idx); 2812 2813 return (rv); 2814 } 2815 2816 2817 /* 2818 * Function: 2819 * vdc_depopulate_descriptor() 2820 * 2821 * Description: 2822 * Mark the DRing entry at 'idx' free, copy back any bounce buffer and unbind its LDC memory handle. 2823 * Arguments: 2824 * vdc - soft state pointer for this instance of the device driver.
2825 * idx - Index of the Descriptor Ring entry being modified 2826 * 2827 * Return Code: 2828 * 0 - Success 2829 */ 2830 static int 2831 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2832 { 2833 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2834 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2835 int status = ENXIO; 2836 int rv = 0; 2837 2838 ASSERT(vdc != NULL); 2839 ASSERT(idx < vdc->dring_len); 2840 ldep = &vdc->local_dring[idx]; 2841 ASSERT(ldep != NULL); 2842 ASSERT(MUTEX_HELD(&vdc->lock)); 2843 2844 DMSG(vdc, 2, ": idx = %d\n", idx); 2845 dep = ldep->dep; 2846 ASSERT(dep != NULL); 2847 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 2848 (dep->payload.status == ECANCELED)); 2849 2850 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2851 2852 ldep->is_free = B_TRUE; 2853 status = dep->payload.status; 2854 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 2855 2856 /* 2857 * If no buffers were used to transfer information to the server when 2858 * populating the descriptor then no memory handles need to be unbound 2859 * and we can return now. 2860 */ 2861 if (ldep->nbytes == 0) { 2862 cv_signal(&vdc->dring_free_cv); 2863 return (status); 2864 } 2865 2866 /* 2867 * If the upper layer passed in a misaligned address we copied the 2868 * data into an aligned buffer before sending it to LDC - we now 2869 * copy it back to the original buffer. 2870 */ 2871 if (ldep->align_addr) { 2872 ASSERT(ldep->addr != NULL); 2873 2874 if (dep->payload.nbytes > 0) 2875 bcopy(ldep->align_addr, ldep->addr, 2876 dep->payload.nbytes); 2877 kmem_free(ldep->align_addr, 2878 sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 2879 ldep->align_addr = NULL; 2880 } 2881 2882 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 2883 if (rv != 0) { 2884 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 2885 vdc->instance, ldep->desc_mhdl, idx, rv); 2886 /* 2887 * The error returned by the vDisk server is more informative 2888 * and thus has a higher priority but if it isn't set we ensure 2889 * that this function returns an error. 2890 */ 2891 if (status == 0) 2892 status = EINVAL; 2893 } 2894 2895 cv_signal(&vdc->membind_cv); 2896 cv_signal(&vdc->dring_free_cv); 2897 2898 return (status); 2899 } 2900 2901 /* 2902 * Function: 2903 * vdc_populate_mem_hdl() 2904 * 2905 * Description: 2906 * Bind the data buffer of a local DRing entry into LDC memory, using an aligned bounce buffer if the caller's buffer is misaligned. 2907 * Arguments: 2908 * vdcp - soft state pointer for this instance of the device driver. 2909 * ldep - local DRing entry describing the request; its addr, 2910 * nbytes and dir fields determine what gets bound and 2911 * with which permissions (VIO_read/write/both_dir). 2912 * 2913 * Return Code: 2914 * 0 - Success 2915 * EAGAIN - Failed to bind the buffer or obtain all its cookies 2916 */ 2917 static int 2918 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 2919 { 2920 vd_dring_entry_t *dep = NULL; 2921 ldc_mem_handle_t mhdl; 2922 caddr_t vaddr; 2923 size_t nbytes; 2924 uint8_t perm = LDC_MEM_RW; 2925 uint8_t maptype; 2926 int rv = 0; 2927 int i; 2928 2929 ASSERT(vdcp != NULL); 2930 2931 dep = ldep->dep; 2932 mhdl = ldep->desc_mhdl; 2933 2934 switch (ldep->dir) { 2935 case VIO_read_dir: 2936 perm = LDC_MEM_W; 2937 break; 2938 2939 case VIO_write_dir: 2940 perm = LDC_MEM_R; 2941 break; 2942 2943 case VIO_both_dir: 2944 perm = LDC_MEM_RW; 2945 break; 2946 2947 default: 2948 ASSERT(0); /* catch bad programming in vdc */ 2949 } 2950 2951 /* 2952 * LDC expects any addresses passed in to be 8-byte aligned.
We need 2953 * to copy the contents of any misaligned buffers to a newly allocated 2954 * buffer and bind it instead (and copy the contents back to the 2955 * original buffer passed in when depopulating the descriptor) 2956 */ 2957 vaddr = ldep->addr; 2958 nbytes = ldep->nbytes; 2959 if (((uint64_t)vaddr & 0x7) != 0) { 2960 ASSERT(ldep->align_addr == NULL); 2961 ldep->align_addr = 2962 kmem_alloc(sizeof (caddr_t) * 2963 P2ROUNDUP(nbytes, 8), KM_SLEEP); 2964 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 2965 "(buf=%p nb=%ld op=%d)\n", 2966 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 2967 nbytes, ldep->operation); 2968 if (perm != LDC_MEM_W) 2969 bcopy(vaddr, ldep->align_addr, nbytes); 2970 vaddr = ldep->align_addr; 2971 } 2972 2973 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 2974 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2975 maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 2976 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 2977 vdcp->instance, dep->payload.ncookies); 2978 if (rv != 0) { 2979 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 2980 "(mhdl=%p, buf=%p, err=%d)\n", 2981 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 2982 if (ldep->align_addr) { 2983 kmem_free(ldep->align_addr, 2984 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 2985 ldep->align_addr = NULL; 2986 } 2987 return (EAGAIN); 2988 } 2989 2990 /* 2991 * Get the other cookies (if any). 2992 */ 2993 for (i = 1; i < dep->payload.ncookies; i++) { 2994 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2995 if (rv != 0) { 2996 (void) ldc_mem_unbind_handle(mhdl); 2997 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 2998 "(mhdl=%lx cnum=%d), err=%d", 2999 vdcp->instance, mhdl, i, rv); 3000 if (ldep->align_addr) { 3001 kmem_free(ldep->align_addr, 3002 sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 3003 ldep->align_addr = NULL; 3004 } 3005 return (EAGAIN); 3006 } 3007 } 3008 3009 return (rv); 3010 } 3011 3012 /* 3013 * Interrupt handlers for messages from LDC 3014 */ 3015 3016 /* 3017 * Function: 3018 * vdc_handle_cb() 3019 * 3020 * Description: 3021 * LDC callback; invoked when an event (LDC_EVT_xxx) occurs on the channel and wakes up the relevant vdc threads. 3022 * Arguments: 3023 * event - Type of event (LDC_EVT_xxx) that triggered the callback 3024 * arg - soft state pointer for this instance of the device driver. 3025 * 3026 * Return Code: 3027 * 0 - Success 3028 */ 3029 static uint_t 3030 vdc_handle_cb(uint64_t event, caddr_t arg) 3031 { 3032 ldc_status_t ldc_state; 3033 int rv = 0; 3034 3035 vdc_t *vdc = (vdc_t *)(void *)arg; 3036 3037 ASSERT(vdc != NULL); 3038 3039 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 3040 3041 /* 3042 * Depending on the type of event that triggered this callback, 3043 * we modify the handshake state or read the data. 3044 * 3045 * NOTE: not done as a switch() as event could be triggered by 3046 * a state change and a read request. Also the ordering of the 3047 * check for the event types is deliberate. 3048 */ 3049 if (event & LDC_EVT_UP) { 3050 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 3051 3052 mutex_enter(&vdc->lock); 3053 3054 /* get LDC state */ 3055 rv = ldc_status(vdc->ldc_handle, &ldc_state); 3056 if (rv != 0) { 3057 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 3058 vdc->instance, rv); 3059 mutex_exit(&vdc->lock); return (LDC_SUCCESS); 3060 } 3061 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 3062 /* 3063 * Reset the transaction sequence numbers when 3064 * LDC comes up. We then kick off the handshake 3065 * negotiation with the vDisk server.
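* * The negotiation that follows is driven from the message processing * thread and proceeds strictly in this order (see the * VDC_STATE_NEGOTIATE handling in vdc_process_msg_thread() below): * *	vdc_ver_negotiation(vdcp);	(version exchange) *	vdc_attr_negotiation(vdcp);	(attribute exchange) *	vdc_dring_negotiation(vdcp);	(descriptor ring registration) *	vdc_rdx_exchange(vdcp);		(ready-to-transfer, RDX)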
3066 */ 3067 vdc->seq_num = 1; 3068 vdc->seq_num_reply = 0; 3069 vdc->ldc_state = ldc_state; 3070 cv_signal(&vdc->initwait_cv); 3071 } 3072 3073 mutex_exit(&vdc->lock); 3074 } 3075 3076 if (event & LDC_EVT_READ) { 3077 DMSG(vdc, 0, "[%d] Received LDC_EVT_READ\n", vdc->instance); 3078 mutex_enter(&vdc->read_lock); 3079 cv_signal(&vdc->read_cv); 3080 vdc->read_state = VDC_READ_PENDING; 3081 mutex_exit(&vdc->read_lock); 3082 3083 /* that's all we have to do - no need to handle DOWN/RESET */ 3084 return (LDC_SUCCESS); 3085 } 3086 3087 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 3088 3089 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 3090 3091 mutex_enter(&vdc->lock); 3092 /* 3093 * Need to wake up any readers so they will 3094 * detect that a reset has occurred. 3095 */ 3096 mutex_enter(&vdc->read_lock); 3097 if ((vdc->read_state == VDC_READ_WAITING) || 3098 (vdc->read_state == VDC_READ_RESET)) 3099 cv_signal(&vdc->read_cv); 3100 vdc->read_state = VDC_READ_RESET; 3101 mutex_exit(&vdc->read_lock); 3102 3103 /* wake up any threads waiting for connection to come up */ 3104 if (vdc->state == VDC_STATE_INIT_WAITING) { 3105 vdc->state = VDC_STATE_RESETTING; 3106 cv_signal(&vdc->initwait_cv); 3107 } 3108 3109 mutex_exit(&vdc->lock); 3110 } 3111 3112 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 3113 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3114 vdc->instance, event); 3115 3116 return (LDC_SUCCESS); 3117 } 3118 3119 /* 3120 * Function: 3121 * vdc_wait_for_response() 3122 * 3123 * Description: 3124 * Block waiting for a response from the server. If there is 3125 * no data, the thread blocks on the read_cv, which is signalled 3126 * by the callback when an LDC_EVT_READ event occurs. 3127 * 3128 * Arguments: 3129 * vdcp - soft state pointer for this instance of the device driver. 3130 * 3131 * Return Code: 3132 * 0 - Success 3133 */ 3134 static int 3135 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3136 { 3137 size_t nbytes = sizeof (*msgp); 3138 int status; 3139 3140 ASSERT(vdcp != NULL); 3141 3142 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3143 3144 status = vdc_recv(vdcp, msgp, &nbytes); 3145 DMSG(vdcp, 3, "vdc_recv() done.. status=0x%x size=0x%x\n", 3146 status, (int)nbytes); 3147 if (status) { 3148 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3149 vdcp->instance, status); 3150 return (status); 3151 } 3152 3153 if (nbytes < sizeof (vio_msg_tag_t)) { 3154 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3155 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3156 return (ENOMSG); 3157 } 3158 3159 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3160 msgp->tag.vio_msgtype, 3161 msgp->tag.vio_subtype, 3162 msgp->tag.vio_subtype_env); 3163 3164 /* 3165 * Verify the Session ID of the message 3166 * 3167 * Every message after the Version has been negotiated should 3168 * have the correct session ID set. 3169 */ 3170 if ((msgp->tag.vio_sid != vdcp->session_id) && 3171 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3172 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3173 "expected 0x%lx [seq num %lx @ %d]", 3174 vdcp->instance, msgp->tag.vio_sid, 3175 vdcp->session_id, 3176 ((vio_dring_msg_t *)msgp)->seq_num, 3177 ((vio_dring_msg_t *)msgp)->start_idx); 3178 return (ENOMSG); 3179 } 3180 return (0); 3181 } 3182 3183 3184 /* 3185 * Function: 3186 * vdc_resubmit_backup_dring() 3187 * 3188 * Description: 3189 * Resubmit each descriptor in the backed up dring to 3190 * vDisk server. The Dring was backed up during connection 3191 * reset.
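* Each entry still marked in use is re-sent with * vdc_populate_descriptor() and its ACK/NACK is consumed synchronously * via vdc_wait_for_response() and vdc_process_data_msg() before the * next entry is submitted.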
3192 * 3193 * Arguments: 3194 * vdcp - soft state pointer for this instance of the device driver. 3195 * 3196 * Return Code: 3197 * 0 - Success 3198 */ 3199 static int 3200 vdc_resubmit_backup_dring(vdc_t *vdcp) 3201 { 3202 int count; 3203 int b_idx; 3204 int rv; 3205 int dring_size; 3206 int status; 3207 vio_msg_t vio_msg; 3208 vdc_local_desc_t *curr_ldep; 3209 3210 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3211 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3212 3213 if (vdcp->local_dring_backup == NULL) { 3214 /* the pending requests have already been processed */ 3215 return (0); 3216 } 3217 3218 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3219 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3220 3221 /* 3222 * Walk the backup copy of the local descriptor ring and 3223 * resubmit all the outstanding transactions. 3224 */ 3225 b_idx = vdcp->local_dring_backup_tail; 3226 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3227 3228 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3229 3230 /* only resubmit outstanding transactions */ 3231 if (!curr_ldep->is_free) { 3232 3233 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3234 mutex_enter(&vdcp->lock); 3235 rv = vdc_populate_descriptor(vdcp, curr_ldep->operation, 3236 curr_ldep->addr, curr_ldep->nbytes, 3237 curr_ldep->slice, curr_ldep->offset, 3238 curr_ldep->cb_type, curr_ldep->cb_arg, 3239 curr_ldep->dir); 3240 mutex_exit(&vdcp->lock); 3241 if (rv) { 3242 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3243 vdcp->instance, b_idx); 3244 return (rv); 3245 } 3246 3247 /* Wait for the response message. */ 3248 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 3249 b_idx); 3250 status = vdc_wait_for_response(vdcp, &vio_msg); 3251 if (status) { 3252 DMSG(vdcp, 1, "[%d] wait_for_response " 3253 "returned err=%d\n", vdcp->instance, 3254 status); 3255 return (status); 3256 } 3257 3258 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 3259 status = vdc_process_data_msg(vdcp, &vio_msg); 3260 if (status) { 3261 DMSG(vdcp, 1, "[%d] process_data_msg " 3262 "returned err=%d\n", vdcp->instance, 3263 status); 3264 return (status); 3265 } 3266 } 3267 3268 /* get the next element to submit */ 3269 if (++b_idx >= vdcp->local_dring_backup_len) 3270 b_idx = 0; 3271 } 3272 3273 /* all done - now clear up pending dring copy */ 3274 dring_size = vdcp->local_dring_backup_len * 3275 sizeof (vdcp->local_dring_backup[0]); 3276 3277 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3278 3279 vdcp->local_dring_backup = NULL; 3280 3281 return (0); 3282 } 3283 3284 /* 3285 * Function: 3286 * vdc_cancel_backup_ring 3287 * 3288 * Description: 3289 * Fail each outstanding descriptor in the backed up dring; the Dring 3290 * was backed up during connection reset and these requests will never complete. 3291 * 3292 * Arguments: 3293 * vdcp - soft state pointer for this instance of the device driver.
3294 * 3295 * Return Code: 3296 * None 3297 */ 3298 void 3299 vdc_cancel_backup_ring(vdc_t *vdcp) 3300 { 3301 vdc_local_desc_t *ldep; 3302 struct buf *bufp; 3303 int count; 3304 int b_idx; 3305 int dring_size; 3306 3307 ASSERT(MUTEX_HELD(&vdcp->lock)); 3308 ASSERT(vdcp->state == VDC_STATE_INIT || 3309 vdcp->state == VDC_STATE_INIT_WAITING || 3310 vdcp->state == VDC_STATE_NEGOTIATE || 3311 vdcp->state == VDC_STATE_RESETTING); 3312 3313 if (vdcp->local_dring_backup == NULL) { 3314 /* the pending requests have already been processed */ 3315 return; 3316 } 3317 3318 DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3319 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3320 3321 /* 3322 * Walk the backup copy of the local descriptor ring and 3323 * cancel all the outstanding transactions. 3324 */ 3325 b_idx = vdcp->local_dring_backup_tail; 3326 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3327 3328 ldep = &(vdcp->local_dring_backup[b_idx]); 3329 3330 /* only cancel outstanding transactions */ 3331 if (!ldep->is_free) { 3332 3333 DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 3334 3335 /* 3336 * All requests have already been cleared from the 3337 * local descriptor ring and the LDC channel has been 3338 * reset so we will never get any reply for these 3339 * requests. Now we just have to notify threads waiting 3340 * for replies that their requests have failed. 3341 */ 3342 switch (ldep->cb_type) { 3343 case CB_SYNC: 3344 ASSERT(vdcp->sync_op_pending); 3345 vdcp->sync_op_status = EIO; 3346 vdcp->sync_op_pending = B_FALSE; 3347 cv_signal(&vdcp->sync_pending_cv); 3348 break; 3349 3350 case CB_STRATEGY: 3351 bufp = ldep->cb_arg; 3352 ASSERT(bufp != NULL); 3353 bufp->b_resid = bufp->b_bcount; 3354 bioerror(bufp, EIO); 3355 biodone(bufp); 3356 break; 3357 3358 default: 3359 ASSERT(0); 3360 } 3361 3362 } 3363 3364 /* get the next element to cancel */ 3365 if (++b_idx >= vdcp->local_dring_backup_len) 3366 b_idx = 0; 3367 } 3368 3369 /* all done - now clear up pending dring copy */ 3370 dring_size = vdcp->local_dring_backup_len * 3371 sizeof (vdcp->local_dring_backup[0]); 3372 3373 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3374 3375 vdcp->local_dring_backup = NULL; 3376 3377 DTRACE_IO2(processed, int, count, vdc_t *, vdcp); 3378 } 3379 3380 /* 3381 * Function: 3382 * vdc_connection_timeout 3383 * 3384 * Description: 3385 * This function is invoked if the timeout set to establish the connection 3386 * with vds expires. This will happen if we spend too much time in the 3387 * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states. In that case we 3388 * cancel any pending requests and mark them as failed. 3389 * 3390 * If the timeout does not expire, it will be cancelled when we reach the 3391 * VDC_STATE_HANDLE_PENDING or VDC_STATE_RESETTING state. This function can 3392 * be invoked while we are in the VDC_STATE_HANDLE_PENDING or 3393 * VDC_STATE_RESETTING state in which case we do nothing because the 3394 * timeout is being cancelled. 3395 * 3396 * Arguments: 3397 * arg - argument of the timeout function; actually a soft state 3398 * pointer for the instance of the device driver. 3399 * 3400 * Return Code: 3401 * None 3402 */ 3403 void 3404 vdc_connection_timeout(void *arg) 3405 { 3406 vdc_t *vdcp = (vdc_t *)arg; 3407 3408 mutex_enter(&vdcp->lock); 3409 3410 if (vdcp->state == VDC_STATE_HANDLE_PENDING || 3411 vdcp->state == VDC_STATE_DETACH) { 3412 /* 3413 * The connection has just been re-established or 3414 * we are detaching.
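* * In either case the pending timeout simply goes away: the state * machine in vdc_process_msg_thread() disposes of its timeout id once * it reaches the VDC_STATE_HANDLE_PENDING or VDC_STATE_DETACH state.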
3415 */ 3416 vdcp->ctimeout_reached = B_FALSE; 3417 mutex_exit(&vdcp->lock); 3418 return; 3419 } 3420 3421 vdcp->ctimeout_reached = B_TRUE; 3422 3423 /* notify requests waiting for sending */ 3424 cv_broadcast(&vdcp->running_cv); 3425 3426 /* cancel requests waiting for a result */ 3427 vdc_cancel_backup_ring(vdcp); 3428 3429 mutex_exit(&vdcp->lock); 3430 3431 cmn_err(CE_NOTE, "[%d] connection to service domain timeout", 3432 vdcp->instance); 3433 } 3434 3435 /* 3436 * Function: 3437 * vdc_backup_local_dring() 3438 * 3439 * Description: 3440 * Backup the current dring in the event of a reset. The Dring 3441 * transactions will be resubmitted to the server when the 3442 * connection is restored. 3443 * 3444 * Arguments: 3445 * vdcp - soft state pointer for this instance of the device driver. 3446 * 3447 * Return Code: 3448 * NONE 3449 */ 3450 static void 3451 vdc_backup_local_dring(vdc_t *vdcp) 3452 { 3453 int dring_size; 3454 3455 ASSERT(MUTEX_HELD(&vdcp->lock)); 3456 ASSERT(vdcp->state == VDC_STATE_RESETTING); 3457 3458 /* 3459 * If the backup dring is still around, it means 3460 * that the last restore did not complete. However, 3461 * since we never got back into the running state, 3462 * the backup copy we have is still valid. 3463 */ 3464 if (vdcp->local_dring_backup != NULL) { 3465 DMSG(vdcp, 1, "reusing local descriptor ring backup " 3466 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 3467 vdcp->local_dring_backup_tail); 3468 return; 3469 } 3470 3471 /* 3472 * The backup dring can be NULL and the local dring may not be 3473 * initialized. This can happen if we had a reset while establishing 3474 * a new connection but after the connection has timed out. In that 3475 * case the backup dring is NULL because the pending requests have 3476 * been cancelled, and the reset occurred before the local dring was 3477 * initialized. 3478 */ 3479 if (!(vdcp->initialized & VDC_DRING_LOCAL)) 3480 return; 3481 3482 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 3483 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 3484 3485 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 3486 3487 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 3488 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 3489 3490 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 3491 vdcp->local_dring_backup_len = vdcp->dring_len; 3492 } 3493 3494 /* -------------------------------------------------------------------------- */ 3495 3496 /* 3497 * The following functions process the incoming messages from vds 3498 */ 3499 3500 /* 3501 * Function: 3502 * vdc_process_msg_thread() 3503 * 3504 * Description: 3505 * 3506 * Main VDC message processing thread. Each vDisk instance 3507 * runs its own copy of this thread. This thread triggers 3508 * all the handshakes and data exchange with the server. It 3509 * also handles all channel resets. 3510 * 3511 * Arguments: 3512 * vdc - soft state pointer for this instance of the device driver. 3513 * 3514 * Return Code: 3515 * None 3516 */ 3517 static void 3518 vdc_process_msg_thread(vdc_t *vdcp) 3519 { 3520 int status; 3521 int ctimeout; 3522 timeout_id_t tmid = 0; 3523 3524 mutex_enter(&vdcp->lock); 3525 3526 for (;;) { 3527 3528 #define Q(_s) (vdcp->state == _s) ?
#_s : 3529 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 3530 Q(VDC_STATE_INIT) 3531 Q(VDC_STATE_INIT_WAITING) 3532 Q(VDC_STATE_NEGOTIATE) 3533 Q(VDC_STATE_HANDLE_PENDING) 3534 Q(VDC_STATE_RUNNING) 3535 Q(VDC_STATE_RESETTING) 3536 Q(VDC_STATE_DETACH) 3537 "UNKNOWN"); 3538 3539 switch (vdcp->state) { 3540 case VDC_STATE_INIT: 3541 3542 /* 3543 * If requested, start a timeout to check if the 3544 * connection with vds is established in the 3545 * specified delay. If the timeout expires, we 3546 * will cancel any pending request. 3547 * 3548 * If a reset has occurred while establishing 3549 * the connection, we already have a timeout armed 3550 * and in that case we don't need to arm a new one. 3551 */ 3552 ctimeout = (vdc_timeout != 0)? 3553 vdc_timeout : vdcp->ctimeout; 3554 3555 if (ctimeout != 0 && tmid == 0) { 3556 tmid = timeout(vdc_connection_timeout, vdcp, 3557 ctimeout * drv_usectohz(1000000)); 3558 } 3559 3560 /* Check if we have been re-initializing repeatedly */ 3561 if (vdcp->hshake_cnt++ > vdc_hshake_retries && 3562 vdcp->lifecycle != VDC_LC_ONLINE) { 3563 cmn_err(CE_NOTE, "[%d] disk access failed.\n", 3564 vdcp->instance); 3565 vdcp->state = VDC_STATE_DETACH; 3566 break; 3567 } 3568 3569 /* Bring up connection with vds via LDC */ 3570 status = vdc_start_ldc_connection(vdcp); 3571 if (status == EINVAL) { 3572 DMSG(vdcp, 0, "[%d] Could not start LDC", 3573 vdcp->instance); 3574 vdcp->state = VDC_STATE_DETACH; 3575 } else { 3576 vdcp->state = VDC_STATE_INIT_WAITING; 3577 } 3578 break; 3579 3580 case VDC_STATE_INIT_WAITING: 3581 3582 /* 3583 * Let the callback event move us on 3584 * when the channel to the server is up. 3585 */ 3586 while (vdcp->ldc_state != LDC_UP) { 3587 cv_wait(&vdcp->initwait_cv, &vdcp->lock); 3588 if (vdcp->state != VDC_STATE_INIT_WAITING) { 3589 DMSG(vdcp, 0, 3590 "state moved to %d out from under us...\n", 3591 vdcp->state); 3592 3593 break; 3594 } 3595 } 3596 if (vdcp->state == VDC_STATE_INIT_WAITING && 3597 vdcp->ldc_state == LDC_UP) { 3598 vdcp->state = VDC_STATE_NEGOTIATE; 3599 } 3600 break; 3601 3602 case VDC_STATE_NEGOTIATE: 3603 switch (status = vdc_ver_negotiation(vdcp)) { 3604 case 0: 3605 break; 3606 default: 3607 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 3608 status); 3609 goto reset; 3610 } 3611 3612 switch (status = vdc_attr_negotiation(vdcp)) { 3613 case 0: 3614 break; 3615 default: 3616 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 3617 status); 3618 goto reset; 3619 } 3620 3621 switch (status = vdc_dring_negotiation(vdcp)) { 3622 case 0: 3623 break; 3624 default: 3625 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 3626 status); 3627 goto reset; 3628 } 3629 3630 switch (status = vdc_rdx_exchange(vdcp)) { 3631 case 0: 3632 vdcp->state = VDC_STATE_HANDLE_PENDING; 3633 goto done; 3634 default: 3635 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 3636 status); 3637 goto reset; 3638 } 3639 reset: 3640 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 3641 status); 3642 vdcp->state = VDC_STATE_RESETTING; 3643 vdcp->self_reset = B_TRUE; 3644 done: 3645 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 3646 vdcp->state); 3647 break; 3648 3649 case VDC_STATE_HANDLE_PENDING: 3650 3651 if (vdcp->ctimeout_reached) { 3652 /* 3653 * The connection timeout had been reached so 3654 * pending requests have been cancelled. Now 3655 * that the connection is back we can reset 3656 * the timeout.
3657 */ 3658 ASSERT(vdcp->local_dring_backup == NULL); 3659 ASSERT(tmid != 0); 3660 tmid = 0; 3661 vdcp->ctimeout_reached = B_FALSE; 3662 vdcp->state = VDC_STATE_RUNNING; 3663 DMSG(vdcp, 0, "[%d] connection to service " 3664 "domain is up", vdcp->instance); 3665 break; 3666 } 3667 3668 mutex_exit(&vdcp->lock); 3669 if (tmid != 0) { 3670 (void) untimeout(tmid); 3671 tmid = 0; 3672 } 3673 status = vdc_resubmit_backup_dring(vdcp); 3674 mutex_enter(&vdcp->lock); 3675 3676 if (status) 3677 vdcp->state = VDC_STATE_RESETTING; 3678 else 3679 vdcp->state = VDC_STATE_RUNNING; 3680 3681 break; 3682 3683 /* enter running state */ 3684 case VDC_STATE_RUNNING: 3685 /* 3686 * Signal anyone waiting for the connection 3687 * to come on line. 3688 */ 3689 vdcp->hshake_cnt = 0; 3690 cv_broadcast(&vdcp->running_cv); 3691 mutex_exit(&vdcp->lock); 3692 3693 for (;;) { 3694 vio_msg_t msg; 3695 status = vdc_wait_for_response(vdcp, &msg); 3696 if (status) break; 3697 3698 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 3699 vdcp->instance); 3700 status = vdc_process_data_msg(vdcp, &msg); 3701 if (status) { 3702 DMSG(vdcp, 1, "[%d] process_data_msg " 3703 "returned err=%d\n", vdcp->instance, 3704 status); 3705 break; 3706 } 3707 3708 } 3709 3710 mutex_enter(&vdcp->lock); 3711 3712 vdcp->state = VDC_STATE_RESETTING; 3713 vdcp->self_reset = B_TRUE; 3714 break; 3715 3716 case VDC_STATE_RESETTING: 3717 /* 3718 * When we reach this state, we either come from the 3719 * VDC_STATE_RUNNING state and we can have pending 3720 * request but no timeout is armed; or we come from 3721 * the VDC_STATE_INIT_WAITING, VDC_NEGOTIATE or 3722 * VDC_HANDLE_PENDING state and there is no pending 3723 * request or pending requests have already been copied 3724 * into the backup dring. So we can safely keep the 3725 * connection timeout armed while we are in this state. 3726 */ 3727 3728 DMSG(vdcp, 0, "Initiating channel reset " 3729 "(pending = %d)\n", (int)vdcp->threads_pending); 3730 3731 if (vdcp->self_reset) { 3732 DMSG(vdcp, 0, 3733 "[%d] calling stop_ldc_connection.\n", 3734 vdcp->instance); 3735 status = vdc_stop_ldc_connection(vdcp); 3736 vdcp->self_reset = B_FALSE; 3737 } 3738 3739 /* 3740 * Wait for all threads currently waiting 3741 * for a free dring entry to use. 
3742 */ 3743 while (vdcp->threads_pending) { 3744 cv_broadcast(&vdcp->membind_cv); 3745 cv_broadcast(&vdcp->dring_free_cv); 3746 mutex_exit(&vdcp->lock); 3747 /* give the waiters enough time to wake up */ 3748 delay(vdc_hz_min_ldc_delay); 3749 mutex_enter(&vdcp->lock); 3750 } 3751 3752 ASSERT(vdcp->threads_pending == 0); 3753 3754 /* Sanity check that no thread is receiving */ 3755 ASSERT(vdcp->read_state != VDC_READ_WAITING); 3756 3757 vdcp->read_state = VDC_READ_IDLE; 3758 3759 vdc_backup_local_dring(vdcp); 3760 3761 /* cleanup the old d-ring */ 3762 vdc_destroy_descriptor_ring(vdcp); 3763 3764 /* go and start again */ 3765 vdcp->state = VDC_STATE_INIT; 3766 3767 break; 3768 3769 case VDC_STATE_DETACH: 3770 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 3771 vdcp->instance); 3772 3773 /* cancel any pending timeout */ 3774 mutex_exit(&vdcp->lock); 3775 if (tmid != 0) { 3776 (void) untimeout(tmid); 3777 tmid = 0; 3778 } 3779 mutex_enter(&vdcp->lock); 3780 3781 /* 3782 * Signal anyone waiting for connection 3783 * to come online 3784 */ 3785 cv_broadcast(&vdcp->running_cv); 3786 3787 while (vdcp->sync_op_pending) { 3788 cv_signal(&vdcp->sync_pending_cv); 3789 cv_signal(&vdcp->sync_blocked_cv); 3790 mutex_exit(&vdcp->lock); 3791 /* give the waiters enough time to wake up */ 3792 delay(vdc_hz_min_ldc_delay); 3793 mutex_enter(&vdcp->lock); 3794 } 3795 3796 mutex_exit(&vdcp->lock); 3797 3798 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 3799 vdcp->instance); 3800 thread_exit(); 3801 break; 3802 } 3803 } 3804 } 3805 3806 3807 /* 3808 * Function: 3809 * vdc_process_data_msg() 3810 * 3811 * Description: 3812 * This function is called by the message processing thread each time 3813 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 3814 * be an ACK or NACK from vds[1] which vdc handles as follows. 3815 * ACK - wake up the waiting thread 3816 * NACK - resend any messages necessary 3817 * 3818 * [1] Although the message format allows it, vds should not send a 3819 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 3820 * some bizarre reason it does, vdc will reset the connection. 3821 * 3822 * Arguments: 3823 * vdc - soft state pointer for this instance of the device driver. 3824 * msg - the LDC message sent by vds 3825 * 3826 * Return Code: 3827 * 0 - Success. 3828 * > 0 - error value returned by LDC 3829 */ 3830 static int 3831 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 3832 { 3833 int status = 0; 3834 vio_dring_msg_t *dring_msg; 3835 vdc_local_desc_t *ldep = NULL; 3836 int start, end; 3837 int idx; 3838 3839 dring_msg = (vio_dring_msg_t *)msg; 3840 3841 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 3842 ASSERT(vdcp != NULL); 3843 3844 mutex_enter(&vdcp->lock); 3845 3846 /* 3847 * Check to see if the message has bogus data 3848 */ 3849 idx = start = dring_msg->start_idx; 3850 end = dring_msg->end_idx; 3851 if ((start >= vdcp->dring_len) || 3852 (end >= vdcp->dring_len) || (end < -1)) { 3853 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 3854 vdcp->instance, start, end); 3855 mutex_exit(&vdcp->lock); 3856 return (EINVAL); 3857 } 3858 3859 /* 3860 * Verify that the sequence number is what vdc expects. 
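* * As a worked example (hypothetical values): if vdc has issued requests * with sequence numbers up to 7 (vdcp->seq_num == 7) and the last reply * processed carried 5 (vdcp->seq_num_reply == 5), then replies 6 and 7 * are acceptable; a reply of 5 or below has already been seen and one * above 7 was never sent, so either is rejected as VDC_SEQ_NUM_INVALID.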
3861 */ 3862 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 3863 case VDC_SEQ_NUM_TODO: 3864 break; /* keep processing this message */ 3865 case VDC_SEQ_NUM_SKIP: 3866 mutex_exit(&vdcp->lock); 3867 return (0); 3868 case VDC_SEQ_NUM_INVALID: 3869 mutex_exit(&vdcp->lock); 3870 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 3871 return (ENXIO); 3872 } 3873 3874 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 3875 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 3876 VDC_DUMP_DRING_MSG(dring_msg); 3877 mutex_exit(&vdcp->lock); 3878 return (EIO); 3879 3880 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 3881 mutex_exit(&vdcp->lock); 3882 return (EPROTO); 3883 } 3884 3885 DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp); 3886 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 3887 ASSERT(start == end); 3888 3889 ldep = &vdcp->local_dring[idx]; 3890 3891 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 3892 ldep->dep->hdr.dstate, ldep->cb_type); 3893 3894 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 3895 struct buf *bufp; 3896 3897 switch (ldep->cb_type) { 3898 case CB_SYNC: 3899 ASSERT(vdcp->sync_op_pending); 3900 3901 status = vdc_depopulate_descriptor(vdcp, idx); 3902 vdcp->sync_op_status = status; 3903 vdcp->sync_op_pending = B_FALSE; 3904 cv_signal(&vdcp->sync_pending_cv); 3905 break; 3906 3907 case CB_STRATEGY: 3908 bufp = ldep->cb_arg; 3909 ASSERT(bufp != NULL); 3910 bufp->b_resid = 3911 bufp->b_bcount - ldep->dep->payload.nbytes; 3912 status = ldep->dep->payload.status; /* Future:ntoh */ 3913 if (status != 0) { 3914 DMSG(vdcp, 1, "strategy status=%d\n", status); 3915 bioerror(bufp, status); 3916 } 3917 status = vdc_depopulate_descriptor(vdcp, idx); 3918 biodone(bufp); 3919 3920 DMSG(vdcp, 1, 3921 "strategy complete req=%ld bytes resp=%ld bytes\n", 3922 bufp->b_bcount, ldep->dep->payload.nbytes); 3923 break; 3924 3925 default: 3926 ASSERT(0); 3927 } 3928 } 3929 3930 /* let the arrival signal propagate */ 3931 mutex_exit(&vdcp->lock); 3932 3933 /* probe gives the count of how many entries were processed */ 3934 DTRACE_IO2(processed, int, 1, vdc_t *, vdcp); 3935 3936 return (0); 3937 } 3938 3939 /* 3940 * Function: 3941 * vdc_process_err_msg() 3942 * 3943 * NOTE: No error messages are used as part of the vDisk protocol 3944 */ 3945 static int 3946 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 3947 { 3948 _NOTE(ARGUNUSED(vdc)) 3949 _NOTE(ARGUNUSED(msg)) 3950 3951 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 3952 DMSG(vdc, 1, "[%d] Got an ERR msg", vdc->instance); 3953 3954 return (ENOTSUP); 3955 } 3956 3957 /* 3958 * Function: 3959 * vdc_handle_ver_msg() 3960 * 3961 * Description: 3962 * Handle a version negotiation message (ACK/NACK/INFO) from the vDisk server. 3963 * Arguments: 3964 * vdc - soft state pointer for this instance of the device driver.
3965 * ver_msg - LDC message sent by vDisk server 3966 * 3967 * Return Code: 3968 * 0 - Success 3969 */ 3970 static int 3971 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 3972 { 3973 int status = 0; 3974 3975 ASSERT(vdc != NULL); 3976 ASSERT(mutex_owned(&vdc->lock)); 3977 3978 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 3979 return (EPROTO); 3980 } 3981 3982 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 3983 return (EINVAL); 3984 } 3985 3986 switch (ver_msg->tag.vio_subtype) { 3987 case VIO_SUBTYPE_ACK: 3988 /* 3989 * We check to see if the version returned is indeed supported 3990 * (The server may have also adjusted the minor number downwards 3991 * and if so 'ver_msg' will contain the actual version agreed) 3992 */ 3993 if (vdc_is_supported_version(ver_msg)) { 3994 vdc->ver.major = ver_msg->ver_major; 3995 vdc->ver.minor = ver_msg->ver_minor; 3996 ASSERT(vdc->ver.major > 0); 3997 } else { 3998 status = EPROTO; 3999 } 4000 break; 4001 4002 case VIO_SUBTYPE_NACK: 4003 /* 4004 * call vdc_is_supported_version() which will return the next 4005 * supported version (if any) in 'ver_msg' 4006 */ 4007 (void) vdc_is_supported_version(ver_msg); 4008 if (ver_msg->ver_major > 0) { 4009 size_t len = sizeof (*ver_msg); 4010 4011 ASSERT(vdc->ver.major > 0); 4012 4013 /* reset the necessary fields and resend */ 4014 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 4015 ver_msg->dev_class = VDEV_DISK; 4016 4017 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 4018 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 4019 vdc->instance, status); 4020 if (len != sizeof (*ver_msg)) 4021 status = EBADMSG; 4022 } else { 4023 DMSG(vdc, 0, "[%d] No common version with vDisk server", 4024 vdc->instance); 4025 status = ENOTSUP; 4026 } 4027 4028 break; 4029 case VIO_SUBTYPE_INFO: 4030 /* 4031 * Handle the case where vds starts handshake 4032 * (for now only vdc is the instigator) 4033 */ 4034 status = ENOTSUP; 4035 break; 4036 4037 default: 4038 status = EINVAL; 4039 break; 4040 } 4041 4042 return (status); 4043 } 4044 4045 /* 4046 * Function: 4047 * vdc_handle_attr_msg() 4048 * 4049 * Description: 4050 * Handle an attribute negotiation message from the vDisk server and save the negotiated vdisk parameters. 4051 * Arguments: 4052 * vdc - soft state pointer for this instance of the device driver. 4053 * attr_msg - LDC message sent by vDisk server 4054 * 4055 * Return Code: 4056 * 0 - Success 4057 */ 4058 static int 4059 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 4060 { 4061 int status = 0; 4062 4063 ASSERT(vdc != NULL); 4064 ASSERT(mutex_owned(&vdc->lock)); 4065 4066 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 4067 return (EPROTO); 4068 } 4069 4070 switch (attr_msg->tag.vio_subtype) { 4071 case VIO_SUBTYPE_ACK: 4072 /* 4073 * We now verify the attributes sent by vds. 4074 */ 4075 vdc->vdisk_size = attr_msg->vdisk_size; 4076 vdc->vdisk_type = attr_msg->vdisk_type; 4077 4078 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 4079 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 4080 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 4081 vdc->instance, vdc->block_size, 4082 attr_msg->vdisk_block_size); 4083 4084 /* 4085 * We don't know at compile time what the vDisk server will 4086 * think are good values, but we apply a large (arbitrary) 4087 * upper bound to prevent memory exhaustion in vdc if it were 4088 * allocating a DRing based on huge values sent by the server. 4089 * We probably will never exceed this except if the message 4090 * was garbage.
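* * As a worked example: with DEV_BSIZE of 512 bytes and an 8K PAGESIZE * (the sun4v base page size), the bound below is 8192 * 512 = 4M, so * any max_xfer_sz/vdisk_block_size pair whose product exceeds 4MB is * ignored and the current (default) values are kept.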
4091 */ 4092 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 4093 (PAGESIZE * DEV_BSIZE)) { 4094 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 4095 vdc->block_size = attr_msg->vdisk_block_size; 4096 } else { 4097 DMSG(vdc, 0, "[%d] vds block transfer size too big;" 4098 " using max supported by vdc", vdc->instance); 4099 } 4100 4101 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 4102 (attr_msg->vdisk_size > INT64_MAX) || 4103 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 4104 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 4105 vdc->instance); 4106 status = EINVAL; 4107 break; 4108 } 4109 4110 break; 4111 4112 case VIO_SUBTYPE_NACK: 4113 /* 4114 * vds could not handle the attributes we sent so we 4115 * stop negotiating. 4116 */ 4117 status = EPROTO; 4118 break; 4119 4120 case VIO_SUBTYPE_INFO: 4121 /* 4122 * Handle the case where vds starts the handshake 4123 * (for now, vdc is the only supported instigator) 4124 */ 4125 status = ENOTSUP; 4126 break; 4127 4128 default: 4129 status = ENOTSUP; 4130 break; 4131 } 4132 4133 return (status); 4134 } 4135 4136 /* 4137 * Function: 4138 * vdc_handle_dring_reg_msg() 4139 * 4140 * Description: 4141 * Handle a descriptor ring registration (VIO_DRING_REG) response from the vDisk server. 4142 * Arguments: 4143 * vdc - soft state pointer for this instance of the driver. 4144 * dring_msg - LDC message sent by vDisk server 4145 * 4146 * Return Code: 4147 * 0 - Success 4148 */ 4149 static int 4150 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 4151 { 4152 int status = 0; 4153 4154 ASSERT(vdc != NULL); 4155 ASSERT(mutex_owned(&vdc->lock)); 4156 4157 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 4158 return (EPROTO); 4159 } 4160 4161 switch (dring_msg->tag.vio_subtype) { 4162 case VIO_SUBTYPE_ACK: 4163 /* save the received dring_ident */ 4164 vdc->dring_ident = dring_msg->dring_ident; 4165 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 4166 vdc->instance, vdc->dring_ident); 4167 break; 4168 4169 case VIO_SUBTYPE_NACK: 4170 /* 4171 * vds could not handle the DRing info we sent so we 4172 * stop negotiating. 4173 */ 4174 DMSG(vdc, 0, "[%d] server could not register DRing\n", 4175 vdc->instance); 4176 status = EPROTO; 4177 break; 4178 4179 case VIO_SUBTYPE_INFO: 4180 /* 4181 * Handle the case where vds starts the handshake 4182 * (for now only vdc is the instigator) 4183 */ 4184 status = ENOTSUP; 4185 break; 4186 default: 4187 status = ENOTSUP; 4188 } 4189 4190 return (status); 4191 } 4192
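/*
 * Worked example of the window enforced by vdc_verify_seq_num()
 * below: if the last sequence number generated by vdc is 8 and the
 * last reply processed was 5, then replies carrying 6, 7 or 8 fall
 * inside the window; a reply of 5 or lower (already seen) or of 9 or
 * higher (never sent) is VDC_SEQ_NUM_INVALID.
 */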
4193 /* 4194 * Function: 4195 * vdc_verify_seq_num() 4196 * 4197 * Description: 4198 * This function verifies that the sequence number sent back by the vDisk 4199 * server with the latest message is what is expected (i.e. it is greater 4200 * than the last seq num sent by the vDisk server and less than or equal 4201 * to the last seq num generated by vdc). 4202 * 4203 * It then checks the request ID to see if any requests need processing 4204 * in the DRing. 4205 * 4206 * Arguments: 4207 * vdc - soft state pointer for this instance of the driver. 4208 * dring_msg - pointer to the LDC message sent by vds 4209 * 4210 * Return Code: 4211 * VDC_SEQ_NUM_TODO - Message needs to be processed 4212 * VDC_SEQ_NUM_SKIP - Message has already been processed 4213 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync that 4214 * vdc cannot deal with them 4215 */ 4216 static int 4217 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 4218 { 4219 ASSERT(vdc != NULL); 4220 ASSERT(dring_msg != NULL); 4221 ASSERT(mutex_owned(&vdc->lock)); 4222 4223 /* 4224 * Check to see if the messages were responded to in the correct 4225 * order by vds. 4226 */ 4227 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 4228 (dring_msg->seq_num > vdc->seq_num)) { 4229 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 4230 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 4231 vdc->instance, dring_msg->seq_num, 4232 vdc->seq_num_reply, vdc->seq_num, 4233 vdc->req_id_proc, vdc->req_id); 4234 return (VDC_SEQ_NUM_INVALID); 4235 } 4236 vdc->seq_num_reply = dring_msg->seq_num; 4237 4238 if (vdc->req_id_proc < vdc->req_id) 4239 return (VDC_SEQ_NUM_TODO); 4240 else 4241 return (VDC_SEQ_NUM_SKIP); 4242 } 4243 4244
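/*
 * For reference: vdc_version[] must be sorted in strictly decreasing
 * major-version order (the ASSERTs in vdc_is_supported_version()
 * below depend on it). A hypothetical table supporting versions 2.1
 * and 1.0 (illustration only; the real table is declared elsewhere in
 * this driver) would contain the entries { {2, 1}, {1, 0} }, i.e.
 * .major/.minor pairs.
 */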
4245 /* 4246 * Function: 4247 * vdc_is_supported_version() 4248 * 4249 * Description: 4250 * This routine checks if the major/minor version numbers specified in 4251 * 'ver_msg' are supported. If not, it finds the next version that is 4252 * in the supported version list 'vdc_version[]' and sets the fields in 4253 * 'ver_msg' to those values 4254 * 4255 * Arguments: 4256 * ver_msg - LDC message sent by vDisk server 4257 * 4258 * Return Code: 4259 * B_TRUE - Success 4260 * B_FALSE - Version not supported 4261 */ 4262 static boolean_t 4263 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 4264 { 4265 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 4266 4267 for (int i = 0; i < vdc_num_versions; i++) { 4268 ASSERT(vdc_version[i].major > 0); 4269 ASSERT((i == 0) || 4270 (vdc_version[i].major < vdc_version[i-1].major)); 4271 4272 /* 4273 * If the major versions match, adjust the minor version, if 4274 * necessary, down to the highest value supported by this 4275 * client. The server should support all minor versions lower 4276 * than the value it sent. 4277 */ 4278 if (ver_msg->ver_major == vdc_version[i].major) { 4279 if (ver_msg->ver_minor > vdc_version[i].minor) { 4280 DMSGX(0, 4281 "Adjusting minor version from %u to %u", 4282 ver_msg->ver_minor, vdc_version[i].minor); 4283 ver_msg->ver_minor = vdc_version[i].minor; 4284 } 4285 return (B_TRUE); 4286 } 4287 4288 /* 4289 * If the message contains a higher major version number, set 4290 * the message's major/minor versions to the current values 4291 * and return false, so this message will get resent with 4292 * these values, and the server will potentially try again 4293 * with the same or a lower version 4294 */ 4295 if (ver_msg->ver_major > vdc_version[i].major) { 4296 ver_msg->ver_major = vdc_version[i].major; 4297 ver_msg->ver_minor = vdc_version[i].minor; 4298 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 4299 ver_msg->ver_major, ver_msg->ver_minor); 4300 4301 return (B_FALSE); 4302 } 4303 4304 /* 4305 * Otherwise, the message's major version is less than the 4306 * current major version, so continue the loop to the next 4307 * (lower) supported version 4308 */ 4309 } 4310 4311 /* 4312 * No common version was found; "ground" the version pair in the 4313 * message to terminate negotiation 4314 */ 4315 ver_msg->ver_major = 0; 4316 ver_msg->ver_minor = 0; 4317 4318 return (B_FALSE); 4319 } 4320 /* -------------------------------------------------------------------------- */ 4321 4322 /* 4323 * DKIO(7I) support 4324 */ 4325 4326 typedef struct vdc_dk_arg { 4327 struct dk_callback dkc; 4328 int mode; 4329 dev_t dev; 4330 vdc_t *vdc; 4331 } vdc_dk_arg_t; 4332 4333 /* 4334 * Function: 4335 * vdc_dkio_flush_cb() 4336 * 4337 * Description: 4338 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 4339 * by kernel code. 4340 * 4341 * Arguments: 4342 * arg - a pointer to a vdc_dk_arg_t structure. 4343 */ 4344 void 4345 vdc_dkio_flush_cb(void *arg) 4346 { 4347 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 4348 struct dk_callback *dkc = NULL; 4349 vdc_t *vdc = NULL; 4350 int rv; 4351 4352 if (dk_arg == NULL) { 4353 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 4354 return; 4355 } 4356 dkc = &dk_arg->dkc; 4357 vdc = dk_arg->vdc; 4358 ASSERT(vdc != NULL); 4359 4360 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 4361 VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir); 4362 if (rv != 0) { 4363 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 4364 vdc->instance, rv, 4365 ddi_model_convert_from(dk_arg->mode & FMODELS)); 4366 } 4367 4368 /* 4369 * Trigger the callback to notify the caller that the ioctl call 4370 * has completed. 4371 */ 4372 if ((dk_arg->mode & FKIOCTL) && 4373 (dkc != NULL) && 4374 (dkc->dkc_callback != NULL)) { 4375 ASSERT(dkc->dkc_cookie != NULL); 4376 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 4377 } 4378 4379 /* Indicate that one less DKIO write flush is outstanding */ 4380 mutex_enter(&vdc->lock); 4381 vdc->dkio_flush_pending--; 4382 ASSERT(vdc->dkio_flush_pending >= 0); 4383 mutex_exit(&vdc->lock); 4384 4385 /* free the mem that was allocated when the callback was dispatched */ 4386 kmem_free(arg, sizeof (vdc_dk_arg_t)); 4387 } 4388 4389 /* 4390 * Function: 4391 * vdc_dkio_get_partition() 4392 * 4393 * Description: 4394 * This function implements the DKIOCGAPART ioctl.
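 *
 *	As a worked example: with dkg_nhead = 16 and dkg_nsect = 128
 *	(i.e. 2048 blocks per cylinder), a slice with p_start = 4096
 *	and p_size = 2048 is reported as dkl_cylno = 4096 / 2048 = 2
 *	and dkl_nblk = 2048.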
4395 * 4396 * Arguments: 4397 * dev - device 4398 * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 4399 * flag - ioctl flags 4400 */ 4401 static int 4402 vdc_dkio_get_partition(dev_t dev, caddr_t arg, int flag) 4403 { 4404 struct dk_geom geom; 4405 struct vtoc vtoc; 4406 union { 4407 struct dk_map map[NDKMAP]; 4408 struct dk_map32 map32[NDKMAP]; 4409 } data; 4410 int i, rv, size; 4411 4412 rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL); 4413 if (rv != 0) 4414 return (rv); 4415 4416 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, FKIOCTL); 4417 if (rv != 0) 4418 return (rv); 4419 4420 if (vtoc.v_nparts != NDKMAP || 4421 geom.dkg_nhead == 0 || geom.dkg_nsect == 0) 4422 return (EINVAL); 4423 4424 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 4425 4426 for (i = 0; i < NDKMAP; i++) { 4427 data.map32[i].dkl_cylno = vtoc.v_part[i].p_start / 4428 (geom.dkg_nhead * geom.dkg_nsect); 4429 data.map32[i].dkl_nblk = vtoc.v_part[i].p_size; 4430 } 4431 size = NDKMAP * sizeof (struct dk_map32); 4432 4433 } else { 4434 4435 for (i = 0; i < NDKMAP; i++) { 4436 data.map[i].dkl_cylno = vtoc.v_part[i].p_start / 4437 (geom.dkg_nhead * geom.dkg_nsect); 4438 data.map[i].dkl_nblk = vtoc.v_part[i].p_size; 4439 } 4440 size = NDKMAP * sizeof (struct dk_map); 4441 4442 } 4443 4444 if (ddi_copyout(&data, arg, size, flag) != 0) 4445 return (EFAULT); 4446 4447 return (0); 4448 } 4449 4450 /* 4451 * Function: 4452 * vdc_dioctl_rwcmd() 4453 * 4454 * Description: 4455 * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 4456 * for DKC_DIRECT disks to read or write at an absolute disk offset. 4457 * 4458 * Arguments: 4459 * dev - device 4460 * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 4461 * flag - ioctl flags 4462 */ 4463 static int 4464 vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) 4465 { 4466 struct dadkio_rwcmd32 rwcmd32; 4467 struct dadkio_rwcmd rwcmd; 4468 struct iovec aiov; 4469 struct uio auio; 4470 int rw, status; 4471 struct buf *buf; 4472 4473 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 4474 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 4475 sizeof (struct dadkio_rwcmd32), flag)) { 4476 return (EFAULT); 4477 } 4478 rwcmd.cmd = rwcmd32.cmd; 4479 rwcmd.flags = rwcmd32.flags; 4480 rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 4481 rwcmd.buflen = rwcmd32.buflen; 4482 rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 4483 } else { 4484 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 4485 sizeof (struct dadkio_rwcmd), flag)) { 4486 return (EFAULT); 4487 } 4488 } 4489 4490 switch (rwcmd.cmd) { 4491 case DADKIO_RWCMD_READ: 4492 rw = B_READ; 4493 break; 4494 case DADKIO_RWCMD_WRITE: 4495 rw = B_WRITE; 4496 break; 4497 default: 4498 return (EINVAL); 4499 } 4500 4501 bzero((caddr_t)&aiov, sizeof (struct iovec)); 4502 aiov.iov_base = rwcmd.bufaddr; 4503 aiov.iov_len = rwcmd.buflen; 4504 4505 bzero((caddr_t)&auio, sizeof (struct uio)); 4506 auio.uio_iov = &aiov; 4507 auio.uio_iovcnt = 1; 4508 auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; 4509 auio.uio_resid = rwcmd.buflen; 4510 auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 4511 4512 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4513 bioinit(buf); 4514 /* 4515 * We use the private field of buf to specify that this is an 4516 * I/O using an absolute offset. 
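 *
 * As an illustrative sketch only (names are hypothetical, not taken
 * from this file), a caller holding a descriptor for a DKC_DIRECT
 * vdisk could read absolute block 0 with:
 *
 *	struct dadkio_rwcmd rw;
 *
 *	bzero(&rw, sizeof (rw));
 *	rw.cmd = DADKIO_RWCMD_READ;
 *	rw.blkaddr = 0;
 *	rw.buflen = DEV_BSIZE;
 *	rw.bufaddr = my_buf;		(hypothetical buffer)
 *	(void) ioctl(fd, DIOCTL_RWCMD, &rw);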
4517 */ 4518 buf->b_private = (void *)VD_SLICE_NONE; 4519 4520 status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); 4521 4522 biofini(buf); 4523 kmem_free(buf, sizeof (buf_t)); 4524 4525 return (status); 4526 } 4527 4528 /* 4529 * This structure is used in the DKIO(7I) array below. 4530 */ 4531 typedef struct vdc_dk_ioctl { 4532 uint8_t op; /* VD_OP_XXX value */ 4533 int cmd; /* Solaris ioctl operation number */ 4534 size_t nbytes; /* size of structure to be copied */ 4535 4536 /* function to convert between vDisk and Solaris structure formats */ 4537 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 4538 int mode, int dir); 4539 } vdc_dk_ioctl_t; 4540 4541 /* 4542 * Subset of DKIO(7I) operations currently supported 4543 */ 4544 static vdc_dk_ioctl_t dk_ioctl[] = { 4545 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 4546 vdc_null_copy_func}, 4547 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 4548 vdc_get_wce_convert}, 4549 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 4550 vdc_set_wce_convert}, 4551 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 4552 vdc_get_vtoc_convert}, 4553 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 4554 vdc_set_vtoc_convert}, 4555 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 4556 vdc_get_geom_convert}, 4557 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 4558 vdc_get_geom_convert}, 4559 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 4560 vdc_get_geom_convert}, 4561 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 4562 vdc_set_geom_convert}, 4563 {VD_OP_GET_EFI, DKIOCGETEFI, 0, 4564 vdc_get_efi_convert}, 4565 {VD_OP_SET_EFI, DKIOCSETEFI, 0, 4566 vdc_set_efi_convert}, 4567 4568 /* DIOCTL_RWCMD is converted to a read or a write */ 4569 {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 4570 4571 /* 4572 * These particular ioctls are not sent to the server - vdc fakes up 4573 * the necessary info. 
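 * For example, DKIOCINFO is answered directly from the dk_cinfo
 * structure built by vdc_create_fake_geometry() when the disk layout
 * is set up, and DKIOCGMEDIAINFO from the cached dk_minfo, with no
 * VD_OP round trip to the server (see vd_process_ioctl() below).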
4574 */ 4575 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 4576 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 4577 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 4578 {0, DKIOCGAPART, 0, vdc_null_copy_func }, 4579 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 4580 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 4581 }; 4582 4583 /* 4584 * Function: 4585 * vd_process_ioctl() 4586 * 4587 * Description: 4588 * This routine processes disk specific ioctl calls 4589 * 4590 * Arguments: 4591 * dev - the device number 4592 * cmd - the operation [dkio(7I)] to be processed 4593 * arg - pointer to user provided structure 4594 * (contains data to be set or reference parameter for get) 4595 * mode - bit flag, indicating open settings, 32/64 bit type, etc 4596 * 4597 * Return Code: 4598 * 0 4599 * EFAULT 4600 * ENXIO 4601 * EIO 4602 * ENOTSUP 4603 */ 4604 static int 4605 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 4606 { 4607 int instance = VDCUNIT(dev); 4608 vdc_t *vdc = NULL; 4609 int rv = -1; 4610 int idx = 0; /* index into dk_ioctl[] */ 4611 size_t len = 0; /* #bytes to send to vds */ 4612 size_t alloc_len = 0; /* #bytes to allocate mem for */ 4613 caddr_t mem_p = NULL; 4614 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 4615 struct vtoc vtoc_saved; 4616 vdc_dk_ioctl_t *iop; 4617 4618 vdc = ddi_get_soft_state(vdc_state, instance); 4619 if (vdc == NULL) { 4620 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 4621 instance); 4622 return (ENXIO); 4623 } 4624 4625 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 4626 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 4627 4628 /* 4629 * Validate the ioctl operation to be performed. 4630 * 4631 * If we have looped through the array without finding a match then we 4632 * don't support this ioctl. 4633 */ 4634 for (idx = 0; idx < nioctls; idx++) { 4635 if (cmd == dk_ioctl[idx].cmd) 4636 break; 4637 } 4638 4639 if (idx >= nioctls) { 4640 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 4641 vdc->instance, cmd); 4642 return (ENOTSUP); 4643 } 4644 4645 iop = &(dk_ioctl[idx]); 4646 4647 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 4648 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 4649 dk_efi_t dk_efi; 4650 4651 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 4652 if (rv != 0) 4653 return (EFAULT); 4654 4655 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 4656 } else { 4657 len = iop->nbytes; 4658 } 4659 4660 /* 4661 * Deal with the ioctls which the server does not provide. 
vdc can 4662 * fake these up and return immediately 4663 */ 4664 switch (cmd) { 4665 case CDROMREADOFFSET: 4666 case DKIOCREMOVABLE: 4667 case USCSICMD: 4668 return (ENOTTY); 4669 4670 case DIOCTL_RWCMD: 4671 { 4672 if (vdc->cinfo->dki_ctype != DKC_DIRECT) 4673 return (ENOTTY); 4674 4675 return (vdc_dioctl_rwcmd(dev, arg, mode)); 4676 } 4677 4678 case DKIOCGAPART: 4679 { 4680 if (vdc->vdisk_label != VD_DISK_LABEL_VTOC) 4681 return (ENOTSUP); 4682 4683 return (vdc_dkio_get_partition(dev, arg, mode)); 4684 } 4685 4686 case DKIOCINFO: 4687 { 4688 struct dk_cinfo cinfo; 4689 if (vdc->cinfo == NULL) 4690 return (ENXIO); 4691 4692 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 4693 cinfo.dki_partition = VDCPART(dev); 4694 4695 rv = ddi_copyout(&cinfo, (void *)arg, 4696 sizeof (struct dk_cinfo), mode); 4697 if (rv != 0) 4698 return (EFAULT); 4699 4700 return (0); 4701 } 4702 4703 case DKIOCGMEDIAINFO: 4704 { 4705 if (vdc->minfo == NULL) 4706 return (ENXIO); 4707 4708 rv = ddi_copyout(vdc->minfo, (void *)arg, 4709 sizeof (struct dk_minfo), mode); 4710 if (rv != 0) 4711 return (EFAULT); 4712 4713 return (0); 4714 } 4715 4716 case DKIOCFLUSHWRITECACHE: 4717 { 4718 struct dk_callback *dkc = (struct dk_callback *)arg; 4719 vdc_dk_arg_t *dkarg = NULL; 4720 4721 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 4722 instance, mode); 4723 4724 /* 4725 * If the backing device is not a 'real' disk then the 4726 * W$ operation request to the vDisk server will fail 4727 * so we might as well save the cycles and return now. 4728 */ 4729 if (vdc->vdisk_type != VD_DISK_TYPE_DISK) 4730 return (ENOTTY); 4731 4732 /* 4733 * If arg is NULL, then there is no callback function 4734 * registered and the call operates synchronously; we 4735 * break and continue with the rest of the function and 4736 * wait for vds to return (i.e. after the request to 4737 * vds returns successfully, all writes completed prior 4738 * to the ioctl will have been flushed from the disk 4739 * write cache to persistent media). 4740 * 4741 * If a callback function is registered, we dispatch 4742 * the request on a task queue and return immediately. 4743 * The callback will deal with informing the calling 4744 * thread that the flush request is completed. 4745 */ 4746 if (dkc == NULL) 4747 break; 4748 4749 /* 4750 * the asynchronous callback is only supported if 4751 * invoked from within the kernel 4752 */ 4753 if ((mode & FKIOCTL) == 0) 4754 return (ENOTSUP); 4755 4756 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 4757 4758 dkarg->mode = mode; 4759 dkarg->dev = dev; 4760 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 4761 4762 mutex_enter(&vdc->lock); 4763 vdc->dkio_flush_pending++; 4764 dkarg->vdc = vdc; 4765 mutex_exit(&vdc->lock); 4766 4767 /* put the request on a task queue */ 4768 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 4769 (void *)dkarg, DDI_SLEEP); 4770 if (rv == NULL) { 4771 /* clean up if dispatch fails */ 4772 mutex_enter(&vdc->lock); 4773 vdc->dkio_flush_pending--; 4774 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 4775 } 4776 4777 return (rv == NULL ? ENOMEM : 0); 4778 }
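	/*
	 * Illustrative sketch only (function and cookie names are
	 * hypothetical): a kernel caller, the only context in which
	 * the asynchronous path above is allowed, could request
	 * notification when the flush completes with something like
	 *
	 *	struct dk_callback dkc;
	 *
	 *	dkc.dkc_callback = my_flush_done;
	 *	dkc.dkc_cookie = my_state;
	 *	(void) ldi_ioctl(lh, DKIOCFLUSHWRITECACHE,
	 *	    (intptr_t)&dkc, FKIOCTL, kcred, &rval);
	 */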
4779 } 4780 4781 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 4782 ASSERT(iop->op != 0); 4783 4784 /* LDC requires that the memory being mapped is 8-byte aligned */ 4785 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 4786 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 4787 instance, len, alloc_len); 4788 4789 ASSERT(alloc_len >= 0); /* sanity check */ 4790 if (alloc_len > 0) 4791 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 4792 4793 if (cmd == DKIOCSVTOC) { 4794 /* 4795 * Save a copy of the current VTOC so that we can roll back 4796 * if the setting of the new VTOC fails. 4797 */ 4798 bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc)); 4799 } 4800 4801 /* 4802 * Call the conversion function for this ioctl which, if necessary, 4803 * converts from the Solaris format to the format ARC'ed 4804 * as part of the vDisk protocol (FWARC 2006/195) 4805 */ 4806 ASSERT(iop->convert != NULL); 4807 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 4808 if (rv != 0) { 4809 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4810 instance, rv, cmd); 4811 if (mem_p != NULL) 4812 kmem_free(mem_p, alloc_len); 4813 return (rv); 4814 } 4815 4816 /* 4817 * send request to vds to service the ioctl. 4818 */ 4819 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 4820 VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 4821 VIO_both_dir); 4822 4823 if (rv != 0) { 4824 /* 4825 * This is not necessarily an error. The ioctl could 4826 * be returning a value such as ENOTTY to indicate 4827 * that the ioctl is not applicable. 4828 */ 4829 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 4830 instance, rv, cmd); 4831 if (mem_p != NULL) 4832 kmem_free(mem_p, alloc_len); 4833 4834 if (cmd == DKIOCSVTOC) { 4835 /* update of the VTOC has failed, roll back */ 4836 bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc)); 4837 } 4838 4839 return (rv); 4840 } 4841 4842 if (cmd == DKIOCSVTOC) { 4843 /* 4844 * The VTOC has been changed. We need to update the device 4845 * nodes to handle the case where an EFI label has been 4846 * changed to a VTOC label. We also try to update the device 4847 * node properties. Failing to set the properties should 4848 * not cause an error to be returned to the caller though. 4849 */ 4850 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4851 (void) vdc_create_device_nodes_vtoc(vdc); 4852 4853 if (vdc_create_device_nodes_props(vdc)) { 4854 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4855 " properties", vdc->instance); 4856 } 4857 4858 } else if (cmd == DKIOCSETEFI) { 4859 /* 4860 * The EFI has been changed. We need to update the device 4861 * nodes to handle the case where a VTOC label has been 4862 * changed to an EFI label. We also try to update the device 4863 * node properties. Failing to set the properties should 4864 * not cause an error to be returned to the caller though.
4865 */ 4866 struct dk_gpt *efi; 4867 size_t efi_len; 4868 4869 vdc->vdisk_label = VD_DISK_LABEL_EFI; 4870 (void) vdc_create_device_nodes_efi(vdc); 4871 4872 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 4873 4874 if (rv == 0) { 4875 vdc_store_efi(vdc, efi); 4876 rv = vdc_create_device_nodes_props(vdc); 4877 vd_efi_free(efi, efi_len); 4878 } 4879 4880 if (rv) { 4881 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4882 " properties", vdc->instance); 4883 } 4884 } 4885 4886 /* 4887 * Call the conversion function (if it exists) for this ioctl 4888 * which converts from the format ARC'ed as part of the vDisk 4889 * protocol (FWARC 2006/195) back to a format understood by 4890 * the rest of Solaris. 4891 */ 4892 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 4893 if (rv != 0) { 4894 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4895 instance, rv, cmd); 4896 if (mem_p != NULL) 4897 kmem_free(mem_p, alloc_len); 4898 return (rv); 4899 } 4900 4901 if (mem_p != NULL) 4902 kmem_free(mem_p, alloc_len); 4903 4904 return (rv); 4905 } 4906 4907 /* 4908 * Function: 4909 * vdc_null_copy_func() 4910 * Description: 4911 * This is an empty conversion function used by ioctl calls which 4912 * do not need to convert the data being passed in/out to userland 4913 */ 4914 static int 4915 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 4916 { 4917 _NOTE(ARGUNUSED(vdc)) 4918 _NOTE(ARGUNUSED(from)) 4919 _NOTE(ARGUNUSED(to)) 4920 _NOTE(ARGUNUSED(mode)) 4921 _NOTE(ARGUNUSED(dir)) 4922 4923 return (0); 4924 } 4925 4926 static int 4927 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 4928 int mode, int dir) 4929 { 4930 _NOTE(ARGUNUSED(vdc)) 4931 4932 if (dir == VD_COPYIN) 4933 return (0); /* nothing to do */ 4934 4935 if (ddi_copyout(from, to, sizeof (int), mode) != 0) 4936 return (EFAULT); 4937 4938 return (0); 4939 } 4940 4941 static int 4942 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 4943 int mode, int dir) 4944 { 4945 _NOTE(ARGUNUSED(vdc)) 4946 4947 if (dir == VD_COPYOUT) 4948 return (0); /* nothing to do */ 4949 4950 if (ddi_copyin(from, to, sizeof (int), mode) != 0) 4951 return (EFAULT); 4952 4953 return (0); 4954 } 4955 4956 /* 4957 * Function: 4958 * vdc_get_vtoc_convert() 4959 * 4960 * Description: 4961 * This routine performs the necessary conversions from the format 4962 * defined in FWARC 2006/195 to the Solaris structure returned by DKIOCGVTOC. 4963 * 4964 * In the struct vtoc definition, the timestamp field is marked as not 4965 * supported, so it is not part of the vDisk protocol (FWARC 2006/195). 4966 * However, SVM uses that field to check that it can write to the VTOC, 4967 * so we fake up that field's contents. 4968 * 4969 * Arguments: 4970 * vdc - the vDisk client 4971 * from - the buffer containing the data to be copied from 4972 * to - the buffer to be copied to 4973 * mode - flags passed to ioctl() call 4974 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 4975 * 4976 * Return Code: 4977 * 0 - Success 4978 * ENXIO - incorrect buffer passed in. 4979 * EFAULT - ddi_copyout routine encountered an error.
4980 */ 4981 static int 4982 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4983 { 4984 int i; 4986 void *tmp_memp; 4987 struct vtoc vt; 4988 struct vtoc32 vt32; 4989 int copy_len = 0; 4990 int rv = 0; 4991 4992 if (dir != VD_COPYOUT) 4993 return (0); /* nothing to do */ 4994 4995 if ((from == NULL) || (to == NULL)) 4996 return (ENXIO); 4997 4998 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4999 copy_len = sizeof (struct vtoc32); 5000 else 5001 copy_len = sizeof (struct vtoc); 5002 5005 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 5006 5007 /* fake the VTOC timestamp field */ 5008 for (i = 0; i < V_NUMPAR; i++) { 5009 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 5010 } 5011 5012 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5013 vtoctovtoc32(vt, vt32); 5014 tmp_memp = &vt32; 5015 } else { 5016 tmp_memp = &vt; 5017 } 5018 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 5019 if (rv != 0) 5020 rv = EFAULT; 5021 5023 return (rv); 5024 } 5025 5026 /* 5027 * Function: 5028 * vdc_set_vtoc_convert() 5029 * 5030 * Description: 5031 * This routine performs the necessary conversions from the DKIOCSVTOC 5032 * Solaris structure to the format defined in FWARC 2006/195. 5033 * 5034 * Arguments: 5035 * vdc - the vDisk client 5036 * from - Buffer with data 5037 * to - Buffer where data is to be copied to 5038 * mode - flags passed to ioctl 5039 * dir - direction of copy (in or out) 5040 * 5041 * Return Code: 5042 * 0 - Success 5043 * ENXIO - Invalid buffer passed in 5044 * EFAULT - ddi_copyin of data failed 5045 */ 5046 static int 5047 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5048 { 5049 void *tmp_mem = NULL; 5050 struct vtoc vt; 5051 struct vtoc *vtp = &vt; 5052 vd_vtoc_t vtvd; 5053 int copy_len = 0; 5054 int rv = 0; 5055 5056 if (dir != VD_COPYIN) 5057 return (0); /* nothing to do */ 5058 5059 if ((from == NULL) || (to == NULL)) 5060 return (ENXIO); 5061 5062 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 5063 copy_len = sizeof (struct vtoc32); 5064 else 5065 copy_len = sizeof (struct vtoc); 5066 5067 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5068 5069 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 5070 if (rv != 0) { 5071 kmem_free(tmp_mem, copy_len); 5072 return (EFAULT); 5073 } 5074 5075 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5076 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 5077 } else { 5078 vtp = tmp_mem; 5079 } 5080 5081 /* 5082 * The VTOC is being changed, so vdc needs to update the copy 5083 * it saved in the soft state structure.
5084 */ 5085 bcopy(vtp, vdc->vtoc, sizeof (struct vtoc)); 5086 5087 VTOC2VD_VTOC(vtp, &vtvd); 5088 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 5089 kmem_free(tmp_mem, copy_len); 5090 5091 return (0); 5092 } 5093 5094 /* 5095 * Function: 5096 * vdc_get_geom_convert() 5097 * 5098 * Description: 5099 * This routine performs the necessary conversions from the format 5100 * defined in FWARC 2006/195 to the Solaris structures used by the 5101 * DKIOCGGEOM, DKIOCG_PHYGEOM and DKIOCG_VIRTGEOM ioctls. 5102 * 5103 * Arguments: 5104 * vdc - the vDisk client 5105 * from - Buffer with data 5106 * to - Buffer where data is to be copied to 5107 * mode - flags passed to ioctl 5108 * dir - direction of copy (in or out) 5109 * 5110 * Return Code: 5111 * 0 - Success 5112 * ENXIO - Invalid buffer passed in 5113 * EFAULT - ddi_copyout of data failed 5114 */ 5115 static int 5116 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5117 { 5118 _NOTE(ARGUNUSED(vdc)) 5119 5120 struct dk_geom geom; 5121 int copy_len = sizeof (struct dk_geom); 5122 int rv = 0; 5123 5124 if (dir != VD_COPYOUT) 5125 return (0); /* nothing to do */ 5126 5127 if ((from == NULL) || (to == NULL)) 5128 return (ENXIO); 5129 5130 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 5131 rv = ddi_copyout(&geom, to, copy_len, mode); 5132 if (rv != 0) 5133 rv = EFAULT; 5134 5135 return (rv); 5136 } 5137 5138 /* 5139 * Function: 5140 * vdc_set_geom_convert() 5141 * 5142 * Description: 5143 * This routine performs the necessary conversions from the DKIOCSGEOM 5144 * Solaris structure to the format defined in FWARC 2006/195. 5145 * 5146 * Arguments: 5147 * vdc - the vDisk client 5148 * from - Buffer with data 5149 * to - Buffer where data is to be copied to 5150 * mode - flags passed to ioctl 5151 * dir - direction of copy (in or out) 5152 * 5153 * Return Code: 5154 * 0 - Success 5155 * ENXIO - Invalid buffer passed in 5156 * EFAULT - ddi_copyin of data failed 5157 */ 5158 static int 5159 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5160 { 5161 _NOTE(ARGUNUSED(vdc)) 5162 5163 vd_geom_t vdgeom; 5164 void *tmp_mem = NULL; 5165 int copy_len = sizeof (struct dk_geom); 5166 int rv = 0; 5167 5168 if (dir != VD_COPYIN) 5169 return (0); /* nothing to do */ 5170 5171 if ((from == NULL) || (to == NULL)) 5172 return (ENXIO); 5173 5174 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5175 5176 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 5177 if (rv != 0) { 5178 kmem_free(tmp_mem, copy_len); 5179 return (EFAULT); 5180 } 5181 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 5182 bcopy(&vdgeom, to, sizeof (vdgeom)); 5183 kmem_free(tmp_mem, copy_len); 5184 5185 return (0); 5186 } 5187
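/*
 * Function:
 *	vdc_get_efi_convert()
 *
 * Description:
 *	This routine performs the necessary conversions for the DKIOCGETEFI
 *	ioctl: on VD_COPYIN it builds the vd_efi_t request (FWARC 2006/195)
 *	from the caller's dk_efi_t, and on VD_COPYOUT it copies the EFI data
 *	returned by the server back out to the caller's buffer.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- Buffer with data
 *	to	- Buffer where data is to be copied to
 *	mode	- flags passed to ioctl
 *	dir	- direction of copy (in or out)
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- Invalid buffer passed in
 *	EFAULT	- ddi_copyin/ddi_copyout of data failed
 */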
5188 static int 5189 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5190 { 5191 _NOTE(ARGUNUSED(vdc)) 5192 5193 vd_efi_t *vd_efi; 5194 dk_efi_t dk_efi; 5195 int rv = 0; 5196 void *uaddr; 5197 5198 if ((from == NULL) || (to == NULL)) 5199 return (ENXIO); 5200 5201 if (dir == VD_COPYIN) { 5202 5203 vd_efi = (vd_efi_t *)to; 5204 5205 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 5206 if (rv != 0) 5207 return (EFAULT); 5208 5209 vd_efi->lba = dk_efi.dki_lba; 5210 vd_efi->length = dk_efi.dki_length; 5211 bzero(vd_efi->data, vd_efi->length); 5212 5213 } else { 5214 5215 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 5216 if (rv != 0) 5217 return (EFAULT); 5218 5219 uaddr = dk_efi.dki_data; 5220 5221 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 5222 5223 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 5224 5225 rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length, 5226 mode); 5227 if (rv != 0) { /* free the bounce buffer before bailing out */ kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5228 return (EFAULT); } 5229 5230 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5231 } 5232 5233 return (0); 5234 } 5235 5236 static int 5237 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5238 { 5239 _NOTE(ARGUNUSED(vdc)) 5240 5241 dk_efi_t dk_efi; 5242 void *uaddr; 5243 5244 if (dir == VD_COPYOUT) 5245 return (0); /* nothing to do */ 5246 5247 if ((from == NULL) || (to == NULL)) 5248 return (ENXIO); 5249 5250 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 5251 return (EFAULT); 5252 5253 uaddr = dk_efi.dki_data; 5254 5255 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 5256 5257 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) { /* free the bounce buffer on copyin failure */ kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5258 return (EFAULT); } 5259 5260 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 5261 5262 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5263 5264 return (0); 5265 } 5266 5267 /* 5268 * Function: 5269 * vdc_create_fake_geometry() 5270 * 5271 * Description: 5272 * This routine fakes up the disk info needed for some DKIO ioctls. 5273 * - DKIOCINFO 5274 * - DKIOCGMEDIAINFO 5275 * 5276 * [ just like lofi(7D) and ramdisk(7D) ] 5277 * 5278 * Arguments: 5279 * vdc - soft state pointer for this instance of the device driver. 5280 * 5281 * Return Code: 5282 * 0 - Success 5283 */ 5284 static int 5285 vdc_create_fake_geometry(vdc_t *vdc) 5286 { 5287 ASSERT(vdc != NULL); 5288 5289 /* 5290 * Check if max_xfer_sz and vdisk_size are valid 5291 */ 5292 if (vdc->vdisk_size == 0 || vdc->max_xfer_sz == 0) 5293 return (EIO); 5294 5295 /* 5296 * DKIOCINFO support 5297 */ 5298 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 5299 5300 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 5301 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 5302 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 5303 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 5304 /* 5305 * We currently set the controller type to DKC_DIRECT for any disk. 5306 * When SCSI support is implemented, we will eventually change this 5307 * type to DKC_SCSI_CCS for disks supporting the SCSI protocol. 5308 */ 5309 vdc->cinfo->dki_ctype = DKC_DIRECT; 5310 vdc->cinfo->dki_flags = DKI_FMTVOL; 5311 vdc->cinfo->dki_cnum = 0; 5312 vdc->cinfo->dki_addr = 0; 5313 vdc->cinfo->dki_space = 0; 5314 vdc->cinfo->dki_prio = 0; 5315 vdc->cinfo->dki_vec = 0; 5316 vdc->cinfo->dki_unit = vdc->instance; 5317 vdc->cinfo->dki_slave = 0; 5318 /* 5319 * The partition number will be created on the fly depending on the 5320 * actual slice (i.e. minor node) that is used to request the data. 5321 */ 5322 vdc->cinfo->dki_partition = 0; 5323 5324 /* 5325 * DKIOCGMEDIAINFO support 5326 */ 5327 if (vdc->minfo == NULL) 5328 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 5329 vdc->minfo->dki_media_type = DK_FIXED_DISK; 5330 vdc->minfo->dki_capacity = vdc->vdisk_size; 5331 vdc->minfo->dki_lbsize = DEV_BSIZE; 5332 5333 return (0); 5334 } 5335
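/*
 * Worked example of the DKIOCGMEDIAINFO data faked up above, assuming
 * vdisk_size is expressed in DEV_BSIZE units as this routine does: a
 * vdisk whose attribute handshake reported vdisk_size = 20971520 is
 * presented as a DK_FIXED_DISK of 20971520 * 512 bytes (10GB) with a
 * 512-byte logical block size.
 */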
5336 /* 5337 * Function: 5338 * vdc_setup_disk_layout() 5339 * 5340 * Description: 5341 * This routine discovers all the necessary details about the "disk" 5342 * by requesting the data that is available from the vDisk server and by 5343 * faking up the rest of the data. 5344 * 5345 * Arguments: 5346 * vdc - soft state pointer for this instance of the device driver. 5347 * 5348 * Return Code: 5349 * 0 - Success 5350 */ 5351 static int 5352 vdc_setup_disk_layout(vdc_t *vdc) 5353 { 5354 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 5355 dev_t dev; 5356 int slice = 0; 5357 int rv, error; 5358 5359 ASSERT(vdc != NULL); 5360 5361 if (vdc->vtoc == NULL) 5362 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 5363 5364 dev = makedevice(ddi_driver_major(vdc->dip), 5365 VD_MAKE_DEV(vdc->instance, 0)); 5366 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); 5367 5368 if (rv && rv != ENOTSUP) { 5369 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 5370 vdc->instance, rv); 5371 return (rv); 5372 } 5373 5374 /* 5375 * Attempting to read the VTOC initiates the handshake and 5376 * establishes a connection. Following the handshake, go ahead 5377 * and create the geometry. 5378 */ 5379 error = vdc_create_fake_geometry(vdc); 5380 if (error != 0) { 5381 DMSG(vdc, 0, "[%d] Failed to create disk geometry (err=%d)", 5382 vdc->instance, error); 5383 return (error); 5384 } 5385 5386 if (rv == ENOTSUP) { 5387 /* 5388 * If the device does not support VTOC then we try 5389 * to read an EFI label. 5390 */ 5391 struct dk_gpt *efi; 5392 size_t efi_len; 5393 5394 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 5395 5396 if (rv) { 5397 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 5398 vdc->instance, rv); 5399 return (rv); 5400 } 5401 5402 vdc->vdisk_label = VD_DISK_LABEL_EFI; 5403 vdc_store_efi(vdc, efi); 5404 vd_efi_free(efi, efi_len); 5405 5406 return (0); 5407 } 5408 5409 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 5410 5411 /* 5412 * FUTURE: This could be the default way of reading the VTOC 5413 * from the disk, as opposed to sending VD_OP_GET_VTOC 5414 * to the server. Currently this is a sanity check. 5415 * 5416 * Find the slice that represents the entire "disk" and use that to 5417 * read the disk label. The convention in Solaris is that slice 2 5418 * represents the whole disk so we check that it does; otherwise we 5419 * default to slice 0 5420 */ 5421 if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && 5422 (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { 5423 slice = 2; 5424 } else { 5425 slice = 0; 5426 } 5427 5428 /* 5429 * Read disk label from start of disk 5430 */ 5431 vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); 5432 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 5433 bioinit(buf); 5434 buf->b_un.b_addr = (caddr_t)vdc->label; 5435 buf->b_bcount = DK_LABEL_SIZE; 5436 buf->b_flags = B_BUSY | B_READ; 5437 buf->b_dev = dev; 5438 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)vdc->label, 5439 DK_LABEL_SIZE, slice, 0, CB_STRATEGY, buf, VIO_read_dir); 5440 if (rv) { 5441 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 5442 vdc->instance); 5443 biofini(buf); /* tear down the buf before freeing it */ kmem_free(buf, sizeof (buf_t)); 5444 return (rv); 5445 } 5446 rv = biowait(buf); 5447 biofini(buf); 5448 kmem_free(buf, sizeof (buf_t)); 5449 5450 return (rv); 5451 } 5452 5453 /* 5454 * Function: 5455 * vdc_setup_devid() 5456 * 5457 * Description: 5458 * This routine discovers the devid of a vDisk. It requests the devid of 5459 * the underlying device from the vDisk server, builds an encapsulated 5460 * devid based on the retrieved devid and registers that new devid to 5461 * the vDisk. 5462 * 5463 * Arguments: 5464 * vdc - soft state pointer for this instance of the device driver.
5465 * 5466 * Return Code: 5467 * 0 - A devid was successfully registered for the vDisk 5468 */ 5469 static int 5470 vdc_setup_devid(vdc_t *vdc) 5471 { 5472 int rv; 5473 vd_devid_t *vd_devid; 5474 size_t bufsize, new_bufsize, bufid_len; 5475 5476 /* 5477 * At first sight, we don't know the size of the devid that the 5478 * server will return but this size will be encoded into the 5479 * reply. So we do a first request using a default size then we 5480 * check if this size was large enough. If not then we do a second 5481 * request with the correct size returned by the server. Note that 5482 * ldc requires size to be 8-byte aligned. 5483 */ 5484 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 5485 sizeof (uint64_t)); 5486 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5487 bufid_len = bufsize - sizeof (vd_devid_t) - 1; 5488 5489 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 5490 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir); 5491 5492 DMSG(vdc, 2, "sync_op returned %d\n", rv); 5493 5494 if (rv) { 5495 kmem_free(vd_devid, bufsize); 5496 return (rv); 5497 } 5498 5499 if (vd_devid->length > bufid_len) { 5500 /* 5501 * The returned devid is larger than the buffer used. Try again 5502 * with a buffer of the right size. Compute the new size before 5503 * freeing the old buffer since we need vd_devid->length. 5504 */ new_bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), sizeof (uint64_t)); kmem_free(vd_devid, bufsize); 5505 bufsize = new_bufsize; 5506 5507 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5508 bufid_len = bufsize - sizeof (vd_devid_t) - 1; 5509 5510 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 5511 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 5512 VIO_both_dir); 5513 5514 if (rv) { 5515 kmem_free(vd_devid, bufsize); 5516 return (rv); 5517 } 5518 } 5519 5520 /* 5521 * The virtual disk should have the same device id as the one associated 5522 * with the physical disk it is mapped on, otherwise sharing a disk 5523 * between an LDom and a non-LDom may not work (for example for a shared 5524 * SVM disk set). 5525 * 5526 * The DDI framework does not allow creating a device id with any 5527 * type so we first create a device id of type DEVID_ENCAP and then 5528 * we restore the original type of the physical device. 5529 */ 5530 5531 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 5532 5533 /* build an encapsulated devid based on the returned devid */ 5534 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 5535 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 5536 DMSG(vdc, 1, "[%d] Failed to create devid\n", vdc->instance); 5537 kmem_free(vd_devid, bufsize); 5538 return (1); 5539 } 5540 5541 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 5542 5543 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 5544 5545 kmem_free(vd_devid, bufsize); 5546 5547 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 5548 DMSG(vdc, 1, "[%d] Failed to register devid\n", vdc->instance); 5549 return (1); 5550 } 5551 5552 return (0); 5553 } 5554 5555 static void 5556 vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi) 5557 { 5558 struct vtoc *vtoc = vdc->vtoc; 5559 5560 vd_efi_to_vtoc(efi, vtoc); 5561 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5562 /* 5563 * vd_efi_to_vtoc() will store information about the EFI Sun 5564 * reserved partition (representing the entire disk) into 5565 * partition 7. However, a single-slice device will only have 5566 * that single partition and the vdc driver expects to find 5567 * information about that partition in slice 0. So we need 5568 * to copy the information from slice 7 to slice 0.
5569 */ 5570 vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag; 5571 vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag; 5572 vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start; 5573 vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; 5574 } 5575 } 5576