/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * LDoms virtual disk client (vdc) device driver
 *
 * This driver runs on a guest logical domain and communicates with the virtual
 * disk server (vds) driver running on the service domain which is exporting
 * virtualized "disks" to the guest logical domain.
 *
 * The driver can be divided into four sections:
 *
 * 1) generic device driver housekeeping
 *	_init, _fini, attach, detach, ops structures, etc.
 *
 * 2) communication channel setup
 *	Setup the communications link over the LDC channel that vdc uses to
 *	talk to the vDisk server. Initialise the descriptor ring which
 *	allows the LDC clients to transfer data via memory mappings.
 *
 * 3) Support exported to upper layers (filesystems, etc)
 *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
 *	ioctl calls. vdc will copy the data to be written to the descriptor
 *	ring, or map the buffer that stores the data read by the vDisk
 *	server into the descriptor ring. It then sends a message to the
 *	vDisk server requesting it to complete the operation.
 *
 * 4) Handling responses from the vDisk server.
 *	The vDisk server will ACK some or all of the messages vdc sends to it
 *	(this is configured during the handshake). Upon receipt of an ACK
 *	vdc will check the descriptor ring and signal to the upper layer
 *	code waiting on the IO.
 */
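/*
 * Illustrative sketch (not compiled): the path a synchronous read takes
 * through sections 3 and 4 above, using this driver's own entry points.
 * The calling context is assumed for illustration only.
 */
#if 0
	/* 3) the upper layer enters via read(9E), which queues a buf ... */
	(void) physio(vdc_strategy, NULL, dev, B_READ, minphys, uio);

	/*
	 * ... vdc_strategy() maps the buffer into a descriptor ring entry
	 * and notifies the server
	 */
	rv = vdc_populate_descriptor(vdc, (caddr_t)buf, buf->b_bcount,
	    VD_OP_BREAD, buf->b_lblkno, SDPART(getminor(buf->b_edev)));

	/*
	 * 4) the vds ACK arrives via vdc_handle_cb(); the message
	 * processing thread then completes the buf with biodone()
	 */
#endif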
#include <sys/atomic.h>
#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/efi_partition.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mach_descrip.h>
#include <sys/modctl.h>
#include <sys/mdeg.h>
#include <sys/note.h>
#include <sys/open.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/promif.h>
#include <sys/vtoc.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>

#include <sys/cdio.h>
#include <sys/dktp/cm.h>
#include <sys/dktp/fdisk.h>
#include <sys/scsi/generic/sense.h>
#include <sys/scsi/impl/uscsi.h>	/* Needed for defn of USCSICMD ioctl */
#include <sys/scsi/targets/sddef.h>

#include <sys/ldoms.h>
#include <sys/ldc.h>
#include <sys/vio_common.h>
#include <sys/vio_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdc.h>

/*
 * function prototypes
 */

/* standard driver functions */
static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int vdc_strategy(struct buf *buf);
static int vdc_print(dev_t dev, char *str);
static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
		cred_t *credp, int *rvalp);
static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);

static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
		void *arg, void **resultp);
static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);

/* setup */
static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
static int vdc_do_ldc_init(vdc_t *vdc);
static int vdc_start_ldc_connection(vdc_t *vdc);
static int vdc_create_device_nodes(vdc_t *vdc);
static int vdc_create_device_nodes_efi(vdc_t *vdc);
static int vdc_create_device_nodes_vtoc(vdc_t *vdc);
static int vdc_create_device_nodes_props(vdc_t *vdc);
static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id);
static int vdc_do_ldc_up(vdc_t *vdc);
static void vdc_terminate_ldc(vdc_t *vdc);
static int vdc_init_descriptor_ring(vdc_t *vdc);
static void vdc_destroy_descriptor_ring(vdc_t *vdc);
static int vdc_setup_devid(vdc_t *vdc);
static void vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi);

/* handshake with vds */
static void vdc_init_handshake_negotiation(void *arg);
static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
static int vdc_init_attr_negotiation(vdc_t *vdc);
static int vdc_init_dring_negotiate(vdc_t *vdc);
static void vdc_reset_connection(vdc_t *vdc, boolean_t resetldc);
static boolean_t vdc_is_able_to_tx_data(vdc_t *vdc, int flag);
static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg);

/* processing incoming messages from vDisk server */
static void vdc_process_msg_thread(vdc_t *vdc);
static void vdc_process_msg(void *arg);
static void vdc_do_process_msg(vdc_t *vdc);
static uint_t vdc_handle_cb(uint64_t event, caddr_t arg);
static int vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg);
static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg);
static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg);
static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
static int vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t needed);
static int vdc_populate_descriptor(vdc_t *vdc, caddr_t addr,
		size_t nbytes, int op, uint64_t arg, uint64_t slice);
static int vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx,
		vio_dring_msg_t dmsg);
static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
static int vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx,
		caddr_t addr, size_t nbytes, int operation);
static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg);

/* dkio */
static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
static int vdc_create_fake_geometry(vdc_t *vdc);
static int vdc_setup_disk_layout(vdc_t *vdc);
static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);
static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to,
		int mode, int dir);

/*
 * Module variables
 */

/*
 * Tunable variables to control how long vdc waits before timing out on
 * various operations
 */
static int	vdc_retries = 10;

/* calculated from 'vdc_usec_timeout' during attach */
static uint64_t	vdc_hz_timeout;				/* units: clock ticks */
static uint64_t	vdc_usec_timeout = 30 * MICROSEC;	/* 30s, units: usec */

static uint64_t	vdc_hz_timeout_ldc;			/* units: clock ticks */
static uint64_t	vdc_usec_timeout_ldc = 10 * MILLISEC;	/* 0.01s, units: usec */

/* values for dumping - need to run in a tighter loop */
static uint64_t	vdc_usec_timeout_dump = 100 * MILLISEC;	/* 0.1s, units: usec */
static int	vdc_dump_retries = 100;

/* Count of the number of vdc instances attached */
static volatile uint32_t	vdc_instance_count = 0;

/* Soft state pointer */
static void	*vdc_state;

/* variable level controlling the verbosity of the error/debug messages */
int	vdc_msglevel = 0;
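/*
 * Worked example (assuming a 100 Hz system clock): attach(9E) converts
 * the microsecond tunables above into clock ticks, so
 *	vdc_hz_timeout     = drv_usectohz(30 * MICROSEC) = 3000 ticks (30s)
 *	vdc_hz_timeout_ldc = drv_usectohz(10 * MILLISEC) = 1 tick (10ms)
 * Because the conversion happens during attach, changes to the tunables
 * only take effect on the next attach of an instance.
 */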
/*
 * Supported vDisk protocol version pairs.
 *
 * The first array entry is the latest and preferred version.
 */
static const vio_ver_t	vdc_version[] = {{1, 0}};

static struct cb_ops vdc_cb_ops = {
	vdc_open,	/* cb_open */
	vdc_close,	/* cb_close */
	vdc_strategy,	/* cb_strategy */
	vdc_print,	/* cb_print */
	vdc_dump,	/* cb_dump */
	vdc_read,	/* cb_read */
	vdc_write,	/* cb_write */
	vdc_ioctl,	/* cb_ioctl */
	nodev,		/* cb_devmap */
	nodev,		/* cb_mmap */
	nodev,		/* cb_segmap */
	nochpoll,	/* cb_chpoll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* cb_str */
	D_MP | D_64BIT,	/* cb_flag */
	CB_REV,		/* cb_rev */
	vdc_aread,	/* cb_aread */
	vdc_awrite	/* cb_awrite */
};

static struct dev_ops vdc_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	vdc_getinfo,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vdc_attach,	/* devo_attach */
	vdc_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	&vdc_cb_ops,	/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk client %I%",
	&vdc_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/* -------------------------------------------------------------------------- */

/*
 * Device Driver housekeeping and setup
 */

int
_init(void)
{
	int	status;

	if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0)
		return (status);
	if ((status = mod_install(&modlinkage)) != 0)
		ddi_soft_state_fini(&vdc_state);
	vdc_efi_init(vd_process_ioctl);
	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;

	if ((status = mod_remove(&modlinkage)) != 0)
		return (status);
	vdc_efi_fini();
	ddi_soft_state_fini(&vdc_state);
	return (0);
}

static int
vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	_NOTE(ARGUNUSED(dip))

	int	instance = SDUNIT(getminor((dev_t)arg));
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
			*resultp = NULL;
			return (DDI_FAILURE);
		}
		*resultp = vdc->dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);
	default:
		*resultp = NULL;
		return (DDI_FAILURE);
	}
}
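/*
 * Sketch (assumed semantics of the sddef.h/vdc.h macros used throughout
 * this file): a vdc minor number encodes both the instance and the slice,
 * and the pairs below are inverses of each other:
 *
 *	instance = SDUNIT(getminor(dev));
 *	slice    = SDPART(getminor(dev));
 *	minor    = VD_MAKE_DEV(instance, slice);
 */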
static int
vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	instance;
	int	rv;
	uint_t	retries = 0;
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_DETACH:
		/* the real work happens below */
		break;
	case DDI_SUSPEND:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	ASSERT(cmd == DDI_DETACH);
	instance = ddi_get_instance(dip);
	DMSG(1, "[%d] Entered\n", instance);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	if (vdc->open) {
		DMSG(0, "[%d] Cannot detach: device is open", instance);
		return (DDI_FAILURE);
	}

	DMSG(0, "[%d] proceeding...\n", instance);

	/*
	 * try and disable callbacks to prevent another handshake
	 */
	rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
	DMSG(0, "[%d] callback disabled (rv=%d)\n", instance, rv);

	/*
	 * Prevent any more attempts to start a handshake with the vdisk
	 * server and tear down the existing connection.
	 */
	mutex_enter(&vdc->lock);
	vdc->initialized |= VDC_HANDSHAKE_STOP;
	vdc_reset_connection(vdc, B_TRUE);
	mutex_exit(&vdc->lock);

	if (vdc->initialized & VDC_THREAD) {
		mutex_enter(&vdc->msg_proc_lock);
		vdc->msg_proc_thr_state = VDC_THR_STOP;
		vdc->msg_pending = B_TRUE;
		cv_signal(&vdc->msg_proc_cv);

		while (vdc->msg_proc_thr_state != VDC_THR_DONE) {
			DMSG(0, "[%d] Waiting for thread to exit\n", instance);
			rv = cv_timedwait(&vdc->msg_proc_cv,
			    &vdc->msg_proc_lock,
			    VD_GET_TIMEOUT_HZ(vdc_hz_timeout, 1));
			if ((rv == -1) && (retries++ > vdc_retries))
				break;
		}
		mutex_exit(&vdc->msg_proc_lock);
	}

	mutex_enter(&vdc->lock);

	if (vdc->initialized & VDC_DRING)
		vdc_destroy_descriptor_ring(vdc);

	if (vdc->initialized & VDC_LDC)
		vdc_terminate_ldc(vdc);

	mutex_exit(&vdc->lock);

	if (vdc->initialized & VDC_MINOR) {
		ddi_prop_remove_all(dip);
		ddi_remove_minor_node(dip, NULL);
	}

	if (vdc->initialized & VDC_LOCKS) {
		mutex_destroy(&vdc->lock);
		mutex_destroy(&vdc->attach_lock);
		mutex_destroy(&vdc->msg_proc_lock);
		mutex_destroy(&vdc->dring_lock);
		cv_destroy(&vdc->cv);
		cv_destroy(&vdc->attach_cv);
		cv_destroy(&vdc->msg_proc_cv);
	}

	if (vdc->minfo)
		kmem_free(vdc->minfo, sizeof (struct dk_minfo));

	if (vdc->cinfo)
		kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));

	if (vdc->vtoc)
		kmem_free(vdc->vtoc, sizeof (struct vtoc));

	if (vdc->label)
		kmem_free(vdc->label, DK_LABEL_SIZE);

	if (vdc->devid) {
		ddi_devid_unregister(dip);
		ddi_devid_free(vdc->devid);
	}

	if (vdc->initialized & VDC_SOFT_STATE)
		ddi_soft_state_free(vdc_state, instance);

	DMSG(0, "[%d] End %p\n", instance, (void *)vdc);

	return (DDI_SUCCESS);
}
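/*
 * Note on the teardown order above: vdc->initialized is a bitmask of
 * VDC_* flags recording how far attach(9E) got. That is what allows
 * vdc_attach() to call vdc_detach() to unwind a partially-completed
 * attach: only resources whose flag bits are set get released.
 */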
static int
vdc_do_attach(dev_info_t *dip)
{
	int	instance;
	vdc_t	*vdc = NULL;
	int	status;
	uint_t	retries = 0;

	ASSERT(dip != NULL);

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure",
		    instance);
		return (DDI_FAILURE);
	}

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	/*
	 * We assign the value to initialized in this case to zero out the
	 * variable and then set bits in it to indicate what has been done
	 */
	vdc->initialized = VDC_SOFT_STATE;

	vdc_hz_timeout = drv_usectohz(vdc_usec_timeout);
	vdc_hz_timeout_ldc = drv_usectohz(vdc_usec_timeout_ldc);

	vdc->dip = dip;
	vdc->instance = instance;
	vdc->open = 0;
	vdc->vdisk_type = VD_DISK_TYPE_UNK;
	vdc->vdisk_label = VD_DISK_LABEL_UNK;
	vdc->state = VD_STATE_INIT;
	vdc->ldc_state = 0;
	vdc->session_id = 0;
	vdc->block_size = DEV_BSIZE;
	vdc->max_xfer_sz = maxphys / DEV_BSIZE;

	vdc->vtoc = NULL;
	vdc->cinfo = NULL;
	vdc->minfo = NULL;

	mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->attach_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->msg_proc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vdc->dring_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->attach_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->msg_proc_cv, NULL, CV_DRIVER, NULL);
	vdc->initialized |= VDC_LOCKS;

	vdc->msg_pending = B_FALSE;
	vdc->msg_proc_thr_id = thread_create(NULL, 0, vdc_process_msg_thread,
	    vdc, 0, &p0, TS_RUN, minclsyspri);
	if (vdc->msg_proc_thr_id == NULL) {
		cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread",
		    instance);
		return (DDI_FAILURE);
	}
	vdc->initialized |= VDC_THREAD;

	/* initialise LDC channel which will be used to communicate with vds */
	if (vdc_do_ldc_init(vdc) != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance);
		return (DDI_FAILURE);
	}

	/* Bring up connection with vds via LDC */
	status = vdc_start_ldc_connection(vdc);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Could not start LDC", instance);
		return (DDI_FAILURE);
	}

	/*
	 * We need to wait until the handshake has completed before leaving
	 * the attach(). If this is the first vdc device attached (i.e. the
	 * root filesystem) we will wait much longer in the hope that we can
	 * finally communicate with the vDisk server (the service domain may
	 * be rebooting, etc.). This wait is necessary so that the device
	 * node(s) are created before attach(9E) returns (otherwise the
	 * open(9E) will fail and the root file system will not boot).
	 */
	atomic_inc_32(&vdc_instance_count);
	mutex_enter(&vdc->attach_lock);
	while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) {

		DMSG(0, "[%d] handshake in progress [VD %d (LDC %d)]\n",
		    instance, vdc->state, vdc->ldc_state);

		status = cv_timedwait(&vdc->attach_cv, &vdc->attach_lock,
		    VD_GET_TIMEOUT_HZ(vdc_hz_timeout, retries));
		if (status == -1) {
			/*
			 * If this is not the first instance attached or we
			 * have exceeded the max number of retries we give
			 * up waiting and do not delay the attach any longer
			 */
			if ((vdc_instance_count != 1) ||
			    (retries >= vdc_retries)) {
				DMSG(0, "[%d] Giving up wait for handshake\n",
				    instance);
				mutex_exit(&vdc->attach_lock);
				return (DDI_FAILURE);
			} else {
				DMSG(0, "[%d] Retry #%d for handshake.\n",
				    instance, retries);
				vdc_init_handshake_negotiation(vdc);
				retries++;
			}
		}
	}
	mutex_exit(&vdc->attach_lock);

	/*
	 * Once the handshake is complete, we can use the DRing to send
	 * requests to the vDisk server to calculate the geometry and
	 * VTOC of the "disk"
	 */
	status = vdc_setup_disk_layout(vdc);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Failed to discover disk layout (err=%d)",
		    vdc->instance, status);
	}

	/*
	 * Now that we have the device info we can create the
	 * device nodes and properties
	 */
	status = vdc_create_device_nodes(vdc);
	if (status) {
		cmn_err(CE_NOTE, "[%d] Failed to create device nodes",
		    instance);
		return (status);
	}
	status = vdc_create_device_nodes_props(vdc);
	if (status) {
		cmn_err(CE_NOTE, "[%d] Failed to create device nodes"
		    " properties (%d)", instance, status);
		return (status);
	}

	/*
	 * Setup devid
	 */
	if (vdc_setup_devid(vdc)) {
		DMSG(0, "[%d] No device id available\n", instance);
	}

	ddi_report_dev(dip);

	DMSG(0, "[%d] Attach completed\n", instance);
	return (status);
}
static int
vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	status;

	switch (cmd) {
	case DDI_ATTACH:
		if ((status = vdc_do_attach(dip)) != 0)
			(void) vdc_detach(dip, DDI_DETACH);
		return (status);
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}
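/*
 * Sketch of the LDC bring-up sequence performed by the routines below
 * (all of these are the driver's own calls, in the order they occur):
 *
 *	vdc_get_ldc_id()	- find the channel id in the MD
 *	ldc_init()		- allocate the channel (VDC_LDC_INIT)
 *	ldc_reg_callback()	- register vdc_handle_cb (VDC_LDC_CB)
 *	ldc_open()		- open the channel (VDC_LDC_OPEN)
 *	ldc_up()		- bring the link up, via vdc_do_ldc_up()
 */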
static int
vdc_do_ldc_init(vdc_t *vdc)
{
	int		status = 0;
	ldc_status_t	ldc_state;
	ldc_attr_t	ldc_attr;
	uint64_t	ldc_id = 0;
	dev_info_t	*dip = NULL;

	ASSERT(vdc != NULL);

	dip = vdc->dip;
	vdc->initialized |= VDC_LDC;

	if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) {
		cmn_err(CE_NOTE, "[%d] Failed to get LDC channel ID property",
		    vdc->instance);
		return (EIO);
	}
	vdc->ldc_id = ldc_id;

	ldc_attr.devclass = LDC_DEV_BLK;
	ldc_attr.instance = vdc->instance;
	ldc_attr.mode = LDC_MODE_UNRELIABLE;	/* unreliable transport */
	ldc_attr.mtu = VD_LDC_MTU;

	if ((vdc->initialized & VDC_LDC_INIT) == 0) {
		status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] ldc_init(chan %ld) returned %d",
			    vdc->instance, ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_INIT;
	}
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Cannot discover LDC status [err=%d]",
		    vdc->instance, status);
		return (status);
	}
	vdc->ldc_state = ldc_state;

	if ((vdc->initialized & VDC_LDC_CB) == 0) {
		status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
		    (caddr_t)vdc);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] LDC callback reg. failed (%d)",
			    vdc->instance, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_CB;
	}

	vdc->initialized |= VDC_LDC;

	/*
	 * At this stage we have initialised LDC, we will now try and open
	 * the connection.
	 */
	if (vdc->ldc_state == LDC_INIT) {
		status = ldc_open(vdc->ldc_handle);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] ldc_open(chan %ld) returned %d",
			    vdc->instance, vdc->ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_OPEN;
	}

	return (status);
}

static int
vdc_start_ldc_connection(vdc_t *vdc)
{
	int	status = 0;

	ASSERT(vdc != NULL);

	mutex_enter(&vdc->lock);

	if (vdc->ldc_state == LDC_UP) {
		DMSG(0, "[%d] LDC is already UP ..\n", vdc->instance);
		mutex_exit(&vdc->lock);
		return (0);
	}

	status = vdc_do_ldc_up(vdc);

	DMSG(0, "[%d] Finished bringing up LDC\n", vdc->instance);

	mutex_exit(&vdc->lock);

	return (status);
}

static int
vdc_create_device_nodes_efi(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "h");
	ddi_remove_minor_node(vdc->dip, "h,raw");

	if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}

static int
vdc_create_device_nodes_vtoc(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "wd");
	ddi_remove_minor_node(vdc->dip, "wd,raw");

	if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}
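/*
 * Sketch (illustrative): for a full disk the routines above and below
 * produce minor nodes named
 *
 *	VTOC label:	'a'..'h' (block) and 'a,raw'..'h,raw' (character)
 *	EFI label:	'a'..'g' plus 'wd'/'wd,raw' in place of 'h'/'h,raw'
 *
 * from which the /dev/dsk and /dev/rdsk links are then built.
 */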
/*
 * Function:
 *	vdc_create_device_nodes
 *
 * Description:
 *	This function creates the block and character device nodes under
 *	/devices along with the node properties. It is called as part of
 *	the attach(9E) of the instance during the handshake with vds after
 *	vds has sent the attributes to vdc.
 *
 *	If the device is of type VD_DISK_TYPE_SLICE then minor node 2 is
 *	used, in keeping with the Solaris convention that slice 2 refers
 *	to a whole disk. Slices are named starting at 'a'.
 *
 * Parameters:
 *	vdc	- soft state pointer
 *
 * Return Values
 *	0	- Success
 *	EIO	- Failed to create node
 *	EINVAL	- Unknown type of disk exported
 */
static int
vdc_create_device_nodes(vdc_t *vdc)
{
	char		name[sizeof ("s,raw")];
	dev_info_t	*dip = NULL;
	int		instance, status;
	int		num_slices = 1;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	/*
	 * Minor nodes are different for EFI disks: EFI disks do not have
	 * a minor node 'h' for the minor number corresponding to slice
	 * VD_EFI_WD_SLICE (slice 7); instead they have a minor node 'wd'
	 * representing the whole disk.
	 */
	for (i = 0; i < num_slices; i++) {

		if (i == VD_EFI_WD_SLICE) {
			if (vdc->vdisk_label == VD_DISK_LABEL_EFI)
				status = vdc_create_device_nodes_efi(vdc);
			else
				status = vdc_create_device_nodes_vtoc(vdc);
			if (status != 0)
				return (status);
			continue;
		}

		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
		if (ddi_create_minor_node(dip, name, S_IFBLK,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'",
			    instance, name);
			return (EIO);
		}

		/* if any device node is created we set this flag */
		vdc->initialized |= VDC_MINOR;

		(void) snprintf(name, sizeof (name), "%c%s",
		    'a' + i, ",raw");
		if (ddi_create_minor_node(dip, name, S_IFCHR,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'",
			    instance, name);
			return (EIO);
		}
	}

	return (0);
}
910 " No VTOC available", instance); 911 return (ENXIO); 912 } 913 914 switch (vdc->vdisk_type) { 915 case VD_DISK_TYPE_DISK: 916 num_slices = V_NUMPAR; 917 break; 918 case VD_DISK_TYPE_SLICE: 919 num_slices = 1; 920 break; 921 case VD_DISK_TYPE_UNK: 922 default: 923 return (EINVAL); 924 } 925 926 for (i = 0; i < num_slices; i++) { 927 dev = makedevice(ddi_driver_major(dip), 928 VD_MAKE_DEV(instance, i)); 929 930 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 931 DMSG(0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 932 instance, size, size / (1024 * 1024), 933 vdc->vtoc->v_part[i].p_size); 934 935 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 936 if (rv != DDI_PROP_SUCCESS) { 937 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 938 instance, VDC_SIZE_PROP_NAME, size); 939 return (EIO); 940 } 941 942 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 943 lbtodb(size)); 944 if (rv != DDI_PROP_SUCCESS) { 945 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 946 instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 947 return (EIO); 948 } 949 } 950 951 return (0); 952 } 953 954 static int 955 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 956 { 957 _NOTE(ARGUNUSED(cred)) 958 959 int instance; 960 vdc_t *vdc; 961 962 ASSERT(dev != NULL); 963 instance = SDUNIT(getminor(*dev)); 964 965 DMSG(0, "[%d] minor = %d flag = %x, otyp = %x\n", 966 instance, getminor(*dev), flag, otyp); 967 968 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 969 return (EINVAL); 970 971 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 972 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 973 return (ENXIO); 974 } 975 976 /* 977 * Check to see if we can communicate with vds 978 */ 979 if (!vdc_is_able_to_tx_data(vdc, flag)) { 980 DMSG(0, "[%d] Not ready to transmit data (flag=%x)\n", 981 instance, flag); 982 return (ENOLINK); 983 } 984 985 mutex_enter(&vdc->lock); 986 vdc->open++; 987 mutex_exit(&vdc->lock); 988 989 return (0); 990 } 991 992 static int 993 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 994 { 995 _NOTE(ARGUNUSED(cred)) 996 997 int instance; 998 vdc_t *vdc; 999 1000 instance = SDUNIT(getminor(dev)); 1001 1002 DMSG(0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 1003 1004 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 1005 return (EINVAL); 1006 1007 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1008 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1009 return (ENXIO); 1010 } 1011 1012 /* 1013 * Check to see if we can communicate with vds 1014 */ 1015 if (!vdc_is_able_to_tx_data(vdc, 0)) { 1016 DMSG(0, "[%d] Not ready to transmit data (flag=%x)\n", 1017 instance, flag); 1018 return (ETIMEDOUT); 1019 } 1020 1021 if (vdc->dkio_flush_pending) { 1022 DMSG(0, "[%d] Cannot detach: %d outstanding DKIO flushes\n", 1023 instance, vdc->dkio_flush_pending); 1024 return (EBUSY); 1025 } 1026 1027 /* 1028 * Should not need the mutex here, since the framework should protect 1029 * against more opens on this device, but just in case. 
static int
vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	ASSERT(dev != NULL);
	instance = SDUNIT(getminor(*dev));

	DMSG(0, "[%d] minor = %d flag = %x, otyp = %x\n",
	    instance, getminor(*dev), flag, otyp);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	/*
	 * Check to see if we can communicate with vds
	 */
	if (!vdc_is_able_to_tx_data(vdc, flag)) {
		DMSG(0, "[%d] Not ready to transmit data (flag=%x)\n",
		    instance, flag);
		return (ENOLINK);
	}

	mutex_enter(&vdc->lock);
	vdc->open++;
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	instance = SDUNIT(getminor(dev));

	DMSG(0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	/*
	 * Check to see if we can communicate with vds
	 */
	if (!vdc_is_able_to_tx_data(vdc, 0)) {
		DMSG(0, "[%d] Not ready to transmit data (flag=%x)\n",
		    instance, flag);
		return (ETIMEDOUT);
	}

	if (vdc->dkio_flush_pending) {
		DMSG(0, "[%d] Cannot detach: %d outstanding DKIO flushes\n",
		    instance, vdc->dkio_flush_pending);
		return (EBUSY);
	}

	/*
	 * Should not need the mutex here, since the framework should protect
	 * against more opens on this device, but just in case.
	 */
	mutex_enter(&vdc->lock);
	vdc->open--;
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	_NOTE(ARGUNUSED(credp))
	_NOTE(ARGUNUSED(rvalp))

	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
}

static int
vdc_print(dev_t dev, char *str)
{
	cmn_err(CE_NOTE, "vdc%d: %s", SDUNIT(getminor(dev)), str);
	return (0);
}

static int
vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	buf_t	*buf;	/* BWRITE requests need to be in a buf_t structure */
	int	rv;
	size_t	nbytes = nblk * DEV_BSIZE;
	int	instance = SDUNIT(getminor(dev));
	vdc_t	*vdc = NULL;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	buf = kmem_alloc(sizeof (buf_t), KM_SLEEP);
	bioinit(buf);
	buf->b_un.b_addr = addr;
	buf->b_bcount = nbytes;
	buf->b_flags = B_BUSY | B_WRITE;
	buf->b_dev = dev;
	rv = vdc_populate_descriptor(vdc, (caddr_t)buf, nbytes,
	    VD_OP_BWRITE, blkno, SDPART(getminor(dev)));

	/*
	 * If the OS instance is panicking, the call above will ensure that
	 * the descriptor is done before returning. This should always be
	 * the case when coming through this function, but we check just in
	 * case and wait if necessary for the vDisk server to ACK and trigger
	 * the biodone.
	 */
	if (!ddi_in_panic())
		rv = biowait(buf);

	biofini(buf);
	kmem_free(buf, sizeof (buf_t));

	DMSG(1, "[%d] status=%d\n", instance, rv);

	return (rv);
}
/* -------------------------------------------------------------------------- */

/*
 * Disk access routines
 *
 */

/*
 * vdc_strategy()
 *
 * Return Value:
 *	0:	As per strategy(9E), the strategy() function must return 0
 *		[ bioerror(9f) sets b_flags to the proper error code ]
 */
static int
vdc_strategy(struct buf *buf)
{
	int	rv = -1;
	vdc_t	*vdc = NULL;
	int	instance = SDUNIT(getminor(buf->b_edev));
	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;

	DMSG(2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p",
	    instance, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc);

	if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) {
		DMSG(0, "[%d] Not ready to transmit data\n", instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}
	bp_mapin(buf);

	rv = vdc_populate_descriptor(vdc, (caddr_t)buf, buf->b_bcount, op,
	    buf->b_lblkno, SDPART(getminor(buf->b_edev)));

	/*
	 * If the request was successfully sent, the strategy call returns and
	 * the ACK handler calls the bioxxx functions when the vDisk server is
	 * done.
	 */
	if (rv) {
		DMSG(0, "[%d] Failed to read/write (err=%d)\n", instance, rv);
		bioerror(buf, rv);
		biodone(buf);
	}

	return (0);
}


static int
vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSG(1, "[%d] Entered", SDUNIT(getminor(dev)));
	return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio));
}

static int
vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSG(1, "[%d] Entered", SDUNIT(getminor(dev)));
	return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSG(1, "[%d] Entered", SDUNIT(getminor(dev)));
	return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio));
}

static int
vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSG(1, "[%d] Entered", SDUNIT(getminor(dev)));
	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio));
}


/* -------------------------------------------------------------------------- */

/*
 * Handshake support
 */

/*
 * vdc_init_handshake_negotiation
 *
 * Description:
 *	This function is called to trigger the handshake negotiations between
 *	the client (vdc) and the server (vds). It may be called multiple times.
 *
 * Parameters:
 *	vdc - soft state pointer
 */
static void
vdc_init_handshake_negotiation(void *arg)
{
	vdc_t		*vdc = (vdc_t *)(void *)arg;
	ldc_status_t	ldc_state;
	vd_state_t	state;
	int		status;

	ASSERT(vdc != NULL);

	DMSG(0, "[%d] Initializing vdc<->vds handshake\n", vdc->instance);

	/* get LDC state */
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't get LDC status (err=%d)",
		    vdc->instance, status);
		return;
	}

	/*
	 * If the LDC connection is not UP we bring it up now and return.
	 * The handshake will be started again when the callback is
	 * triggered due to the UP event.
	 */
	if (ldc_state != LDC_UP) {
		DMSG(0, "[%d] Triggering LDC_UP & returning\n", vdc->instance);
		(void) vdc_do_ldc_up(vdc);
		return;
	}

	mutex_enter(&vdc->lock);
	/*
	 * Do not continue if another thread has triggered a handshake which
	 * has not been reset or detach() has stopped further handshakes.
	 */
	if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) {
		DMSG(0, "[%d] Negotiation not triggered. [init=%x]\n",
		    vdc->instance, vdc->initialized);
		mutex_exit(&vdc->lock);
		return;
	}

	if (vdc->hshake_cnt++ > vdc_retries) {
		cmn_err(CE_NOTE, "[%d] Failed repeatedly to complete handshake "
		    "with vDisk server", vdc->instance);
		mutex_exit(&vdc->lock);
		return;
	}

	vdc->initialized |= VDC_HANDSHAKE;
	vdc->ldc_state = ldc_state;

	state = vdc->state;

	if (state == VD_STATE_INIT) {
		/*
		 * Set the desired version parameter to the first entry in the
		 * version array. If this specific version is not supported,
		 * the response handling code will step down the version number
		 * to the next array entry and deal with it accordingly.
		 */
		(void) vdc_init_ver_negotiation(vdc, vdc_version[0]);
	} else if (state == VD_STATE_VER) {
		(void) vdc_init_attr_negotiation(vdc);
	} else if (state == VD_STATE_ATTR) {
		(void) vdc_init_dring_negotiate(vdc);
	} else if (state == VD_STATE_DATA) {
		/*
		 * nothing to do - we have already completed the negotiation
		 * and we can transmit data when ready.
		 */
		DMSG(0, "[%d] Negotiation triggered after handshake completed",
		    vdc->instance);
	}

	mutex_exit(&vdc->lock);
}
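/*
 * Sketch of the handshake state machine driven above (state names as used
 * in this file; each step is advanced by the matching ACK from vds):
 *
 *	VD_STATE_INIT --VER_INFO/ACK---> VD_STATE_VER
 *	              --ATTR_INFO/ACK--> VD_STATE_ATTR
 *	              --DRING_REG/ACK--> VD_STATE_DATA
 *
 * Once VD_STATE_DATA is reached (and LDC is UP), vdc_is_able_to_tx_data()
 * returns B_TRUE and I/O can flow.
 */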
/*
 * Function:
 *	vdc_init_ver_negotiation()
 *
 * Description:
 *	Pick a new session ID and send the version negotiation message
 *	(VIO_VER_INFO) to the vDisk server.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
{
	vio_ver_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status = -1;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(0, "[%d] Entered.\n", vdc->instance);

	/*
	 * set the Session ID to a unique value
	 * (the lower 32 bits of the clock tick)
	 */
	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);
	DMSG(0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id);

	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_VER_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	pkt.dev_class = VDEV_DISK;
	pkt.ver_major = ver.major;
	pkt.ver_minor = ver.minor;

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(0, "[%d] Ver info sent (status = %d)\n", vdc->instance, status);

	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
		cmn_err(CE_NOTE, "[%d] Failed to send Ver negotiation info: "
		    "id(%lx) rv(%d) size(%ld)",
		    vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vio_ver_msg_t))
			status = ENOMSG;
	}

	return (status);
}
/*
 * Function:
 *	vdc_init_attr_negotiation()
 *
 * Description:
 *	Send the attribute negotiation message (VIO_ATTR_INFO) to the
 *	vDisk server, advertising our transfer parameters.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_attr_negotiation(vdc_t *vdc)
{
	vd_attr_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(0, "[%d] entered\n", vdc->instance);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.max_xfer_sz = vdc->max_xfer_sz;
	pkt.vdisk_block_size = vdc->block_size;
	pkt.xfer_mode = VIO_DRING_MODE;
	pkt.operations = 0;	/* server will set bits of valid operations */
	pkt.vdisk_type = 0;	/* server will set to valid device type */
	pkt.vdisk_size = 0;	/* server will set to valid size */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(0, "[%d] Attr info sent (status = %d)\n", vdc->instance, status);

	if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) {
		cmn_err(CE_NOTE, "[%d] Failed to send Attr negotiation info: "
		    "id(%lx) rv(%d) size(%ld)",
		    vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vd_attr_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_init_dring_negotiate()
 *
 * Description:
 *	Create and bind the descriptor ring, then send the ring
 *	registration message (VIO_DRING_REG) to the vDisk server.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_dring_negotiate(vdc_t *vdc)
{
	vio_dring_reg_msg_t	pkt;
	size_t			msglen = sizeof (pkt);
	int			status = -1;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	status = vdc_init_descriptor_ring(vdc);
	if (status != 0) {
		cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n",
		    vdc->instance, status);
		vdc_destroy_descriptor_ring(vdc);
		vdc_reset_connection(vdc, B_TRUE);
		return (status);
	}
	DMSG(0, "[%d] Init of descriptor ring completed (status = %d)\n",
	    vdc->instance, status);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_DRING_REG;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.dring_ident = 0;
	pkt.num_descriptors = vdc->dring_len;
	pkt.descriptor_size = vdc->dring_entry_size;
	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
	pkt.ncookies = vdc->dring_cookie_count;
	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Failed to register DRing (err = %d)",
		    vdc->instance, status);
		vdc_reset_connection(vdc, B_TRUE);
	}

	return (status);
}
/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write() [ up to 'vdc_retries' times ]; otherwise
 *	we return the error returned by LDC.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver;
 *		  the message is written to its LDC channel (vdc->ldc_handle).
 *	pkt	- address of LDC message to be sent
 *	msglen	- the size of the message being sent. When the function
 *		  returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0		- Success.
 *	EINVAL		- pkt or msglen were NULL
 *	ECONNRESET	- The connection was not up.
 *	EWOULDBLOCK	- LDC queue is full
 *	xxx		- other error codes returned by ldc_write
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	retries = 0;
	int	status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

	do {
		size = *msglen;
		status = ldc_write(vdc->ldc_handle, pkt, &size);
		if (status == EWOULDBLOCK)
			delay(vdc_hz_timeout_ldc);
	} while (status == EWOULDBLOCK && retries++ < vdc_retries);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		vdc_reset_connection(vdc, B_TRUE);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}
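/*
 * Usage sketch (not compiled; mirrors the negotiation routines above):
 * all control messages go out through vdc_send() with vdc->lock held,
 * and msglen is an in/out parameter.
 */
#if 0
	vio_ver_msg_t	pkt;		/* filled in by the caller */
	size_t		msglen = sizeof (pkt);

	ASSERT(mutex_owned(&vdc->lock));
	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0 || msglen != sizeof (pkt))
		/* short write or LDC error; connection may have been reset */
		status = ENOMSG;
#endif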
/*
 * Function:
 *	vdc_get_ldc_id()
 *
 * Description:
 *	This function gets the 'ldc-id' for this particular instance of vdc.
 *	The id returned is the guest domain channel endpoint LDC uses for
 *	communication with vds.
 *
 * Arguments:
 *	dip	- dev info pointer for this instance of the device driver.
 *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 *	ENXIO	- Unexpected error communicating with MD framework
 */
static int
vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id)
{
	int		status = ENOENT;
	char		*node_name = NULL;
	md_t		*mdp = NULL;
	int		num_nodes;
	int		num_vdevs;
	int		num_chans;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	*chanp = NULL;
	boolean_t	found_inst = B_FALSE;
	int		listsz;
	int		idx;
	uint64_t	md_inst;
	int		obp_inst;
	int		instance = ddi_get_instance(dip);

	ASSERT(ldc_id != NULL);
	*ldc_id = 0;

	/*
	 * Get the OBP instance number for comparison with the MD instance
	 *
	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris. Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance.
	 * If the "reg" property cannot be found, the device tree state is
	 * presumably so broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
		return (ENOENT);
	}
	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    OBP_REG, -1);
	DMSG(1, "[%d] OBP inst=%d\n", instance, obp_inst);

	/*
	 * We now walk the MD nodes and if an instance of a vdc node matches
	 * the instance got from OBP we get the ldc-id property.
	 */
	if ((mdp = md_get_handle()) == NULL) {
		cmn_err(CE_WARN, "unable to init machine description");
		return (ENXIO);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);

	/* allocate memory for nodes */
	listp = kmem_zalloc(listsz, KM_SLEEP);
	chanp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Search for all the virtual devices, we will then check to see which
	 * ones are disk nodes.
	 */
	num_vdevs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, VDC_MD_VDEV_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdevs <= 0) {
		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;
	}

	DMSG(1, "[%d] num_vdevs=%d\n", instance, num_vdevs);
	for (idx = 0; idx < num_vdevs; idx++) {
		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
		if ((status != 0) || (node_name == NULL)) {
			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
			    ": err %d", VDC_MD_VDEV_NAME, status);
			continue;
		}

		DMSG(1, "[%d] Found node '%s'\n", instance, node_name);
		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
			status = md_get_prop_val(mdp, listp[idx],
			    VDC_MD_CFG_HDL, &md_inst);
			DMSG(1, "[%d] vdc inst in MD=%lx\n", instance, md_inst);
			if ((status == 0) && (md_inst == obp_inst)) {
				found_inst = B_TRUE;
				break;
			}
		}
	}

	if (!found_inst) {
		cmn_err(CE_NOTE, "Unable to find correct '%s' node",
		    VDC_MD_DISK_NAME);
		status = ENOENT;
		goto done;
	}
	DMSG(0, "[%d] MD inst=%lx\n", instance, md_inst);

	/* get the channels for this node */
	num_chans = md_scan_dag(mdp, listp[idx],
	    md_find_name(mdp, VDC_MD_CHAN_NAME),
	    md_find_name(mdp, "fwd"), chanp);

	/* expecting at least one channel */
	if (num_chans <= 0) {
		cmn_err(CE_NOTE, "No '%s' node for '%s' port",
		    VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;

	} else if (num_chans != 1) {
		DMSG(0, "[%d] Expected 1 '%s' node for '%s' port, found %d\n",
		    instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME,
		    num_chans);
	}

	/*
	 * We use the first channel found (index 0), irrespective of how
	 * many are there in total.
	 */
	if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) {
		cmn_err(CE_NOTE, "Channel '%s' property not found",
		    VDC_ID_PROP);
		status = ENOENT;
	}

	DMSG(0, "[%d] LDC id is 0x%lx\n", instance, *ldc_id);

done:
	if (chanp)
		kmem_free(chanp, listsz);
	if (listp)
		kmem_free(listp, listsz);

	(void) md_fini_handle(mdp);

	return (status);
}
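/*
 * Sketch of the lookup above (node and property names come from the
 * VDC_MD_* macros; the exact MD layout is an assumption):
 *
 *	devinfo "reg" (from OBP)  ==  MD disk node's VDC_MD_CFG_HDL value
 *	    -> first VDC_MD_CHAN_NAME node beneath the matching disk node
 *	    -> its VDC_ID_PROP value is the ldc-id handed to ldc_init()
 */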
static int
vdc_do_ldc_up(vdc_t *vdc)
{
	int	status;

	DMSG(0, "[%d] Bringing up channel %lx\n", vdc->instance, vdc->ldc_id);

	if ((status = ldc_up(vdc->ldc_handle)) != 0) {
		switch (status) {
		case ECONNREFUSED:	/* listener not ready at other end */
			DMSG(0, "[%d] ldc_up(%lx,...) returned %d\n",
			    vdc->instance, vdc->ldc_id, status);
			status = 0;
			break;
		default:
			cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: "
			    "channel=%ld, err=%d",
			    vdc->instance, vdc->ldc_id, status);
		}
	}

	return (status);
}


/*
 * vdc_is_able_to_tx_data()
 *
 * Description:
 *	This function checks if we are able to send data to the
 *	vDisk server (vds). The LDC connection needs to be up and
 *	vdc & vds need to have completed the handshake negotiation.
 *
 * Parameters:
 *	vdc	- soft state pointer
 *	flag	- flag to indicate if we can block or not
 *		  [ if O_NONBLOCK or O_NDELAY (which are defined in
 *		    open(2)) are set then do not block ]
 *
 * Return Values
 *	B_TRUE	- can talk to vds
 *	B_FALSE	- unable to talk to vds
 */
static boolean_t
vdc_is_able_to_tx_data(vdc_t *vdc, int flag)
{
	vd_state_t	state;
	uint32_t	ldc_state;
	uint_t		retries = 0;
	int		rv = -1;

	ASSERT(vdc != NULL);

	mutex_enter(&vdc->lock);
	state = vdc->state;
	ldc_state = vdc->ldc_state;
	mutex_exit(&vdc->lock);

	if ((state == VD_STATE_DATA) && (ldc_state == LDC_UP))
		return (B_TRUE);

	if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) {
		DMSG(0, "[%d] Not ready to tx - state %d LDC state %d\n",
		    vdc->instance, state, ldc_state);
		return (B_FALSE);
	}

	/*
	 * We want to check and see if any negotiations triggered earlier
	 * have succeeded. We are prepared to wait a little while in case
	 * they are still in progress.
	 */
	mutex_enter(&vdc->lock);
	while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) {
		DMSG(0, "[%d] Waiting for connection. (state %d : LDC %d)\n",
		    vdc->instance, vdc->state, vdc->ldc_state);

		rv = cv_timedwait(&vdc->cv, &vdc->lock,
		    VD_GET_TIMEOUT_HZ(vdc_hz_timeout, retries));

		/*
		 * An rv of -1 indicates that we timed out without the LDC
		 * state changing, so it looks like the other side (vds) is
		 * not yet ready/responding.
		 *
		 * Any other value of rv indicates that the LDC triggered an
		 * interrupt so we just loop again, check the handshake state
		 * and keep waiting if necessary.
		 */
		if (rv == -1) {
			if (retries >= vdc_retries) {
				DMSG(0, "[%d] handshake wait timed out\n",
				    vdc->instance);
				mutex_exit(&vdc->lock);
				return (B_FALSE);
			} else {
				DMSG(1, "[%d] Handshake retry #%d timed out\n",
				    vdc->instance, retries);
				retries++;
			}
		}
	}

	ASSERT(vdc->ldc_state == LDC_UP);
	ASSERT(vdc->state == VD_STATE_DATA);

	mutex_exit(&vdc->lock);

	return (B_TRUE);
}
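/*
 * Usage sketch (illustrative device path): a userland open(2) with
 * O_NONBLOCK fails fast instead of waiting out the handshake, e.g.
 *
 *	fd = open("/dev/rdsk/c0d0s2", O_RDONLY | O_NONBLOCK);
 *
 * returns ENOLINK from vdc_open() if the connection to vds has not yet
 * reached VD_STATE_DATA, while a blocking open waits in the loop above.
 */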
/*
 * Function:
 *	vdc_terminate_ldc()
 *
 * Description:
 *	Tear down the LDC connection: close the channel, unregister the
 *	callback and free the LDC resources allocated by vdc_do_ldc_init().
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_terminate_ldc(vdc_t *vdc)
{
	int	instance = ddi_get_instance(vdc->dip);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(0, "[%d] initialized=%x\n", instance, vdc->initialized);

	if (vdc->initialized & VDC_LDC_OPEN) {
		DMSG(0, "[%d] ldc_close()\n", instance);
		(void) ldc_close(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC_CB) {
		DMSG(0, "[%d] ldc_unreg_callback()\n", instance);
		(void) ldc_unreg_callback(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC) {
		DMSG(0, "[%d] ldc_fini()\n", instance);
		(void) ldc_fini(vdc->ldc_handle);
		vdc->ldc_handle = NULL;
	}

	vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN);
}

/*
 * Function:
 *	vdc_reset_connection()
 *
 * Description:
 *	Reset the handshake state back to VD_STATE_INIT, optionally taking
 *	the LDC channel down as well, so that a new negotiation can start.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the device
 *			  driver.
 *	reset_ldc	- Flag whether or not to reset the LDC connection also.
 *
 * Return Code:
 *	None
 */
static void
vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc)
{
	int	status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	cmn_err(CE_CONT, "?[%d] Resetting connection to vDisk server\n",
	    vdc->instance);

	vdc->state = VD_STATE_INIT;

	if (reset_ldc) {
		status = ldc_down(vdc->ldc_handle);
		DMSG(0, "[%d] ldc_down() = %d\n", vdc->instance, status);
	}

	vdc->initialized &= ~VDC_HANDSHAKE;
	DMSG(0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized);
}

/* -------------------------------------------------------------------------- */

/*
 * Descriptor Ring helper routines
 */
/*
 * Function:
 *	vdc_init_descriptor_ring()
 *
 * Description:
 *	Create the descriptor ring, bind it to the LDC channel, allocate
 *	the local shadow ring and initialise every entry as free.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_descriptor_ring(vdc_t *vdc)
{
	vd_dring_entry_t	*dep = NULL;	/* DRing Entry pointer */
	int			status = 0;
	int			i;

	DMSG(0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(vdc->ldc_handle != NULL);

	/* ensure we have enough room to store max sized block */
	ASSERT(maxphys <= VD_MAX_BLOCK_SIZE);

	if ((vdc->initialized & VDC_DRING_INIT) == 0) {
		DMSG(0, "[%d] ldc_mem_dring_create\n", vdc->instance);
		/*
		 * Calculate the maximum block size we can transmit using one
		 * Descriptor Ring entry from the attributes returned by the
		 * vDisk server. This is subject to a minimum of 'maxphys'
		 * as we do not have the capability to split requests over
		 * multiple DRing entries.
		 */
		if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) {
			DMSG(0, "[%d] using minimum DRing size\n",
			    vdc->instance);
			vdc->dring_max_cookies = maxphys / PAGESIZE;
		} else {
			vdc->dring_max_cookies =
			    (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE;
		}
		vdc->dring_entry_size = (sizeof (vd_dring_entry_t) +
		    (sizeof (ldc_mem_cookie_t) *
		    (vdc->dring_max_cookies - 1)));
		vdc->dring_len = VD_DRING_LEN;

		status = ldc_mem_dring_create(vdc->dring_len,
		    vdc->dring_entry_size, &vdc->ldc_dring_hdl);
		if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) {
			cmn_err(CE_NOTE, "[%d] Descriptor ring creation failed",
			    vdc->instance);
			return (status);
		}
		vdc->initialized |= VDC_DRING_INIT;
	}

	if ((vdc->initialized & VDC_DRING_BOUND) == 0) {
		DMSG(0, "[%d] ldc_mem_dring_bind\n", vdc->instance);
		vdc->dring_cookie =
		    kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP);

		status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl,
		    LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW,
		    &vdc->dring_cookie[0],
		    &vdc->dring_cookie_count);
		if (status != 0) {
			cmn_err(CE_NOTE, "[%d] Failed to bind descriptor ring "
			    "(%lx) to channel (%lx)\n", vdc->instance,
			    vdc->ldc_dring_hdl, vdc->ldc_handle);
			return (status);
		}
		ASSERT(vdc->dring_cookie_count == 1);
		vdc->initialized |= VDC_DRING_BOUND;
	}

	status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info);
	if (status != 0) {
		DMSG(0, "[%d] Failed to get info for descriptor ring (%lx)\n",
		    vdc->instance, vdc->ldc_dring_hdl);
		return (status);
	}

	if ((vdc->initialized & VDC_DRING_LOCAL) == 0) {
		DMSG(0, "[%d] local dring\n", vdc->instance);

		/* Allocate the local copy of this dring */
		vdc->local_dring =
		    kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t),
		    KM_SLEEP);
		vdc->initialized |= VDC_DRING_LOCAL;
	}

	/*
	 * Mark all DRing entries as free and initialize the private
	 * descriptor's memory handles. If any entry is initialized,
	 * we need to free it later so we set the bit in 'initialized'
	 * at the start.
	 */
	vdc->initialized |= VDC_DRING_ENTRY;
	for (i = 0; i < vdc->dring_len; i++) {
		dep = VDC_GET_DRING_ENTRY_PTR(vdc, i);
		dep->hdr.dstate = VIO_DESC_FREE;

		status = ldc_mem_alloc_handle(vdc->ldc_handle,
		    &vdc->local_dring[i].desc_mhdl);
		if (status != 0) {
			cmn_err(CE_NOTE, "![%d] Failed to alloc mem handle for"
			    " descriptor %d", vdc->instance, i);
			return (status);
		}
		vdc->local_dring[i].flags = VIO_DESC_FREE;
		vdc->local_dring[i].dep = dep;

		mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&vdc->local_dring[i].cv, NULL, CV_DRIVER, NULL);
	}

	/*
	 * We init the index of the last DRing entry used. Since the code to
	 * get the next available entry increments it before selecting one,
	 * we set it to the last DRing entry so that it wraps around to zero
	 * for the 1st entry to be used.
	 */
	vdc->dring_curr_idx = vdc->dring_len - 1;

	vdc->dring_notify_server = B_TRUE;

	return (status);
}
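/*
 * Worked example (assumed values): with block_size = 512, max_xfer_sz =
 * 256 (i.e. 128K per request) and PAGESIZE = 8K, the sizing above gives
 *	dring_max_cookies = (256 * 512) / 8192 = 16
 *	dring_entry_size  = sizeof (vd_dring_entry_t) +
 *	    15 * sizeof (ldc_mem_cookie_t)
 * and, because dring_curr_idx starts at dring_len - 1, the first request
 * picks entry (dring_len - 1 + 1) % dring_len = 0.
 */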
2009  *
2010  * Return Code:
2011  *	None
2012  */
2013 static void
2014 vdc_destroy_descriptor_ring(vdc_t *vdc)
2015 {
2016 	vdc_local_desc_t	*ldep = NULL;	/* Local Dring Entry Pointer */
2017 	ldc_mem_handle_t	mhdl = NULL;
2018 	int			status = -1;
2019 	int			i;	/* loop */
2020 
2021 	ASSERT(vdc != NULL);
2022 	ASSERT(mutex_owned(&vdc->lock));
2023 	ASSERT(vdc->state == VD_STATE_INIT);
2024 
2025 	DMSG(0, "[%d] Entered\n", vdc->instance);
2026 
2027 	if (vdc->initialized & VDC_DRING_ENTRY) {
2028 		DMSG(0, "[%d] Removing Local DRing entries\n", vdc->instance);
2029 		for (i = 0; i < vdc->dring_len; i++) {
2030 			ldep = &vdc->local_dring[i];
2031 			mhdl = ldep->desc_mhdl;
2032 
2033 			if (mhdl == NULL)
2034 				continue;
2035 
2036 			(void) ldc_mem_free_handle(mhdl);
2037 			mutex_destroy(&ldep->lock);
2038 			cv_destroy(&ldep->cv);
2039 		}
2040 		vdc->initialized &= ~VDC_DRING_ENTRY;
2041 	}
2042 
2043 	if (vdc->initialized & VDC_DRING_LOCAL) {
2044 		DMSG(0, "[%d] Freeing Local DRing\n", vdc->instance);
2045 		kmem_free(vdc->local_dring,
2046 		    vdc->dring_len * sizeof (vdc_local_desc_t));
2047 		vdc->initialized &= ~VDC_DRING_LOCAL;
2048 	}
2049 
2050 	if (vdc->initialized & VDC_DRING_BOUND) {
2051 		DMSG(0, "[%d] Unbinding DRing\n", vdc->instance);
2052 		status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl);
2053 		if (status == 0) {
2054 			vdc->initialized &= ~VDC_DRING_BOUND;
2055 		} else {
2056 			cmn_err(CE_NOTE, "[%d] Error %d unbinding DRing %lx",
2057 			    vdc->instance, status, vdc->ldc_dring_hdl);
2058 		}
2059 	}
2060 
2061 	if (vdc->initialized & VDC_DRING_INIT) {
2062 		DMSG(0, "[%d] Destroying DRing\n", vdc->instance);
2063 		status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl);
2064 		if (status == 0) {
2065 			vdc->ldc_dring_hdl = NULL;
2066 			bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t));
2067 			vdc->initialized &= ~VDC_DRING_INIT;
2068 		} else {
2069 			cmn_err(CE_NOTE, "[%d] Error %d destroying DRing (%lx)",
2070 			    vdc->instance, status, vdc->ldc_dring_hdl);
2071 		}
2072 	}
2073 }
2074 
2075 /*
2076  * vdc_get_next_dring_entry_idx()
2077  *
2078  * Description:
2079  *	This function gets the index of the next available Descriptor Ring
2080  *	entry. If the ring is full, it will back off and wait for the next
2081  *	entry to be freed (the ACK handler will signal).
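 *	The candidate slot is picked by a simple wrap-around walk from the
 *	last slot used; for example, with a ring of length 8 and
 *	dring_curr_idx == 7, the next index examined is (7 + 1) % 8 == 0.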
2082  *
2083  * Return Value:
2084  *	0 <= rv < vdc->dring_len	Next available slot
2085  *	-1				DRing is full
2086  */
2087 static int
2088 vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t num_slots_needed)
2089 {
2090 	_NOTE(ARGUNUSED(num_slots_needed))
2091 
2092 	vd_dring_entry_t	*dep = NULL;	/* DRing Entry Pointer */
2093 	vdc_local_desc_t	*ldep = NULL;	/* Local DRing Entry Pointer */
2094 	int			idx = -1;
2095 
2096 	ASSERT(vdc != NULL);
2097 	ASSERT(vdc->dring_len == VD_DRING_LEN);
2098 	ASSERT(vdc->dring_curr_idx >= 0);
2099 	ASSERT(vdc->dring_curr_idx < vdc->dring_len);
2100 	ASSERT(mutex_owned(&vdc->dring_lock));
2101 
2102 	/* pick the next descriptor after the last one used */
2103 	idx = (vdc->dring_curr_idx + 1) % vdc->dring_len;
2104 	ldep = &vdc->local_dring[idx];
2105 	ASSERT(ldep != NULL);
2106 	dep = ldep->dep;
2107 	ASSERT(dep != NULL);
2108 
2109 	mutex_enter(&ldep->lock);
2110 	if (dep->hdr.dstate == VIO_DESC_FREE) {
2111 		vdc->dring_curr_idx = idx;
2112 	} else {
2113 		DTRACE_PROBE(full);
2114 		(void) cv_timedwait(&ldep->cv, &ldep->lock,
2115 		    VD_GET_TIMEOUT_HZ(vdc_hz_timeout, 1));
2116 		if (dep->hdr.dstate == VIO_DESC_FREE) {
2117 			vdc->dring_curr_idx = idx;
2118 		} else {
2119 			DMSG(0, "[%d] Entry %d unavailable, still in state %d\n",
2120 			    vdc->instance, idx, dep->hdr.dstate);
2121 			idx = -1;	/* indicate that the ring is full */
2122 		}
2123 	}
2124 	mutex_exit(&ldep->lock);
2125 
2126 	return (idx);
2127 }
2128 
2129 /*
2130  * Function:
2131  *	vdc_populate_descriptor
2132  *
2133  * Description:
2134  *	This routine writes the data to be transmitted to vds into the
2135  *	descriptor, notifies vds that the ring has been updated and
2136  *	then waits for the request to be processed.
2137  *
2138  * Arguments:
2139  *	vdc	- the soft state pointer
2140  *	addr	- address of structure to be written. In the case of block
2141  *		  reads and writes this structure will be a buf_t and the
2142  *		  address of the data to be written will be in the b_un.b_addr
2143  *		  field. Otherwise the value of addr will be the address
2144  *		  to be written.
2145  *	nbytes	- number of bytes to read/write
2146  *	operation - operation we want vds to perform (VD_OP_XXX)
2147  *	arg	- parameter to be sent to server (depends on VD_OP_XXX type)
2148  *			. mode for ioctl(9e)
2149  *			. LP64 diskaddr_t (block I/O)
2150  *	slice	- the disk slice this request is for
2151  *
2152  * Return Codes:
2153  *	0
2154  *	EAGAIN
2155  *	EFAULT
2156  *	ENXIO
2157  *	EIO
2158  */
2159 static int
2160 vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int operation,
2161     uint64_t arg, uint64_t slice)
2162 {
2163 	vdc_local_desc_t *local_dep = NULL;	/* Local Dring Entry Pointer */
2164 	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
2165 	int		idx = 0;		/* Index of DRing entry used */
2166 	vio_dring_msg_t	dmsg;
2167 	size_t		msglen = sizeof (dmsg);
2168 	int		retries = 0;
2169 	int		rv = 0;
2170 
2171 	ASSERT(vdc != NULL);
2172 	ASSERT(slice < V_NUMPAR);
2173 
2174 	/*
2175 	 * Get next available DRing entry.
2176 */ 2177 mutex_enter(&vdc->dring_lock); 2178 idx = vdc_get_next_dring_entry_idx(vdc, 1); 2179 if (idx == -1) { 2180 mutex_exit(&vdc->dring_lock); 2181 DMSG(0, "[%d] no descriptor ring entry avail, last seq=%ld\n", 2182 vdc->instance, vdc->seq_num - 1); 2183 2184 /* 2185 * Since strategy should not block we don't wait for the DRing 2186 * to empty and instead return 2187 */ 2188 return (EAGAIN); 2189 } 2190 2191 ASSERT(idx < vdc->dring_len); 2192 local_dep = &vdc->local_dring[idx]; 2193 dep = local_dep->dep; 2194 ASSERT(dep != NULL); 2195 2196 /* 2197 * We now get the lock for this descriptor before dropping the overall 2198 * DRing lock. This prevents a race condition where another vdc thread 2199 * could grab the descriptor we selected. 2200 */ 2201 ASSERT(MUTEX_NOT_HELD(&local_dep->lock)); 2202 mutex_enter(&local_dep->lock); 2203 mutex_exit(&vdc->dring_lock); 2204 2205 switch (operation) { 2206 case VD_OP_BREAD: 2207 case VD_OP_BWRITE: 2208 local_dep->buf = (struct buf *)addr; 2209 local_dep->addr = local_dep->buf->b_un.b_addr; 2210 DMSG(2, "[%d] buf=%p, block=%lx, nbytes=%lu\n", 2211 vdc->instance, (void *)addr, arg, nbytes); 2212 dep->payload.addr = (diskaddr_t)arg; 2213 rv = vdc_populate_mem_hdl(vdc, idx, local_dep->addr, 2214 nbytes, operation); 2215 break; 2216 2217 case VD_OP_GET_WCE: 2218 case VD_OP_SET_WCE: 2219 case VD_OP_GET_VTOC: 2220 case VD_OP_SET_VTOC: 2221 case VD_OP_GET_DISKGEOM: 2222 case VD_OP_SET_DISKGEOM: 2223 case VD_OP_SCSICMD: 2224 case VD_OP_GET_DEVID: 2225 case VD_OP_GET_EFI: 2226 case VD_OP_SET_EFI: 2227 local_dep->addr = addr; 2228 if (nbytes > 0) { 2229 rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, 2230 operation); 2231 } 2232 break; 2233 2234 case VD_OP_FLUSH: 2235 rv = 0; /* nothing to bind */ 2236 break; 2237 2238 default: 2239 cmn_err(CE_CONT, "?[%d] Unsupported vDisk operation [%d]\n", 2240 vdc->instance, operation); 2241 rv = EINVAL; 2242 } 2243 2244 if (rv != 0) { 2245 mutex_exit(&local_dep->lock); 2246 return (rv); 2247 } 2248 2249 /* 2250 * fill in the data details into the DRing 2251 */ 2252 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdc); 2253 dep->payload.operation = operation; 2254 dep->payload.nbytes = nbytes; 2255 dep->payload.status = -1; /* vds will set valid value */ 2256 dep->payload.slice = slice; 2257 dep->hdr.dstate = VIO_DESC_READY; 2258 dep->hdr.ack = 1; /* request an ACK for every message */ 2259 2260 local_dep->flags = VIO_DESC_READY; 2261 2262 /* 2263 * Send a msg with the DRing details to vds 2264 */ 2265 mutex_enter(&vdc->lock); 2266 VIO_INIT_DRING_DATA_TAG(dmsg); 2267 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2268 dmsg.dring_ident = vdc->dring_ident; 2269 dmsg.start_idx = idx; 2270 dmsg.end_idx = idx; 2271 2272 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdc); 2273 2274 DMSG(2, "[%d] ident=0x%lx, st=%u, end=%u, seq=%ld req=%ld dep=%p\n", 2275 vdc->instance, vdc->dring_ident, 2276 dmsg.start_idx, dmsg.end_idx, 2277 dmsg.seq_num, dep->payload.req_id, (void *)dep); 2278 2279 rv = vdc_send(vdc, (caddr_t)&dmsg, &msglen); 2280 DMSG(1, "[%d] send via LDC: rv=%d\n", vdc->instance, rv); 2281 if (rv != 0) { 2282 DMSG(0, "[%d] err (%d) sending DRing data msg via LDC", 2283 vdc->instance, rv); 2284 2285 /* Clear the DRing entry */ 2286 rv = vdc_depopulate_descriptor(vdc, idx); 2287 2288 mutex_exit(&vdc->lock); 2289 mutex_exit(&local_dep->lock); 2290 2291 return (rv ? 
rv : EAGAIN); 2292 } 2293 2294 /* 2295 * If the message was successfully sent, we increment the sequence 2296 * number to be used by the next message 2297 */ 2298 vdc->seq_num++; 2299 mutex_exit(&vdc->lock); 2300 2301 /* 2302 * When a guest is panicking, the completion of requests needs to be 2303 * handled differently because interrupts are disabled and vdc 2304 * will not get messages. We have to poll for the messages instead. 2305 */ 2306 if (ddi_in_panic()) { 2307 int start = 0; 2308 retries = 0; 2309 for (;;) { 2310 msglen = sizeof (dmsg); 2311 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, 2312 &msglen); 2313 if (rv) { 2314 rv = EINVAL; 2315 break; 2316 } 2317 2318 /* 2319 * if there are no packets wait and check again 2320 */ 2321 if ((rv == 0) && (msglen == 0)) { 2322 if (retries++ > vdc_dump_retries) { 2323 DMSG(0, "[%d] Stopping wait, idx %d\n", 2324 vdc->instance, idx); 2325 rv = EAGAIN; 2326 break; 2327 } 2328 2329 DMSG(1, "Waiting for next packet @ %d\n", idx); 2330 drv_usecwait(vdc_usec_timeout_dump); 2331 continue; 2332 } 2333 2334 /* 2335 * Ignore all messages that are not ACKs/NACKs to 2336 * DRing requests. 2337 */ 2338 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2339 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2340 DMSG(0, "discard pkt: type=%d sub=%d env=%d\n", 2341 dmsg.tag.vio_msgtype, 2342 dmsg.tag.vio_subtype, 2343 dmsg.tag.vio_subtype_env); 2344 continue; 2345 } 2346 2347 /* 2348 * set the appropriate return value for the 2349 * current request. 2350 */ 2351 switch (dmsg.tag.vio_subtype) { 2352 case VIO_SUBTYPE_ACK: 2353 rv = 0; 2354 break; 2355 case VIO_SUBTYPE_NACK: 2356 rv = EAGAIN; 2357 break; 2358 default: 2359 continue; 2360 } 2361 2362 start = dmsg.start_idx; 2363 if (start >= vdc->dring_len) { 2364 DMSG(0, "[%d] Bogus ack data : start %d\n", 2365 vdc->instance, start); 2366 continue; 2367 } 2368 2369 dep = VDC_GET_DRING_ENTRY_PTR(vdc, start); 2370 2371 DMSG(1, "[%d] Dumping start=%d idx=%d state=%d\n", 2372 vdc->instance, start, idx, dep->hdr.dstate); 2373 2374 if (dep->hdr.dstate != VIO_DESC_DONE) { 2375 DMSG(0, "[%d] Entry @ %d - state !DONE %d\n", 2376 vdc->instance, start, dep->hdr.dstate); 2377 continue; 2378 } 2379 2380 (void) vdc_depopulate_descriptor(vdc, start); 2381 2382 /* 2383 * We want to process all Dring entries up to 2384 * the current one so that we can return an 2385 * error with the correct request. 2386 */ 2387 if (idx > start) { 2388 DMSG(0, "[%d] Looping: start %d, idx %d\n", 2389 vdc->instance, idx, start); 2390 continue; 2391 } 2392 2393 /* exit - all outstanding requests are completed */ 2394 break; 2395 } 2396 2397 mutex_exit(&local_dep->lock); 2398 2399 return (rv); 2400 } 2401 2402 /* 2403 * In the case of calls from strategy and dump (in the non-panic case), 2404 * instead of waiting for a response from the vDisk server return now. 2405 * They will be processed asynchronously and the vdc ACK handling code 2406 * will trigger the biodone(9F) 2407 */ 2408 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2409 mutex_exit(&local_dep->lock); 2410 return (rv); 2411 } 2412 2413 /* 2414 * In the case of synchronous calls we watch the DRing entries we 2415 * modified and await the response from vds. 
2416 */ 2417 rv = vdc_wait_for_descriptor_update(vdc, idx, dmsg); 2418 if (rv == ETIMEDOUT) { 2419 /* debug info when dumping state on vds side */ 2420 dep->payload.status = ECANCELED; 2421 } 2422 2423 rv = vdc_depopulate_descriptor(vdc, idx); 2424 DMSG(0, "[%d] Exiting: status=%d\n", vdc->instance, rv); 2425 2426 mutex_exit(&local_dep->lock); 2427 2428 return (rv); 2429 } 2430 2431 /* 2432 * Function: 2433 * vdc_wait_for_descriptor_update() 2434 * 2435 * Description: 2436 * 2437 * Arguments: 2438 * vdc - soft state pointer for this instance of the device driver. 2439 * idx - Index of the Descriptor Ring entry being modified 2440 * dmsg - LDC message sent by vDisk server 2441 * 2442 * Return Code: 2443 * 0 - Success 2444 */ 2445 static int 2446 vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg) 2447 { 2448 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2449 vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ 2450 size_t msglen = sizeof (dmsg); 2451 int retries = 0; 2452 int status = 0; 2453 int rv = 0; 2454 2455 ASSERT(vdc != NULL); 2456 ASSERT(idx < vdc->dring_len); 2457 local_dep = &vdc->local_dring[idx]; 2458 ASSERT(local_dep != NULL); 2459 ASSERT(MUTEX_HELD(&local_dep->lock)); 2460 dep = local_dep->dep; 2461 ASSERT(dep != NULL); 2462 2463 while (dep->hdr.dstate != VIO_DESC_DONE) { 2464 rv = cv_timedwait(&local_dep->cv, &local_dep->lock, 2465 VD_GET_TIMEOUT_HZ(vdc_hz_timeout, retries)); 2466 if (rv == -1) { 2467 /* 2468 * If they persist in ignoring us we'll storm off in a 2469 * huff and return ETIMEDOUT to the upper layers. 2470 */ 2471 if (retries >= vdc_retries) { 2472 DMSG(0, "[%d] Finished waiting on entry %d\n", 2473 vdc->instance, idx); 2474 status = ETIMEDOUT; 2475 break; 2476 } else { 2477 retries++; 2478 DMSG(0, "[%d] Timeout #%d on entry %d " 2479 "[seq %lu][req %lu]\n", vdc->instance, 2480 retries, idx, dmsg.seq_num, 2481 dep->payload.req_id); 2482 } 2483 2484 if (dep->hdr.dstate & VIO_DESC_ACCEPTED) { 2485 DMSG(0, "[%d] entry %d ACCEPTED [seq %lu]" 2486 "[req %lu] but not ACK'ed by vds yet\n", 2487 vdc->instance, idx, dmsg.seq_num, 2488 dep->payload.req_id); 2489 continue; 2490 } 2491 2492 /* 2493 * we resend the message as it may have been dropped 2494 * and have never made it to the other side (vds). 2495 * (We reuse the original message but update seq ID) 2496 */ 2497 mutex_enter(&vdc->lock); 2498 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2499 retries = 0; 2500 status = vdc_send(vdc, (caddr_t)&dmsg, &msglen); 2501 if (status != 0) { 2502 mutex_exit(&vdc->lock); 2503 cmn_err(CE_NOTE, "[%d] Error (%d) while sending" 2504 " after timeout", 2505 vdc->instance, status); 2506 status = ETIMEDOUT; 2507 break; 2508 } 2509 /* 2510 * If the message was successfully sent, we increment 2511 * the sequence number to be used by the next message. 2512 */ 2513 vdc->seq_num++; 2514 mutex_exit(&vdc->lock); 2515 } 2516 } 2517 2518 return (status); 2519 } 2520 2521 2522 /* 2523 * Function: 2524 * vdc_depopulate_descriptor() 2525 * 2526 * Description: 2527 * 2528 * Arguments: 2529 * vdc - soft state pointer for this instance of the device driver. 
2530  *	idx	- Index of the Descriptor Ring entry being modified
2531  *
2532  * Return Code:
2533  *	0	- Success
2534  */
2535 static int
2536 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx)
2537 {
2538 	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
2539 	vdc_local_desc_t *ldep = NULL;		/* Local Dring Entry Pointer */
2540 	int		status = ENXIO;
2541 	int		operation;
2542 	int		rv = 0;
2543 
2544 	ASSERT(vdc != NULL);
2545 	ASSERT(idx < vdc->dring_len);
2546 	ldep = &vdc->local_dring[idx];
2547 	ASSERT(ldep != NULL);
2548 	ASSERT(MUTEX_HELD(&ldep->lock));
2549 	dep = ldep->dep;
2550 	ASSERT(dep != NULL);
2551 	ASSERT((dep->hdr.dstate == VIO_DESC_DONE) ||
2552 	    (dep->payload.status == ECANCELED));
2553 
2554 	VDC_MARK_DRING_ENTRY_FREE(vdc, idx);
2555 	VIO_SET_DESC_STATE(ldep->flags, VIO_DESC_FREE);
2556 	status = dep->payload.status;
2557 	operation = dep->payload.operation;
2558 
2559 	/* the DKIO FLUSH operation never binds handles so we can return now */
2560 	if (operation == VD_OP_FLUSH)
2561 		return (status);
2562 
2563 	/*
2564 	 * If the upper layer passed in a misaligned address we copied the
2565 	 * data into an aligned buffer before sending it to LDC - we now
2566 	 * copy it back to the original buffer.
2567 	 */
2568 	if (ldep->align_addr) {
2569 		ASSERT(ldep->addr != NULL);
2570 		ASSERT(dep->payload.nbytes > 0);
2571 
2572 		bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes);
2573 		kmem_free(ldep->align_addr,
2574 		    sizeof (caddr_t) * P2ROUNDUP(dep->payload.nbytes, 8));
2575 		ldep->align_addr = NULL;
2576 	}
2577 
2578 	rv = ldc_mem_unbind_handle(ldep->desc_mhdl);
2579 	if (rv != 0) {
2580 		cmn_err(CE_CONT, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)",
2581 		    vdc->instance, ldep->desc_mhdl, idx, rv);
2582 		/*
2583 		 * The error returned by the vDisk server is more informative
2584 		 * and thus has a higher priority but if it isn't set we ensure
2585 		 * that this function returns an error.
2586 		 */
2587 		if (status == 0)
2588 			status = EINVAL;
2589 	}
2590 
2591 	return (status);
2592 }
2593 
2594 /*
2595  * Function:
2596  *	vdc_populate_mem_hdl()
2597  *
2598  * Description:
2599  *	Bind the data buffer for this request to the descriptor's LDC
2600  * Arguments:
2601  *	vdc	- soft state pointer for this instance of the device driver.
2602  *	idx	- Index of the Descriptor Ring entry being modified
2603  *	addr	- virtual address being mapped in
2604  *	nbytes	- number of bytes in 'addr'
2605  *	operation - the vDisk operation being performed (VD_OP_xxx)
2606  *
2607  * Return Code:
2608  *	0	- Success
2609  */
2610 static int
2611 vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes,
2612     int operation)
2613 {
2614 	vd_dring_entry_t	*dep = NULL;
2615 	vdc_local_desc_t	*ldep = NULL;
2616 	ldc_mem_handle_t	mhdl;
2617 	caddr_t			vaddr;
2618 	uint8_t			perm = LDC_MEM_RW;
2619 	uint8_t			maptype;
2620 	int			rv = 0;
2621 	int			i;
2622 
2623 	ASSERT(vdc != NULL);
2624 	ASSERT(idx < vdc->dring_len);
2625 
2626 	dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx);
2627 	ldep = &vdc->local_dring[idx];
2628 	mhdl = ldep->desc_mhdl;
2629 
2630 	switch (operation) {
2631 	case VD_OP_BREAD:
2632 		perm = LDC_MEM_W;
2633 		break;
2634 
2635 	case VD_OP_BWRITE:
2636 		perm = LDC_MEM_R;
2637 		break;
2638 
2639 	case VD_OP_GET_WCE:
2640 	case VD_OP_SET_WCE:
2641 	case VD_OP_GET_VTOC:
2642 	case VD_OP_SET_VTOC:
2643 	case VD_OP_GET_DISKGEOM:
2644 	case VD_OP_SET_DISKGEOM:
2645 	case VD_OP_SCSICMD:
2646 	case VD_OP_GET_DEVID:
2647 	case VD_OP_GET_EFI:
2648 	case VD_OP_SET_EFI:
2649 		perm = LDC_MEM_RW;
2650 		break;
2651 
2652 	default:
2653 		ASSERT(0);	/* catch bad programming in vdc */
2654 	}
2655 
2656 	/*
2657 	 * LDC expects any addresses passed in to be 8-byte aligned. We need
2658 	 * to copy the contents of any misaligned buffers to a newly allocated
2659 	 * buffer and bind it instead (and copy the contents back to the
2660 	 * original buffer passed in when depopulating the descriptor).
2661 	 */
2662 	vaddr = addr;
2663 	if (((uint64_t)addr & 0x7) != 0) {
2664 		ASSERT(ldep->align_addr == NULL);
2665 		ldep->align_addr =
2666 		    kmem_zalloc(sizeof (caddr_t) * P2ROUNDUP(nbytes, 8),
2667 		    KM_SLEEP);
2668 		DMSG(0, "[%d] Misaligned address %p reallocating "
2669 		    "(buf=%p nb=%ld op=%d entry=%d)\n",
2670 		    vdc->instance, (void *)addr, (void *)ldep->align_addr,
2671 		    nbytes, operation, idx);
2672 		bcopy(addr, ldep->align_addr, nbytes);
2673 		vaddr = ldep->align_addr;
2674 	}
2675 
2676 	maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP;
2677 	rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8),
2678 	    maptype, perm, &dep->payload.cookie[0],
2679 	    &dep->payload.ncookies);
2680 	DMSG(2, "[%d] bound mem handle; ncookies=%d\n",
2681 	    vdc->instance, dep->payload.ncookies);
2682 	if (rv != 0) {
2683 		cmn_err(CE_CONT, "?[%d] Failed to bind LDC memory handle "
2684 		    "(mhdl=%p, buf=%p entry=%u err=%d)\n",
2685 		    vdc->instance, (void *)mhdl, (void *)addr, idx, rv);
2686 		if (ldep->align_addr) {
2687 			kmem_free(ldep->align_addr,
2688 			    sizeof (caddr_t) * P2ROUNDUP(nbytes, 8));
2689 			ldep->align_addr = NULL;
2690 		}
2691 		return (EAGAIN);
2692 	}
2693 
2694 	/*
2695 	 * Get the other cookies (if any).
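 *	ldc_mem_bind_handle() only returns the first cookie directly; when
 *	the bound buffer spans several discontiguous pages it sets ncookies
 *	accordingly and each remaining cookie must be fetched with
 *	ldc_mem_nextcookie(), as done in the loop below.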
2696 	 */
2697 	for (i = 1; i < dep->payload.ncookies; i++) {
2698 		rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]);
2699 		if (rv != 0) {
2700 			(void) ldc_mem_unbind_handle(mhdl);
2701 			cmn_err(CE_CONT, "?[%d] Failed to get next cookie "
2702 			    "(mhdl=%lx cnum=%d), err=%d",
2703 			    vdc->instance, mhdl, i, rv);
2704 			if (ldep->align_addr) {
2705 				kmem_free(ldep->align_addr,
2706 				    sizeof (caddr_t) * P2ROUNDUP(nbytes, 8));
2707 				ldep->align_addr = NULL;
2708 			}
2709 			return (EAGAIN);
2710 		}
2711 	}
2712 
2713 	return (rv);
2714 }
2715 
2716 /*
2717  * Interrupt handlers for messages from LDC
2718  */
2719 
2720 /*
2721  * Function:
2722  *	vdc_handle_cb()
2723  *
2724  * Description:
2725  *	LDC event callback; handles UP, READ, RESET and DOWN events.
2726  * Arguments:
2727  *	event	- Type of event (LDC_EVT_xxx) that triggered the callback
2728  *	arg	- soft state pointer for this instance of the device driver.
2729  *
2730  * Return Code:
2731  *	0	- Success
2732  */
2733 static uint_t
2734 vdc_handle_cb(uint64_t event, caddr_t arg)
2735 {
2736 	ldc_status_t	ldc_state;
2737 	int		rv = 0;
2738 
2739 	vdc_t	*vdc = (vdc_t *)(void *)arg;
2740 
2741 	ASSERT(vdc != NULL);
2742 
2743 	DMSG(1, "[%d] evt=%lx seqID=%ld\n", vdc->instance, event, vdc->seq_num);
2744 
2745 	/*
2746 	 * Depending on the type of event that triggered this callback,
2747 	 * we modify the handshake state or read the data.
2748 	 *
2749 	 * NOTE: not done as a switch() as event could be triggered by
2750 	 * a state change and a read request. Also the ordering of the
2751 	 * check for the event types is deliberate.
2752 	 */
2753 	if (event & LDC_EVT_UP) {
2754 		DMSG(0, "[%d] Received LDC_EVT_UP\n", vdc->instance);
2755 
2756 		/* get LDC state */
2757 		rv = ldc_status(vdc->ldc_handle, &ldc_state);
2758 		if (rv != 0) {
2759 			cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
2760 			    vdc->instance, rv);
2761 			mutex_enter(&vdc->lock);
2762 			vdc_reset_connection(vdc, B_TRUE);
2763 			mutex_exit(&vdc->lock);
2764 			return (LDC_SUCCESS);
2765 		}
2766 
2767 		/*
2768 		 * Reset the transaction sequence numbers when LDC comes up.
2769 		 * We then kick off the handshake negotiation with the vDisk
2770 		 * server.
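 *		 (seq_num restarts at 1 and seq_num_reply at 0 so that the
 *		 first reply from vds falls inside the window checked by
 *		 vdc_verify_seq_num().)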
2771 */ 2772 mutex_enter(&vdc->lock); 2773 vdc->seq_num = 1; 2774 vdc->seq_num_reply = 0; 2775 vdc->ldc_state = ldc_state; 2776 ASSERT(ldc_state == LDC_UP); 2777 mutex_exit(&vdc->lock); 2778 2779 vdc_init_handshake_negotiation(vdc); 2780 2781 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2782 } 2783 2784 if (event & LDC_EVT_READ) { 2785 /* 2786 * Wake up the worker thread to process the message 2787 */ 2788 mutex_enter(&vdc->msg_proc_lock); 2789 vdc->msg_pending = B_TRUE; 2790 cv_signal(&vdc->msg_proc_cv); 2791 mutex_exit(&vdc->msg_proc_lock); 2792 2793 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2794 2795 /* that's all we have to do - no need to handle DOWN/RESET */ 2796 return (LDC_SUCCESS); 2797 } 2798 2799 if (event & LDC_EVT_RESET) { 2800 DMSG(0, "[%d] Received LDC RESET event\n", vdc->instance); 2801 2802 /* get LDC state */ 2803 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2804 if (rv != 0) { 2805 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2806 vdc->instance, rv); 2807 ldc_state = LDC_OPEN; 2808 } 2809 mutex_enter(&vdc->lock); 2810 vdc->ldc_state = ldc_state; 2811 vdc_reset_connection(vdc, B_TRUE); 2812 mutex_exit(&vdc->lock); 2813 2814 vdc_init_handshake_negotiation(vdc); 2815 } 2816 2817 if (event & LDC_EVT_DOWN) { 2818 DMSG(0, "[%d] Received LDC DOWN event\n", vdc->instance); 2819 2820 /* get LDC state */ 2821 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2822 if (rv != 0) { 2823 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2824 vdc->instance, rv); 2825 ldc_state = LDC_OPEN; 2826 } 2827 mutex_enter(&vdc->lock); 2828 vdc->ldc_state = ldc_state; 2829 vdc_reset_connection(vdc, B_TRUE); 2830 mutex_exit(&vdc->lock); 2831 } 2832 2833 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 2834 cmn_err(CE_NOTE, "![%d] Unexpected LDC event (%lx) received", 2835 vdc->instance, event); 2836 2837 return (LDC_SUCCESS); 2838 } 2839 2840 /* -------------------------------------------------------------------------- */ 2841 2842 /* 2843 * The following functions process the incoming messages from vds 2844 */ 2845 2846 2847 /* 2848 * Function: 2849 * vdc_process_msg_thread() 2850 * 2851 * Description: 2852 * 2853 * Arguments: 2854 * vdc - soft state pointer for this instance of the device driver. 2855 * 2856 * Return Code: 2857 * None 2858 */ 2859 static void 2860 vdc_process_msg_thread(vdc_t *vdc) 2861 { 2862 int status = 0; 2863 boolean_t q_has_pkts = B_FALSE; 2864 2865 ASSERT(vdc != NULL); 2866 2867 mutex_enter(&vdc->msg_proc_lock); 2868 DMSG(0, "[%d] Starting\n", vdc->instance); 2869 2870 vdc->msg_proc_thr_state = VDC_THR_RUNNING; 2871 2872 while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) { 2873 2874 DMSG(2, "[%d] Waiting\n", vdc->instance); 2875 while (!vdc->msg_pending) 2876 cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock); 2877 2878 DMSG(2, "[%d] Message Received\n", vdc->instance); 2879 2880 /* check if there is data */ 2881 status = ldc_chkq(vdc->ldc_handle, &q_has_pkts); 2882 if ((status != 0) && 2883 (vdc->msg_proc_thr_state == VDC_THR_RUNNING)) { 2884 cmn_err(CE_NOTE, "[%d] Unable to communicate with vDisk" 2885 " server. 
Cannot check LDC queue: %d",
2886 			    vdc->instance, status);
2887 			mutex_enter(&vdc->lock);
2888 			vdc_reset_connection(vdc, B_TRUE);
2889 			mutex_exit(&vdc->lock);
2890 			vdc->msg_proc_thr_state = VDC_THR_STOP;
2891 			continue;
2892 		}
2893 
2894 		if (q_has_pkts) {
2895 			DMSG(2, "[%d] new pkt(s) available\n", vdc->instance);
2896 			vdc_process_msg(vdc);
2897 		}
2898 
2899 		vdc->msg_pending = B_FALSE;
2900 	}
2901 
2902 	DMSG(0, "[%d] Message processing thread stopped\n", vdc->instance);
2903 	vdc->msg_pending = B_FALSE;
2904 	vdc->msg_proc_thr_state = VDC_THR_DONE;
2905 	cv_signal(&vdc->msg_proc_cv);
2906 	mutex_exit(&vdc->msg_proc_lock);
2907 	thread_exit();
2908 }
2909 
2910 
2911 /*
2912  * Function:
2913  *	vdc_process_msg()
2914  *
2915  * Description:
2916  *	This function is called by the message processing thread each time it
2917  *	is triggered when LDC sends an interrupt to indicate that there are
2918  *	more packets on the queue. When it is called it will continue to loop
2919  *	and read the messages until there are no more left on the queue. If it
2920  *	encounters an invalid-sized message it will drop it and check the next
2921  *	message.
2922  *
2923  * Arguments:
2924  *	arg	- soft state pointer for this instance of the device driver.
2925  *
2926  * Return Code:
2927  *	None.
2928  */
2929 static void
2930 vdc_process_msg(void *arg)
2931 {
2932 	vdc_t		*vdc = (vdc_t *)(void *)arg;
2933 	vio_msg_t	vio_msg;
2934 	size_t		nbytes = sizeof (vio_msg);
2935 	int		status;
2936 
2937 	ASSERT(vdc != NULL);
2938 
2939 	mutex_enter(&vdc->lock);
2940 
2941 	DMSG(1, "[%d]\n", vdc->instance);
2942 
2943 	for (;;) {
2944 
2945 		/* read all messages - until no more left */
2946 		status = ldc_read(vdc->ldc_handle, (caddr_t)&vio_msg, &nbytes);
2947 
2948 		if (status) {
2949 			cmn_err(CE_CONT, "?[%d] Error %d reading LDC msg\n",
2950 			    vdc->instance, status);
2951 
2952 			/* if status is ECONNRESET --- reset vdc state */
2953 			if (status == EIO || status == ECONNRESET) {
2954 				vdc_reset_connection(vdc, B_TRUE);
2955 			}
2956 
2957 			mutex_exit(&vdc->lock);
2958 			return;
2959 		}
2960 
2961 		if ((nbytes > 0) && (nbytes < sizeof (vio_msg_tag_t))) {
2962 			cmn_err(CE_CONT, "?[%d] Expect %lu bytes; recv'd %lu\n",
2963 			    vdc->instance, sizeof (vio_msg_tag_t), nbytes);
2964 			mutex_exit(&vdc->lock);
2965 			return;
2966 		}
2967 
2968 		if (nbytes == 0) {
2969 			DMSG(3, "[%d] ldc_read() done..\n", vdc->instance);
2970 			mutex_exit(&vdc->lock);
2971 			return;
2972 		}
2973 
2974 		DMSG(2, "[%d] (%x/%x/%x)\n", vdc->instance,
2975 		    vio_msg.tag.vio_msgtype,
2976 		    vio_msg.tag.vio_subtype,
2977 		    vio_msg.tag.vio_subtype_env);
2978 
2979 		/*
2980 		 * Verify the Session ID of the message
2981 		 *
2982 		 * Every message after the Version has been negotiated should
2983 		 * have the correct session ID set.
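 *		 (VIO_VER_INFO messages are exempted below because the
 *		 session ID is only established as part of that initial
 *		 exchange.)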
2984 */ 2985 if ((vio_msg.tag.vio_sid != vdc->session_id) && 2986 (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) { 2987 cmn_err(CE_NOTE, "[%d] Invalid SID: received 0x%x, " 2988 "expected 0x%lx [seq num %lx @ %d]", 2989 vdc->instance, vio_msg.tag.vio_sid, 2990 vdc->session_id, 2991 ((vio_dring_msg_t *)&vio_msg)->seq_num, 2992 ((vio_dring_msg_t *)&vio_msg)->start_idx); 2993 vdc_reset_connection(vdc, B_TRUE); 2994 mutex_exit(&vdc->lock); 2995 return; 2996 } 2997 2998 switch (vio_msg.tag.vio_msgtype) { 2999 case VIO_TYPE_CTRL: 3000 status = vdc_process_ctrl_msg(vdc, vio_msg); 3001 break; 3002 case VIO_TYPE_DATA: 3003 status = vdc_process_data_msg(vdc, vio_msg); 3004 break; 3005 case VIO_TYPE_ERR: 3006 status = vdc_process_err_msg(vdc, vio_msg); 3007 break; 3008 default: 3009 cmn_err(CE_NOTE, "[%d] Unknown VIO message type", 3010 vdc->instance); 3011 status = EINVAL; 3012 break; 3013 } 3014 3015 if (status != 0) { 3016 DMSG(0, "[%d] Error (%d) occurred processing req %lu\n", 3017 vdc->instance, status, 3018 vdc->req_id_proc); 3019 vdc_reset_connection(vdc, B_TRUE); 3020 3021 /* we need to drop the lock to trigger the handshake */ 3022 mutex_exit(&vdc->lock); 3023 vdc_init_handshake_negotiation(vdc); 3024 mutex_enter(&vdc->lock); 3025 } 3026 } 3027 _NOTE(NOTREACHED) 3028 } 3029 3030 /* 3031 * Function: 3032 * vdc_process_ctrl_msg() 3033 * 3034 * Description: 3035 * This function is called by the message processing thread each time 3036 * an LDC message with a msgtype of VIO_TYPE_CTRL is received. 3037 * 3038 * Arguments: 3039 * vdc - soft state pointer for this instance of the device driver. 3040 * msg - the LDC message sent by vds 3041 * 3042 * Return Codes: 3043 * 0 - Success. 3044 * EPROTO - A message was received which shouldn't have happened according 3045 * to the protocol 3046 * ENOTSUP - An action which is allowed according to the protocol but which 3047 * isn't (or doesn't need to be) implemented yet. 3048 * EINVAL - An invalid value was returned as part of a message. 3049 */ 3050 static int 3051 vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg) 3052 { 3053 int status = -1; 3054 3055 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL); 3056 ASSERT(vdc != NULL); 3057 ASSERT(mutex_owned(&vdc->lock)); 3058 3059 /* Depending on which state we are in; process the message */ 3060 switch (vdc->state) { 3061 case VD_STATE_INIT: 3062 status = vdc_handle_ver_msg(vdc, (vio_ver_msg_t *)&msg); 3063 break; 3064 3065 case VD_STATE_VER: 3066 status = vdc_handle_attr_msg(vdc, (vd_attr_msg_t *)&msg); 3067 break; 3068 3069 case VD_STATE_ATTR: 3070 status = vdc_handle_dring_reg_msg(vdc, 3071 (vio_dring_reg_msg_t *)&msg); 3072 break; 3073 3074 case VD_STATE_RDX: 3075 if (msg.tag.vio_subtype_env != VIO_RDX) { 3076 status = EPROTO; 3077 break; 3078 } 3079 3080 DMSG(0, "[%d] Received RDX: handshake done\n", vdc->instance); 3081 3082 vdc->hshake_cnt = 0; /* reset failed handshake count */ 3083 status = 0; 3084 vdc->state = VD_STATE_DATA; 3085 3086 cv_broadcast(&vdc->attach_cv); 3087 break; 3088 3089 case VD_STATE_DATA: 3090 default: 3091 cmn_err(CE_NOTE, "[%d] Unexpected handshake state %d", 3092 vdc->instance, vdc->state); 3093 status = EPROTO; 3094 break; 3095 } 3096 3097 return (status); 3098 } 3099 3100 3101 /* 3102 * Function: 3103 * vdc_process_data_msg() 3104 * 3105 * Description: 3106 * This function is called by the message processing thread each time 3107 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 3108 * be an ACK or NACK from vds[1] which vdc handles as follows. 
3109 * ACK - wake up the waiting thread 3110 * NACK - resend any messages necessary 3111 * 3112 * [1] Although the message format allows it, vds should not send a 3113 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 3114 * some bizarre reason it does, vdc will reset the connection. 3115 * 3116 * Arguments: 3117 * vdc - soft state pointer for this instance of the device driver. 3118 * msg - the LDC message sent by vds 3119 * 3120 * Return Code: 3121 * 0 - Success. 3122 * > 0 - error value returned by LDC 3123 */ 3124 static int 3125 vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg) 3126 { 3127 int status = 0; 3128 vdc_local_desc_t *ldep = NULL; 3129 vio_dring_msg_t *dring_msg = NULL; 3130 uint_t start; 3131 int end; 3132 uint_t count = 0; 3133 uint_t operation; 3134 uint_t idx; 3135 3136 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_DATA); 3137 ASSERT(vdc != NULL); 3138 ASSERT(mutex_owned(&vdc->lock)); 3139 3140 dring_msg = (vio_dring_msg_t *)&msg; 3141 3142 /* 3143 * Check to see if the message has bogus data 3144 */ 3145 idx = start = dring_msg->start_idx; 3146 end = dring_msg->end_idx; 3147 if ((start >= vdc->dring_len) || 3148 (end >= vdc->dring_len) || (end < -1)) { 3149 cmn_err(CE_CONT, "?[%d] Bogus ACK data : start %d, end %d\n", 3150 vdc->instance, start, end); 3151 return (EINVAL); 3152 } 3153 3154 DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdc); 3155 3156 /* 3157 * Verify that the sequence number is what vdc expects. 3158 */ 3159 switch (vdc_verify_seq_num(vdc, dring_msg)) { 3160 case VDC_SEQ_NUM_TODO: 3161 break; /* keep processing this message */ 3162 case VDC_SEQ_NUM_SKIP: 3163 return (0); 3164 case VDC_SEQ_NUM_INVALID: 3165 return (ENXIO); 3166 } 3167 3168 if (msg.tag.vio_subtype == VIO_SUBTYPE_NACK) { 3169 DMSG(0, "[%d] DATA NACK\n", vdc->instance); 3170 VDC_DUMP_DRING_MSG(dring_msg); 3171 return (EIO); 3172 3173 } else if (msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 3174 return (EPROTO); 3175 } 3176 3177 ldep = &vdc->local_dring[start]; 3178 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 3179 mutex_enter(&ldep->lock); 3180 operation = ldep->dep->payload.operation; 3181 vdc->req_id_proc = ldep->dep->payload.req_id; 3182 vdc->dring_proc_idx = idx; 3183 ASSERT(ldep->dep->hdr.dstate == VIO_DESC_DONE); 3184 3185 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 3186 bioerror(ldep->buf, ldep->dep->payload.status); 3187 3188 /* Clear the DRing entry */ 3189 status = vdc_depopulate_descriptor(vdc, idx); 3190 3191 /* 3192 * biodone() should be called after clearing the DRing 3193 * entry because biodone() will release the IO buffer. 3194 */ 3195 biodone(ldep->buf); 3196 DTRACE_IO2(vdone, buf_t *, ldep->buf, vdc_t *, vdc); 3197 } 3198 cv_signal(&ldep->cv); 3199 mutex_exit(&ldep->lock); 3200 } 3201 3202 /* probe gives the count of how many entries were processed */ 3203 DTRACE_IO2(processed, int, count, vdc_t *, vdc); 3204 3205 return (status); 3206 } 3207 3208 /* 3209 * Function: 3210 * vdc_process_err_msg() 3211 * 3212 * NOTE: No error messages are used as part of the vDisk protocol 3213 */ 3214 static int 3215 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 3216 { 3217 _NOTE(ARGUNUSED(vdc)) 3218 _NOTE(ARGUNUSED(msg)) 3219 3220 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 3221 cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance); 3222 3223 return (ENOTSUP); 3224 } 3225 3226 /* 3227 * Function: 3228 * vdc_handle_ver_msg() 3229 * 3230 * Description: 3231 * 3232 * Arguments: 3233 * vdc - soft state pointer for this instance of the device driver. 
3234  *	ver_msg	- LDC message sent by vDisk server
3235  *
3236  * Return Code:
3237  *	0	- Success
3238  */
3239 static int
3240 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg)
3241 {
3242 	int status = 0;
3243 
3244 	ASSERT(vdc != NULL);
3245 	ASSERT(mutex_owned(&vdc->lock));
3246 
3247 	if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) {
3248 		return (EPROTO);
3249 	}
3250 
3251 	if (ver_msg->dev_class != VDEV_DISK_SERVER) {
3252 		return (EINVAL);
3253 	}
3254 
3255 	switch (ver_msg->tag.vio_subtype) {
3256 	case VIO_SUBTYPE_ACK:
3257 		/*
3258 		 * We check to see if the version returned is indeed supported
3259 		 * (The server may have also adjusted the minor number downwards
3260 		 * and if so 'ver_msg' will contain the actual version agreed)
3261 		 */
3262 		if (vdc_is_supported_version(ver_msg)) {
3263 			vdc->ver.major = ver_msg->ver_major;
3264 			vdc->ver.minor = ver_msg->ver_minor;
3265 			ASSERT(vdc->ver.major > 0);
3266 
3267 			vdc->state = VD_STATE_VER;
3268 			status = vdc_init_attr_negotiation(vdc);
3269 		} else {
3270 			status = EPROTO;
3271 		}
3272 		break;
3273 
3274 	case VIO_SUBTYPE_NACK:
3275 		/*
3276 		 * call vdc_is_supported_version() which will return the next
3277 		 * supported version (if any) in 'ver_msg'
3278 		 */
3279 		(void) vdc_is_supported_version(ver_msg);
3280 		if (ver_msg->ver_major > 0) {
3281 			size_t len = sizeof (*ver_msg);
3282 
3283 			ASSERT(vdc->ver.major > 0);
3284 
3285 			/* reset the necessary fields and resend */
3286 			ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO;
3287 			ver_msg->dev_class = VDEV_DISK;
3288 
3289 			status = vdc_send(vdc, (caddr_t)ver_msg, &len);
3290 			DMSG(0, "[%d] Resend VER info (LDC status = %d)\n",
3291 			    vdc->instance, status);
3292 			if (len != sizeof (*ver_msg))
3293 				status = EBADMSG;
3294 		} else {
3295 			cmn_err(CE_NOTE, "[%d] No common version with "
3296 			    "vDisk server", vdc->instance);
3297 			status = ENOTSUP;
3298 		}
3299 
3300 		break;
3301 	case VIO_SUBTYPE_INFO:
3302 		/*
3303 		 * Handle the case where vds starts handshake
3304 		 * (for now only vdc is the instigator)
3305 		 */
3306 		status = ENOTSUP;
3307 		break;
3308 
3309 	default:
3310 		status = EINVAL;
3311 		break;
3312 	}
3313 
3314 	return (status);
3315 }
3316 
3317 /*
3318  * Function:
3319  *	vdc_handle_attr_msg()
3320  *
3321  * Description:
3322  *	Handle the response to the attribute (VIO_ATTR_INFO) message.
3323  * Arguments:
3324  *	vdc	- soft state pointer for this instance of the device driver.
3325  *	attr_msg	- LDC message sent by vDisk server
3326  *
3327  * Return Code:
3328  *	0	- Success
3329  */
3330 static int
3331 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
3332 {
3333 	int status = 0;
3334 
3335 	ASSERT(vdc != NULL);
3336 	ASSERT(mutex_owned(&vdc->lock));
3337 
3338 	if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) {
3339 		return (EPROTO);
3340 	}
3341 
3342 	switch (attr_msg->tag.vio_subtype) {
3343 	case VIO_SUBTYPE_ACK:
3344 		/*
3345 		 * We now verify the attributes sent by vds.
3346 		 */
3347 		vdc->vdisk_size = attr_msg->vdisk_size;
3348 		vdc->vdisk_type = attr_msg->vdisk_type;
3349 
3350 		DMSG(0, "[%d] max_xfer_sz: sent %lx acked %lx\n",
3351 		    vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz);
3352 		DMSG(0, "[%d] vdisk_block_size: sent %lx acked %x\n",
3353 		    vdc->instance, vdc->block_size,
3354 		    attr_msg->vdisk_block_size);
3355 
3356 		/*
3357 		 * We don't know at compile time what the vDisk server will
3358 		 * think are good values but we apply a large (arbitrary)
3359 		 * upper bound to prevent memory exhaustion in vdc if it was
3360 		 * allocating a DRing based on huge values sent by the server.
3361 		 * We probably will never exceed this except if the message
3362 		 * was garbage.
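 *		 As a worked example (assuming an 8 KB PAGESIZE), the bound
 *		 below caps max_xfer_sz * vdisk_block_size at
 *		 PAGESIZE * DEV_BSIZE, i.e. 8192 * 512 = 4 MB per transfer.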
3363 		 */
3364 		if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <=
3365 		    (PAGESIZE * DEV_BSIZE)) {
3366 			vdc->max_xfer_sz = attr_msg->max_xfer_sz;
3367 			vdc->block_size = attr_msg->vdisk_block_size;
3368 		} else {
3369 			cmn_err(CE_NOTE, "[%d] vds block transfer size too big;"
3370 			    " using max supported by vdc", vdc->instance);
3371 		}
3372 
3373 		if ((attr_msg->xfer_mode != VIO_DRING_MODE) ||
3374 		    (attr_msg->vdisk_size > INT64_MAX) ||
3375 		    (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) {
3376 			cmn_err(CE_NOTE, "[%d] Invalid attributes from vds",
3377 			    vdc->instance);
3378 			status = EINVAL;
3379 			break;
3380 		}
3381 
3382 		vdc->state = VD_STATE_ATTR;
3383 		status = vdc_init_dring_negotiate(vdc);
3384 		break;
3385 
3386 	case VIO_SUBTYPE_NACK:
3387 		/*
3388 		 * vds could not handle the attributes we sent so we
3389 		 * stop negotiating.
3390 		 */
3391 		status = EPROTO;
3392 		break;
3393 
3394 	case VIO_SUBTYPE_INFO:
3395 		/*
3396 		 * Handle the case where vds starts the handshake
3397 		 * (for now; vdc is the only supported instigator)
3398 		 */
3399 		status = ENOTSUP;
3400 		break;
3401 
3402 	default:
3403 		status = ENOTSUP;
3404 		break;
3405 	}
3406 
3407 	return (status);
3408 }
3409 
3410 /*
3411  * Function:
3412  *	vdc_handle_dring_reg_msg()
3413  *
3414  * Description:
3415  *	Handle the response to the descriptor ring registration message.
3416  * Arguments:
3417  *	vdc		- soft state pointer for this instance of the driver.
3418  *	dring_msg	- LDC message sent by vDisk server
3419  *
3420  * Return Code:
3421  *	0	- Success
3422  */
3423 static int
3424 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg)
3425 {
3426 	int		status = 0;
3427 	vio_rdx_msg_t	msg = {0};
3428 	size_t		msglen = sizeof (msg);
3429 
3430 	ASSERT(vdc != NULL);
3431 	ASSERT(mutex_owned(&vdc->lock));
3432 
3433 	if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) {
3434 		return (EPROTO);
3435 	}
3436 
3437 	switch (dring_msg->tag.vio_subtype) {
3438 	case VIO_SUBTYPE_ACK:
3439 		/* save the received dring_ident */
3440 		vdc->dring_ident = dring_msg->dring_ident;
3441 		DMSG(0, "[%d] Received dring ident=0x%lx\n",
3442 		    vdc->instance, vdc->dring_ident);
3443 
3444 		/*
3445 		 * Send an RDX message to vds to indicate we are ready
3446 		 * to send data
3447 		 */
3448 		msg.tag.vio_msgtype = VIO_TYPE_CTRL;
3449 		msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
3450 		msg.tag.vio_subtype_env = VIO_RDX;
3451 		msg.tag.vio_sid = vdc->session_id;
3452 		status = vdc_send(vdc, (caddr_t)&msg, &msglen);
3453 		if (status != 0) {
3454 			cmn_err(CE_NOTE, "[%d] Failed to send RDX"
3455 			    " message (%d)", vdc->instance, status);
3456 			break;
3457 		}
3458 
3459 		vdc->state = VD_STATE_RDX;
3460 		break;
3461 
3462 	case VIO_SUBTYPE_NACK:
3463 		/*
3464 		 * vds could not handle the DRing info we sent so we
3465 		 * stop negotiating.
3466 		 */
3467 		cmn_err(CE_CONT, "server could not register DRing\n");
3468 		vdc_reset_connection(vdc, B_TRUE);
3469 		vdc_destroy_descriptor_ring(vdc);
3470 		status = EPROTO;
3471 		break;
3472 
3473 	case VIO_SUBTYPE_INFO:
3474 		/*
3475 		 * Handle the case where vds starts handshake
3476 		 * (for now only vdc is the instigator)
3477 		 */
3478 		status = ENOTSUP;
3479 		break;
3480 	default:
3481 		status = ENOTSUP;
3482 	}
3483 
3484 	return (status);
3485 }
3486 
3487 /*
3488  * Function:
3489  *	vdc_verify_seq_num()
3490  *
3491  * Description:
3492  *	This function verifies that the sequence number sent back by the vDisk
3493  *	server with the latest message is what is expected (i.e. it is greater
3494  *	than the last seq num sent by the vDisk server and less than or equal
3495  *	to the last seq num generated by vdc).
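 *
 *	For example, if vdc has generated sequence numbers up to 10 and the
 *	last reply processed carried 7, then replies numbered 8 through 10
 *	are acceptable; anything at or below 7, or above 10, is rejected as
 *	VDC_SEQ_NUM_INVALID.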
3496 * 3497 * It then checks the request ID to see if any requests need processing 3498 * in the DRing. 3499 * 3500 * Arguments: 3501 * vdc - soft state pointer for this instance of the driver. 3502 * dring_msg - pointer to the LDC message sent by vds 3503 * 3504 * Return Code: 3505 * VDC_SEQ_NUM_TODO - Message needs to be processed 3506 * VDC_SEQ_NUM_SKIP - Message has already been processed 3507 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 3508 * vdc cannot deal with them 3509 */ 3510 static int 3511 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 3512 { 3513 ASSERT(vdc != NULL); 3514 ASSERT(dring_msg != NULL); 3515 ASSERT(mutex_owned(&vdc->lock)); 3516 3517 /* 3518 * Check to see if the messages were responded to in the correct 3519 * order by vds. 3520 */ 3521 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 3522 (dring_msg->seq_num > vdc->seq_num)) { 3523 cmn_err(CE_CONT, "?[%d] Bogus sequence_number %lu: " 3524 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 3525 vdc->instance, dring_msg->seq_num, 3526 vdc->seq_num_reply, vdc->seq_num, 3527 vdc->req_id_proc, vdc->req_id); 3528 return (VDC_SEQ_NUM_INVALID); 3529 } 3530 vdc->seq_num_reply = dring_msg->seq_num; 3531 3532 if (vdc->req_id_proc < vdc->req_id) 3533 return (VDC_SEQ_NUM_TODO); 3534 else 3535 return (VDC_SEQ_NUM_SKIP); 3536 } 3537 3538 3539 /* 3540 * Function: 3541 * vdc_is_supported_version() 3542 * 3543 * Description: 3544 * This routine checks if the major/minor version numbers specified in 3545 * 'ver_msg' are supported. If not it finds the next version that is 3546 * in the supported version list 'vdc_version[]' and sets the fields in 3547 * 'ver_msg' to those values 3548 * 3549 * Arguments: 3550 * ver_msg - LDC message sent by vDisk server 3551 * 3552 * Return Code: 3553 * B_TRUE - Success 3554 * B_FALSE - Version not supported 3555 */ 3556 static boolean_t 3557 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 3558 { 3559 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 3560 3561 for (int i = 0; i < vdc_num_versions; i++) { 3562 ASSERT(vdc_version[i].major > 0); 3563 ASSERT((i == 0) || 3564 (vdc_version[i].major < vdc_version[i-1].major)); 3565 3566 /* 3567 * If the major versions match, adjust the minor version, if 3568 * necessary, down to the highest value supported by this 3569 * client. 
The server should support all minor versions lower
3570 		 * than the value it sent
3571 		 */
3572 		if (ver_msg->ver_major == vdc_version[i].major) {
3573 			if (ver_msg->ver_minor > vdc_version[i].minor) {
3574 				DMSG(0, "Adjusting minor version from %u to %u",
3575 				    ver_msg->ver_minor, vdc_version[i].minor);
3576 				ver_msg->ver_minor = vdc_version[i].minor;
3577 			}
3578 			return (B_TRUE);
3579 		}
3580 
3581 		/*
3582 		 * If the message contains a higher major version number, set
3583 		 * the message's major/minor versions to the current values
3584 		 * and return false, so this message will get resent with
3585 		 * these values, and the server will potentially try again
3586 		 * with the same or a lower version
3587 		 */
3588 		if (ver_msg->ver_major > vdc_version[i].major) {
3589 			ver_msg->ver_major = vdc_version[i].major;
3590 			ver_msg->ver_minor = vdc_version[i].minor;
3591 			DMSG(0, "Suggesting major/minor (0x%x/0x%x)\n",
3592 			    ver_msg->ver_major, ver_msg->ver_minor);
3593 
3594 			return (B_FALSE);
3595 		}
3596 
3597 		/*
3598 		 * Otherwise, the message's major version is less than the
3599 		 * current major version, so continue the loop to the next
3600 		 * (lower) supported version
3601 		 */
3602 	}
3603 
3604 	/*
3605 	 * No common version was found; "ground" the version pair in the
3606 	 * message to terminate negotiation
3607 	 */
3608 	ver_msg->ver_major = 0;
3609 	ver_msg->ver_minor = 0;
3610 
3611 	return (B_FALSE);
3612 }
3613 /* -------------------------------------------------------------------------- */
3614 
3615 /*
3616  * DKIO(7I) support
3617  */
3618 
3619 typedef struct vdc_dk_arg {
3620 	struct dk_callback	dkc;
3621 	int			mode;
3622 	dev_t			dev;
3623 	vdc_t			*vdc;
3624 } vdc_dk_arg_t;
3625 
3626 /*
3627  * Function:
3628  * 	vdc_dkio_flush_cb()
3629  *
3630  * Description:
3631  *	This routine is a callback for DKIOCFLUSHWRITECACHE which can be called
3632  *	by kernel code.
3633  *
3634  * Arguments:
3635  *	arg	- a pointer to a vdc_dk_arg_t structure.
3636  */
3637 void
3638 vdc_dkio_flush_cb(void *arg)
3639 {
3640 	struct vdc_dk_arg	*dk_arg = (struct vdc_dk_arg *)arg;
3641 	struct dk_callback	*dkc = NULL;
3642 	vdc_t			*vdc = NULL;
3643 	int			rv;
3644 
3645 	if (dk_arg == NULL) {
3646 		cmn_err(CE_CONT, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n");
3647 		return;
3648 	}
3649 	dkc = &dk_arg->dkc;
3650 	vdc = dk_arg->vdc;
3651 	ASSERT(vdc != NULL);
3652 
3653 	rv = vdc_populate_descriptor(vdc, NULL, 0, VD_OP_FLUSH,
3654 	    dk_arg->mode, SDPART(getminor(dk_arg->dev)));
3655 	if (rv != 0) {
3656 		DMSG(0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n",
3657 		    vdc->instance, rv,
3658 		    ddi_model_convert_from(dk_arg->mode & FMODELS));
3659 	}
3660 
3661 	/*
3662 	 * Trigger the callback to notify the caller that the ioctl call has
3663 	 * been completed.
3664 	 */
3665 	if ((dk_arg->mode & FKIOCTL) &&
3666 	    (dkc != NULL) &&
3667 	    (dkc->dkc_callback != NULL)) {
3668 		ASSERT(dkc->dkc_cookie != NULL);
3669 		(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
3670 	}
3671 
3672 	/* Indicate that one less DKIO write flush is outstanding */
3673 	mutex_enter(&vdc->lock);
3674 	vdc->dkio_flush_pending--;
3675 	ASSERT(vdc->dkio_flush_pending >= 0);
3676 	mutex_exit(&vdc->lock);
3677 
3678 	/* free the mem that was allocated when the callback was dispatched */
3679 	kmem_free(arg, sizeof (vdc_dk_arg_t));
3680 }
3681 
3682 /*
3683  * This structure is used in the DKIO(7I) array below.
3684 */ 3685 typedef struct vdc_dk_ioctl { 3686 uint8_t op; /* VD_OP_XXX value */ 3687 int cmd; /* Solaris ioctl operation number */ 3688 size_t nbytes; /* size of structure to be copied */ 3689 3690 /* function to convert between vDisk and Solaris structure formats */ 3691 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 3692 int mode, int dir); 3693 } vdc_dk_ioctl_t; 3694 3695 /* 3696 * Subset of DKIO(7I) operations currently supported 3697 */ 3698 static vdc_dk_ioctl_t dk_ioctl[] = { 3699 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, sizeof (int), 3700 vdc_null_copy_func}, 3701 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 3702 vdc_get_wce_convert}, 3703 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 3704 vdc_set_wce_convert}, 3705 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 3706 vdc_get_vtoc_convert}, 3707 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 3708 vdc_set_vtoc_convert}, 3709 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 3710 vdc_get_geom_convert}, 3711 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 3712 vdc_get_geom_convert}, 3713 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 3714 vdc_get_geom_convert}, 3715 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 3716 vdc_set_geom_convert}, 3717 {VD_OP_GET_EFI, DKIOCGETEFI, 0, 3718 vdc_get_efi_convert}, 3719 {VD_OP_SET_EFI, DKIOCSETEFI, 0, 3720 vdc_set_efi_convert}, 3721 3722 /* 3723 * These particular ioctls are not sent to the server - vdc fakes up 3724 * the necessary info. 3725 */ 3726 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 3727 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 3728 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 3729 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 3730 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 3731 }; 3732 3733 /* 3734 * Function: 3735 * vd_process_ioctl() 3736 * 3737 * Description: 3738 * This routine processes disk specific ioctl calls 3739 * 3740 * Arguments: 3741 * dev - the device number 3742 * cmd - the operation [dkio(7I)] to be processed 3743 * arg - pointer to user provided structure 3744 * (contains data to be set or reference parameter for get) 3745 * mode - bit flag, indicating open settings, 32/64 bit type, etc 3746 * 3747 * Return Code: 3748 * 0 3749 * EFAULT 3750 * ENXIO 3751 * EIO 3752 * ENOTSUP 3753 */ 3754 static int 3755 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 3756 { 3757 int instance = SDUNIT(getminor(dev)); 3758 vdc_t *vdc = NULL; 3759 int rv = -1; 3760 int idx = 0; /* index into dk_ioctl[] */ 3761 size_t len = 0; /* #bytes to send to vds */ 3762 size_t alloc_len = 0; /* #bytes to allocate mem for */ 3763 caddr_t mem_p = NULL; 3764 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 3765 struct vtoc vtoc_saved; 3766 3767 DMSG(0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 3768 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 3769 3770 vdc = ddi_get_soft_state(vdc_state, instance); 3771 if (vdc == NULL) { 3772 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 3773 instance); 3774 return (ENXIO); 3775 } 3776 3777 /* 3778 * Check to see if we can communicate with the vDisk server 3779 */ 3780 if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) { 3781 DMSG(0, "[%d] Not ready to transmit data\n", instance); 3782 return (ENOLINK); 3783 } 3784 3785 /* 3786 * Validate the ioctl operation to be performed. 3787 * 3788 * If we have looped through the array without finding a match then we 3789 * don't support this ioctl. 
3790 */ 3791 for (idx = 0; idx < nioctls; idx++) { 3792 if (cmd == dk_ioctl[idx].cmd) 3793 break; 3794 } 3795 3796 if (idx >= nioctls) { 3797 cmn_err(CE_CONT, "?[%d] Unsupported ioctl (0x%x)\n", 3798 vdc->instance, cmd); 3799 return (ENOTSUP); 3800 } 3801 3802 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 3803 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 3804 dk_efi_t dk_efi; 3805 3806 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 3807 if (rv != 0) 3808 return (EFAULT); 3809 3810 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 3811 } else { 3812 len = dk_ioctl[idx].nbytes; 3813 } 3814 3815 /* 3816 * Deal with the ioctls which the server does not provide. vdc can 3817 * fake these up and return immediately 3818 */ 3819 switch (cmd) { 3820 case CDROMREADOFFSET: 3821 case DKIOCREMOVABLE: 3822 case USCSICMD: 3823 return (ENOTTY); 3824 3825 case DKIOCINFO: 3826 { 3827 struct dk_cinfo cinfo; 3828 if (vdc->cinfo == NULL) 3829 return (ENXIO); 3830 3831 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 3832 cinfo.dki_partition = SDPART(getminor(dev)); 3833 3834 rv = ddi_copyout(&cinfo, (void *)arg, 3835 sizeof (struct dk_cinfo), mode); 3836 if (rv != 0) 3837 return (EFAULT); 3838 3839 return (0); 3840 } 3841 3842 case DKIOCGMEDIAINFO: 3843 { 3844 if (vdc->minfo == NULL) 3845 return (ENXIO); 3846 3847 rv = ddi_copyout(vdc->minfo, (void *)arg, 3848 sizeof (struct dk_minfo), mode); 3849 if (rv != 0) 3850 return (EFAULT); 3851 3852 return (0); 3853 } 3854 3855 case DKIOCFLUSHWRITECACHE: 3856 { 3857 struct dk_callback *dkc = (struct dk_callback *)arg; 3858 vdc_dk_arg_t *dkarg = NULL; 3859 3860 DMSG(1, "[%d] Flush W$: mode %x\n", instance, mode); 3861 3862 /* 3863 * If the backing device is not a 'real' disk then the 3864 * W$ operation request to the vDisk server will fail 3865 * so we might as well save the cycles and return now. 3866 */ 3867 if (vdc->vdisk_type != VD_DISK_TYPE_DISK) 3868 return (ENOTTY); 3869 3870 /* 3871 * If arg is NULL, then there is no callback function 3872 * registered and the call operates synchronously; we 3873 * break and continue with the rest of the function and 3874 * wait for vds to return (i.e. after the request to 3875 * vds returns successfully, all writes completed prior 3876 * to the ioctl will have been flushed from the disk 3877 * write cache to persistent media. 3878 * 3879 * If a callback function is registered, we dispatch 3880 * the request on a task queue and return immediately. 3881 * The callback will deal with informing the calling 3882 * thread that the flush request is completed. 3883 */ 3884 if (dkc == NULL) 3885 break; 3886 3887 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 3888 3889 dkarg->mode = mode; 3890 dkarg->dev = dev; 3891 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 3892 3893 mutex_enter(&vdc->lock); 3894 vdc->dkio_flush_pending++; 3895 dkarg->vdc = vdc; 3896 mutex_exit(&vdc->lock); 3897 3898 /* put the request on a task queue */ 3899 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 3900 (void *)dkarg, DDI_SLEEP); 3901 3902 return (rv == NULL ? 
ENOMEM : 0);
3903 	}
3904 	}
3905 
3906 	/* catch programming error in vdc - should be a VD_OP_XXX ioctl */
3907 	ASSERT(dk_ioctl[idx].op != 0);
3908 
3909 	/* LDC requires that the memory being mapped is 8-byte aligned */
3910 	alloc_len = P2ROUNDUP(len, sizeof (uint64_t));
3911 	DMSG(1, "[%d] struct size %ld alloc %ld\n", instance, len, alloc_len);
3912 
3913 	ASSERT(alloc_len != 0);	/* sanity check */
3914 	mem_p = kmem_zalloc(alloc_len, KM_SLEEP);
3915 
3916 	if (cmd == DKIOCSVTOC) {
3917 		/*
3918 		 * Save a copy of the current VTOC so that we can roll back
3919 		 * if the setting of the new VTOC fails.
3920 		 */
3921 		bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc));
3922 	}
3923 
3924 	/*
3925 	 * Call the conversion function for this ioctl which, if necessary,
3926 	 * converts from the Solaris format to the format ARC'ed
3927 	 * as part of the vDisk protocol (FWARC 2006/195)
3928 	 */
3929 	ASSERT(dk_ioctl[idx].convert != NULL);
3930 	rv = (dk_ioctl[idx].convert)(vdc, arg, mem_p, mode, VD_COPYIN);
3931 	if (rv != 0) {
3932 		DMSG(0, "[%d] convert func returned %d for ioctl 0x%x\n",
3933 		    instance, rv, cmd);
3934 		if (mem_p != NULL)
3935 			kmem_free(mem_p, alloc_len);
3936 		return (rv);
3937 	}
3938 
3939 	/*
3940 	 * send request to vds to service the ioctl.
3941 	 */
3942 	rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, dk_ioctl[idx].op,
3943 	    mode, SDPART((getminor(dev))));
3944 	if (rv != 0) {
3945 		/*
3946 		 * This is not necessarily an error. The ioctl could
3947 		 * be returning a value such as ENOTTY to indicate
3948 		 * that the ioctl is not applicable.
3949 		 */
3950 		DMSG(0, "[%d] vds returned %d for ioctl 0x%x\n",
3951 		    instance, rv, cmd);
3952 		if (mem_p != NULL)
3953 			kmem_free(mem_p, alloc_len);
3954 
3955 		if (cmd == DKIOCSVTOC) {
3956 			/* update of the VTOC has failed, roll back */
3957 			bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc));
3958 		}
3959 
3960 		return (rv);
3961 	}
3962 
3963 	if (cmd == DKIOCSVTOC) {
3964 		/*
3965 		 * The VTOC has been changed. We need to update the device
3966 		 * nodes to handle the case where an EFI label has been
3967 		 * changed to a VTOC label. We also try to update the device
3968 		 * node properties. Failing to set the properties should
3969 		 * not cause an error to be returned to the caller though.
3970 		 */
3971 		vdc->vdisk_label = VD_DISK_LABEL_VTOC;
3972 		(void) vdc_create_device_nodes_vtoc(vdc);
3973 
3974 		if (vdc_create_device_nodes_props(vdc)) {
3975 			cmn_err(CE_NOTE, "![%d] Failed to update device nodes"
3976 			    " properties", vdc->instance);
3977 		}
3978 
3979 	} else if (cmd == DKIOCSETEFI) {
3980 		/*
3981 		 * The EFI label has been changed. We need to update the device
3982 		 * nodes to handle the case where a VTOC label has been
3983 		 * changed to an EFI label. We also try to update the device
3984 		 * node properties. Failing to set the properties should
3985 		 * not cause an error to be returned to the caller though.
3986 		 */
3987 		struct dk_gpt *efi;
3988 		size_t efi_len;
3989 
3990 		vdc->vdisk_label = VD_DISK_LABEL_EFI;
3991 		(void) vdc_create_device_nodes_efi(vdc);
3992 
3993 		rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len);
3994 
3995 		if (rv == 0) {
3996 			vdc_store_efi(vdc, efi);
3997 			rv = vdc_create_device_nodes_props(vdc);
3998 			vd_efi_free(efi, efi_len);
3999 		}
4000 
4001 		if (rv) {
4002 			cmn_err(CE_NOTE, "![%d] Failed to update device nodes"
4003 			    " properties", vdc->instance);
4004 		}
4005 	}
4006 
4007 	/*
4008 	 * Call the conversion function (if it exists) for this ioctl
4009 	 * which converts from the format ARC'ed as part of the vDisk
4010 	 * protocol (FWARC 2006/195) back to a format understood by
4011 	 * the rest of Solaris.

	/*
	 * Call the conversion function (if it exists) for this ioctl
	 * which converts from the format ARC'ed as part of the vDisk
	 * protocol (FWARC 2006/195) back to a format understood by
	 * the rest of Solaris.
	 */
	rv = (dk_ioctl[idx].convert)(vdc, mem_p, arg, mode, VD_COPYOUT);
	if (rv != 0) {
		DMSG(0, "[%d] convert func returned %d for ioctl 0x%x\n",
		    instance, rv, cmd);
		if (mem_p != NULL)
			kmem_free(mem_p, alloc_len);
		return (rv);
	}

	if (mem_p != NULL)
		kmem_free(mem_p, alloc_len);

	return (rv);
}

/*
 * Function:
 *	vdc_null_copy_func()
 *
 * Description:
 *	This is an empty conversion function used by ioctl calls which
 *	do not need to convert the data being passed in/out to userland.
 */
static int
vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))
	_NOTE(ARGUNUSED(from))
	_NOTE(ARGUNUSED(to))
	_NOTE(ARGUNUSED(mode))
	_NOTE(ARGUNUSED(dir))

	return (0);
}

static int
vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
    int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	if (dir == VD_COPYIN)
		return (0);	/* nothing to do */

	if (ddi_copyout(from, to, sizeof (int), mode) != 0)
		return (EFAULT);

	return (0);
}

static int
vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
    int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	if (dir == VD_COPYOUT)
		return (0);	/* nothing to do */

	if (ddi_copyin(from, to, sizeof (int), mode) != 0)
		return (EFAULT);

	return (0);
}
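/*
 * For orientation (a sketch, not the actual declaration): the conversion
 * routines above and below are installed in the dk_ioctl[] table that
 * vd_process_ioctl() searches. Each entry ties a Solaris ioctl command to
 * the corresponding vDisk operation, the payload size and the conversion
 * callback. The field ordering shown here is an assumption; the real
 * table is declared earlier in this file.
 *
 *	{ VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), vdc_get_wce_convert },
 *	{ VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), vdc_set_wce_convert },
 */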

/*
 * Function:
 *	vdc_get_vtoc_convert()
 *
 * Description:
 *	This routine performs the necessary conversions from the DKIOCGVTOC
 *	Solaris structure to the format defined in FWARC 2006/195.
 *
 *	In the struct vtoc definition, the timestamp field is marked as not
 *	supported so it is not part of the vDisk protocol (FWARC 2006/195).
 *	However SVM uses that field to check that it can write into the VTOC,
 *	so we fake up the info of that field.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- the buffer containing the data to be copied from
 *	to	- the buffer to be copied to
 *	mode	- flags passed to ioctl() call
 *	dir	- the "direction" of the copy - VD_COPYIN or VD_COPYOUT
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- incorrect buffer passed in.
 *	EFAULT	- ddi_copyout routine encountered an error.
 */
static int
vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	int		i;
	void		*tmp_memp;
	struct vtoc	vt;
	struct vtoc32	vt32;
	int		copy_len = 0;
	int		rv = 0;

	if (dir != VD_COPYOUT)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
		copy_len = sizeof (struct vtoc32);
	else
		copy_len = sizeof (struct vtoc);

	VD_VTOC2VTOC((vd_vtoc_t *)from, &vt);

	/* fake the VTOC timestamp field */
	for (i = 0; i < V_NUMPAR; i++) {
		vt.timestamp[i] = vdc->vtoc->timestamp[i];
	}

	/* copy out from a stack buffer of the caller's data model */
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		vtoctovtoc32(vt, vt32);
		tmp_memp = &vt32;
	} else {
		tmp_memp = &vt;
	}
	rv = ddi_copyout(tmp_memp, to, copy_len, mode);
	if (rv != 0)
		rv = EFAULT;

	return (rv);
}
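/*
 * The ILP32/LP64 handling above follows the standard DDI data-model
 * pattern for ioctls whose payload layout differs between 32-bit and
 * 64-bit callers. A minimal sketch for a hypothetical pair of structures
 * my_info/my_info32 (illustrative names only, not driver code):
 *
 *	switch (ddi_model_convert_from(mode & FMODELS)) {
 *	case DDI_MODEL_ILP32: {
 *		struct my_info32 mi32;
 *
 *		my_info_to_32(&mi, &mi32);
 *		if (ddi_copyout(&mi32, uaddr, sizeof (mi32), mode) != 0)
 *			return (EFAULT);
 *		break;
 *	}
 *	default:
 *		if (ddi_copyout(&mi, uaddr, sizeof (mi), mode) != 0)
 *			return (EFAULT);
 *		break;
 *	}
 */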

/*
 * Function:
 *	vdc_set_vtoc_convert()
 *
 * Description:
 *	This routine performs the necessary conversions from the DKIOCSVTOC
 *	Solaris structure to the format defined in FWARC 2006/195.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- Buffer with data
 *	to	- Buffer where data is to be copied to
 *	mode	- flags passed to ioctl
 *	dir	- direction of copy (in or out)
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- Invalid buffer passed in
 *	EFAULT	- ddi_copyin of data failed
 */
static int
vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	void		*tmp_mem = NULL;
	struct vtoc	vt;
	struct vtoc	*vtp = &vt;
	vd_vtoc_t	vtvd;
	int		copy_len = 0;
	int		rv = 0;

	if (dir != VD_COPYIN)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
		copy_len = sizeof (struct vtoc32);
	else
		copy_len = sizeof (struct vtoc);

	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);

	rv = ddi_copyin(from, tmp_mem, copy_len, mode);
	if (rv != 0) {
		kmem_free(tmp_mem, copy_len);
		return (EFAULT);
	}

	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt);
	} else {
		vtp = tmp_mem;
	}

	/*
	 * The VTOC is being changed, so vdc needs to update the copy
	 * it saved in the soft state structure.
	 */
	bcopy(vtp, vdc->vtoc, sizeof (struct vtoc));

	VTOC2VD_VTOC(vtp, &vtvd);
	bcopy(&vtvd, to, sizeof (vd_vtoc_t));
	kmem_free(tmp_mem, copy_len);

	return (0);
}

/*
 * Function:
 *	vdc_get_geom_convert()
 *
 * Description:
 *	This routine performs the necessary conversions from the DKIOCGGEOM,
 *	DKIOCG_PHYSGEOM and DKIOCG_VIRTGEOM Solaris structures to the format
 *	defined in FWARC 2006/195.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- Buffer with data
 *	to	- Buffer where data is to be copied to
 *	mode	- flags passed to ioctl
 *	dir	- direction of copy (in or out)
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- Invalid buffer passed in
 *	EFAULT	- ddi_copyout of data failed
 */
static int
vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	struct dk_geom	geom;
	int		copy_len = sizeof (struct dk_geom);
	int		rv = 0;

	if (dir != VD_COPYOUT)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom);
	rv = ddi_copyout(&geom, to, copy_len, mode);
	if (rv != 0)
		rv = EFAULT;

	return (rv);
}

/*
 * Function:
 *	vdc_set_geom_convert()
 *
 * Description:
 *	This routine performs the necessary conversions from the DKIOCSGEOM
 *	Solaris structure to the format defined in FWARC 2006/195.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- Buffer with data
 *	to	- Buffer where data is to be copied to
 *	mode	- flags passed to ioctl
 *	dir	- direction of copy (in or out)
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- Invalid buffer passed in
 *	EFAULT	- ddi_copyin of data failed
 */
static int
vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	vd_geom_t	vdgeom;
	void		*tmp_mem = NULL;
	int		copy_len = sizeof (struct dk_geom);
	int		rv = 0;

	if (dir != VD_COPYIN)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);

	rv = ddi_copyin(from, tmp_mem, copy_len, mode);
	if (rv != 0) {
		kmem_free(tmp_mem, copy_len);
		return (EFAULT);
	}
	DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom);
	bcopy(&vdgeom, to, sizeof (vdgeom));
	kmem_free(tmp_mem, copy_len);

	return (0);
}
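/*
 * For reference (illustrative only): DKIOCGETEFI and DKIOCSETEFI carry a
 * two-level payload, a fixed dk_efi_t header whose dki_data field points
 * at a caller-supplied buffer. That is why the routines below copy the
 * header and the data separately. Typical userland usage:
 *
 *	dk_efi_t dke;
 *
 *	dke.dki_lba = 1;
 *	dke.dki_length = len;
 *	dke.dki_data = (efi_gpt_t *)buf;
 *	(void) ioctl(fd, DKIOCGETEFI, &dke);
 */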

static int
vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	vd_efi_t	*vd_efi;
	dk_efi_t	dk_efi;
	int		rv = 0;
	void		*uaddr;

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	if (dir == VD_COPYIN) {

		vd_efi = (vd_efi_t *)to;

		rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode);
		if (rv != 0)
			return (EFAULT);

		vd_efi->lba = dk_efi.dki_lba;
		vd_efi->length = dk_efi.dki_length;
		bzero(vd_efi->data, vd_efi->length);

	} else {

		rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode);
		if (rv != 0)
			return (EFAULT);

		uaddr = dk_efi.dki_data;

		dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP);

		VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi);

		rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length,
		    mode);

		/* free the staging buffer whether or not the copyout worked */
		kmem_free(dk_efi.dki_data, dk_efi.dki_length);

		if (rv != 0)
			return (EFAULT);
	}

	return (0);
}

static int
vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	dk_efi_t	dk_efi;
	void		*uaddr;

	if (dir == VD_COPYOUT)
		return (0);	/* nothing to do */

	if ((from == NULL) || (to == NULL))
		return (ENXIO);

	if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0)
		return (EFAULT);

	uaddr = dk_efi.dki_data;

	dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP);

	if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) {
		/* don't leak the staging buffer on a failed copyin */
		kmem_free(dk_efi.dki_data, dk_efi.dki_length);
		return (EFAULT);
	}

	DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to);

	kmem_free(dk_efi.dki_data, dk_efi.dki_length);

	return (0);
}

/*
 * Function:
 *	vdc_create_fake_geometry()
 *
 * Description:
 *	This routine fakes up the disk info needed for some DKIO ioctls.
 *		- DKIOCINFO
 *		- DKIOCGMEDIAINFO
 *
 *	[ just like lofi(7D) and ramdisk(7D) ]
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_create_fake_geometry(vdc_t *vdc)
{
	int	rv = 0;

	ASSERT(vdc != NULL);

	/*
	 * DKIOCINFO support
	 */
	vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);

	(void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME);
	(void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME);
	/* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */
	vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz;
	vdc->cinfo->dki_ctype = DKC_SCSI_CCS;
	vdc->cinfo->dki_flags = DKI_FMTVOL;
	vdc->cinfo->dki_cnum = 0;
	vdc->cinfo->dki_addr = 0;
	vdc->cinfo->dki_space = 0;
	vdc->cinfo->dki_prio = 0;
	vdc->cinfo->dki_vec = 0;
	vdc->cinfo->dki_unit = vdc->instance;
	vdc->cinfo->dki_slave = 0;
	/*
	 * The partition number will be created on the fly depending on the
	 * actual slice (i.e. minor node) that is used to request the data.
	 */
	vdc->cinfo->dki_partition = 0;

	/*
	 * DKIOCGMEDIAINFO support
	 */
	if (vdc->minfo == NULL)
		vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP);
	vdc->minfo->dki_media_type = DK_FIXED_DISK;
	vdc->minfo->dki_capacity = vdc->vdisk_size;
	vdc->minfo->dki_lbsize = DEV_BSIZE;

	return (rv);
}
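/*
 * For reference (illustrative only): a caller sizing the virtual disk
 * from the faked-up media info would combine the two fields set above,
 * e.g.
 *
 *	struct dk_minfo minfo;
 *	uint64_t disk_bytes;
 *
 *	(void) ioctl(fd, DKIOCGMEDIAINFO, &minfo);
 *	disk_bytes = minfo.dki_capacity * minfo.dki_lbsize;
 */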

/*
 * Function:
 *	vdc_setup_disk_layout()
 *
 * Description:
 *	This routine discovers all the necessary details about the "disk"
 *	by requesting the data that is available from the vDisk server and by
 *	faking up the rest of the data.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_setup_disk_layout(vdc_t *vdc)
{
	buf_t	*buf;	/* BREAD requests need to be in a buf_t structure */
	dev_t	dev;
	int	slice = 0;
	int	rv;

	ASSERT(vdc != NULL);

	rv = vdc_create_fake_geometry(vdc);
	if (rv != 0) {
		cmn_err(CE_NOTE, "[%d] Failed to create disk geometry "
		    "(err=%d)", vdc->instance, rv);
	}

	if (vdc->vtoc == NULL)
		vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP);

	dev = makedevice(ddi_driver_major(vdc->dip),
	    VD_MAKE_DEV(vdc->instance, 0));
	rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL);

	if (rv && rv != ENOTSUP) {
		cmn_err(CE_NOTE, "[%d] Failed to get VTOC (err=%d)",
		    vdc->instance, rv);
		return (rv);
	}

	if (rv == ENOTSUP) {
		/*
		 * If the device does not support VTOC then we try
		 * to read an EFI label.
		 */
		struct dk_gpt *efi;
		size_t efi_len;

		rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len);

		if (rv) {
			cmn_err(CE_NOTE, "[%d] Failed to get EFI (err=%d)",
			    vdc->instance, rv);
			return (rv);
		}

		vdc->vdisk_label = VD_DISK_LABEL_EFI;
		vdc_store_efi(vdc, efi);
		vd_efi_free(efi, efi_len);

		return (0);
	}

	vdc->vdisk_label = VD_DISK_LABEL_VTOC;

	/*
	 * Find the slice that represents the entire "disk" and use that to
	 * read the disk label. The convention in Solaris is that slice 2
	 * represents the whole disk, so we check that it does; otherwise we
	 * default to slice 0.
	 */
	if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) &&
	    (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) {
		slice = 2;
	} else {
		slice = 0;
	}

	/*
	 * Read disk label from start of disk
	 */
	vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP);
	buf = kmem_alloc(sizeof (buf_t), KM_SLEEP);
	bioinit(buf);
	buf->b_un.b_addr = (caddr_t)vdc->label;
	buf->b_bcount = DK_LABEL_SIZE;
	buf->b_flags = B_BUSY | B_READ;
	buf->b_dev = dev;
	rv = vdc_populate_descriptor(vdc, (caddr_t)buf, DK_LABEL_SIZE,
	    VD_OP_BREAD, 0, slice);
	rv = biowait(buf);
	biofini(buf);
	kmem_free(buf, sizeof (buf_t));

	return (rv);
}
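/*
 * For reference (illustrative only): once vdc_setup_devid() below has
 * registered the encapsulated devid, a diagnostic consumer could render
 * it in human-readable form with the standard DDI helpers, e.g.
 *
 *	char *str = ddi_devid_str_encode(vdc->devid, NULL);
 *
 *	if (str != NULL) {
 *		cmn_err(CE_CONT, "?vdc devid: %s\n", str);
 *		ddi_devid_str_free(str);
 *	}
 */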

/*
 * Function:
 *	vdc_setup_devid()
 *
 * Description:
 *	This routine discovers the devid of a vDisk. It requests the devid of
 *	the underlying device from the vDisk server, builds an encapsulated
 *	devid based on the retrieved devid and registers that new devid to
 *	the vDisk.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- A devid was successfully registered for the vDisk
 */
static int
vdc_setup_devid(vdc_t *vdc)
{
	int rv;
	vd_devid_t *vd_devid;
	size_t bufsize, bufid_len;

	/*
	 * We do not know in advance the size of the devid that the server
	 * will return; that size is encoded in the reply. So we make a first
	 * request using a default size, then check whether that size was
	 * large enough. If it was not, we make a second request with the
	 * correct size returned by the server. Note that ldc requires the
	 * size to be 8-byte aligned.
	 */
	bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN),
	    sizeof (uint64_t));
	vd_devid = kmem_zalloc(bufsize, KM_SLEEP);
	bufid_len = bufsize - sizeof (vd_efi_t) - 1;

	rv = vdc_populate_descriptor(vdc, (caddr_t)vd_devid, bufsize,
	    VD_OP_GET_DEVID, 0, 0);
	if (rv) {
		kmem_free(vd_devid, bufsize);
		return (rv);
	}

	if (vd_devid->length > bufid_len) {
		/*
		 * The returned devid is larger than the buffer used. Try
		 * again with a buffer of the right size; save the devid
		 * length before freeing the buffer it lives in.
		 */
		size_t devid_len = vd_devid->length;

		kmem_free(vd_devid, bufsize);
		bufsize = P2ROUNDUP(VD_DEVID_SIZE(devid_len),
		    sizeof (uint64_t));
		vd_devid = kmem_zalloc(bufsize, KM_SLEEP);
		bufid_len = bufsize - sizeof (vd_efi_t) - 1;

		rv = vdc_populate_descriptor(vdc, (caddr_t)vd_devid, bufsize,
		    VD_OP_GET_DEVID, 0, 0);
		if (rv) {
			kmem_free(vd_devid, bufsize);
			return (rv);
		}
	}

	/*
	 * The virtual disk should have the same device id as the one
	 * associated with the physical disk it is mapped on, otherwise
	 * sharing a disk between an LDom and a non-LDom may not work (for
	 * example for a shared SVM disk set).
	 *
	 * The DDI framework does not allow creating a device id with an
	 * arbitrary type, so we first create a device id of type DEVID_ENCAP
	 * and then we restore the original type of the physical device.
	 */

	/* build an encapsulated devid based on the returned devid */
	if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length,
	    vd_devid->id, &vdc->devid) != DDI_SUCCESS) {
		DMSG(1, "[%d] Failed to create devid\n", vdc->instance);
		kmem_free(vd_devid, bufsize);
		return (1);
	}

	DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type);

	ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS);

	kmem_free(vd_devid, bufsize);

	if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) {
		DMSG(1, "[%d] Failed to register devid\n", vdc->instance);
		return (1);
	}

	return (0);
}

static void
vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi)
{
	struct vtoc *vtoc = vdc->vtoc;

	vd_efi_to_vtoc(efi, vtoc);
	if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) {
		/*
		 * vd_efi_to_vtoc() will store information about the EFI Sun
		 * reserved partition (representing the entire disk) into
		 * partition 7. However, a single-slice device will only have
		 * that single partition, and the vdc driver expects to find
		 * information about that partition in slice 0. So we need
		 * to copy the information from slice 7 to slice 0.
		 */
		vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag;
		vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag;
		vtoc->v_part[0].p_start =
		    vtoc->v_part[VD_EFI_WD_SLICE].p_start;
		vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size;
	}
}