/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * LDoms virtual disk client (vdc) device driver
 *
 * This driver runs on a guest logical domain and communicates with the virtual
 * disk server (vds) driver running on the service domain which is exporting
 * virtualized "disks" to the guest logical domain.
 *
 * The driver can be divided into four sections:
 *
 * 1) generic device driver housekeeping
 *	_init, _fini, attach, detach, ops structures, etc.
 *
 * 2) communication channel setup
 *	Setup the communications link over the LDC channel that vdc uses to
 *	talk to the vDisk server. Initialise the descriptor ring which
 *	allows the LDC clients to transfer data via memory mappings.
 *
 * 3) Support exported to upper layers (filesystems, etc)
 *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
 *	ioctl calls. vdc copies the data to be written into the descriptor
 *	ring, or maps the buffer into which the vDisk server will store the
 *	data it reads. It then sends a message to the vDisk server requesting
 *	it to complete the operation.
 *
 * 4) Handling responses from the vDisk server.
 *	The vDisk server will ACK some or all of the messages vdc sends to it
 *	(this is configured during the handshake). Upon receipt of an ACK
 *	vdc will check the descriptor ring and signal to the upper layer
 *	code waiting on the IO.
 */

#include <sys/atomic.h>
#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/efi_partition.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mach_descrip.h>
#include <sys/modctl.h>
#include <sys/mdeg.h>
#include <sys/note.h>
#include <sys/open.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/promif.h>
#include <sys/vtoc.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>

#include <sys/cdio.h>
#include <sys/dktp/fdisk.h>
#include <sys/dktp/dadkio.h>
#include <sys/scsi/generic/sense.h>
#include <sys/scsi/impl/uscsi.h>	/* Needed for defn of USCSICMD ioctl */

#include <sys/ldoms.h>
#include <sys/ldc.h>
#include <sys/vio_common.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vdsk_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdc.h>

/*
 * function prototypes
 */

/* standard driver functions */
static int	vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
static int	vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int	vdc_strategy(struct buf *buf);
static int	vdc_print(dev_t dev, char *str);
static int	vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int	vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
		    cred_t *credp, int *rvalp);
static int	vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
static int	vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);

static int	vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
		    void *arg, void **resultp);
static int	vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int	vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);

/* setup */
static void	vdc_min(struct buf *bufp);
static int	vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
static int	vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node);
static int	vdc_start_ldc_connection(vdc_t *vdc);
static int	vdc_create_device_nodes(vdc_t *vdc);
static int	vdc_create_device_nodes_efi(vdc_t *vdc);
static int	vdc_create_device_nodes_vtoc(vdc_t *vdc);
static int	vdc_create_device_nodes_props(vdc_t *vdc);
static int	vdc_get_md_node(dev_info_t *dip, md_t **mdpp,
		    mde_cookie_t *vd_nodep, mde_cookie_t *vd_portp);
static int	vdc_get_ldc_id(md_t *, mde_cookie_t, uint64_t *);
static int	vdc_do_ldc_up(vdc_t *vdc);
static void	vdc_terminate_ldc(vdc_t *vdc);
static int	vdc_init_descriptor_ring(vdc_t *vdc);
static void	vdc_destroy_descriptor_ring(vdc_t *vdc);
static int	vdc_setup_devid(vdc_t *vdc);
static void	vdc_store_label_efi(vdc_t *vdc, struct dk_gpt *efi);
static void	vdc_store_label_vtoc(vdc_t *, struct dk_geom *, struct vtoc *);
static void	vdc_store_label_unk(vdc_t *vdc);
static boolean_t vdc_is_opened(vdc_t *vdc);

/* handshake with vds */
static int	vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
static int	vdc_ver_negotiation(vdc_t *vdcp);
static int	vdc_init_attr_negotiation(vdc_t *vdc);
static int	vdc_attr_negotiation(vdc_t *vdcp);
static int	vdc_init_dring_negotiate(vdc_t *vdc);
static int	vdc_dring_negotiation(vdc_t *vdcp);
static int	vdc_send_rdx(vdc_t *vdcp);
static int	vdc_rdx_exchange(vdc_t *vdcp);
static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg);

/* processing incoming messages from vDisk server */
static void	vdc_process_msg_thread(vdc_t *vdc);
static int	vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp);

static uint_t	vdc_handle_cb(uint64_t event, caddr_t arg);
static int	vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg);
static int	vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
static int	vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
static int	vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
static int	vdc_send_request(vdc_t *vdcp, int operation,
		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
static int	vdc_map_to_shared_dring(vdc_t *vdcp, int idx);
static int	vdc_populate_descriptor(vdc_t *vdcp, int operation,
		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
static int	vdc_do_sync_op(vdc_t *vdcp, int operation,
		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
		    int cb_type, void *cb_arg, vio_desc_direction_t dir);

static int	vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp);
static int	vdc_drain_response(vdc_t *vdcp);
static int	vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
static int	vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep);
static int	vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg);

/* dkio */
static int	vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
static void	vdc_create_fake_geometry(vdc_t *vdc);
static int	vdc_validate_geometry(vdc_t *vdc);
static void	vdc_validate(vdc_t *vdc);
static void	vdc_validate_task(void *arg);
static int	vdc_null_copy_func(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);

/*
 * Module variables
 */

/*
 * Tunable variables to control how long vdc waits before timing out on
 * various operations
 */
static int	vdc_hshake_retries = 3;

static int	vdc_timeout = 0; /* units: seconds */

static uint64_t vdc_hz_min_ldc_delay;
static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC;
static uint64_t vdc_hz_max_ldc_delay;
static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC;
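
/*
 * The vdc_hz_{min,max}_ldc_delay values are the clock-tick equivalents of
 * the corresponding *_timeout_ldc microsecond values; they are derived
 * with drv_usectohz() when an instance attaches (see vdc_do_attach()).
 */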

static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC;
static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC;

/* values for dumping - need to run in a tighter loop */
static uint64_t	vdc_usec_timeout_dump = 100 * MILLISEC;	/* 0.1s units: usec */
static int	vdc_dump_retries = 100;

/* Count of the number of vdc instances attached */
static volatile uint32_t	vdc_instance_count = 0;

/* Soft state pointer */
static void	*vdc_state;

/*
 * Controlling the verbosity of the error/debug messages
 *
 * vdc_msglevel - controls level of messages
 * vdc_matchinst - 64-bit variable where each bit corresponds
 *                 to a vdc instance to which vdc_msglevel applies.
 */
int		vdc_msglevel = 0x0;
uint64_t	vdc_matchinst = 0ull;

/*
 * Supported vDisk protocol version pairs.
 *
 * The first array entry is the latest and preferred version.
 */
static const vio_ver_t	vdc_version[] = {{1, 1}};

static struct cb_ops vdc_cb_ops = {
	vdc_open,	/* cb_open */
	vdc_close,	/* cb_close */
	vdc_strategy,	/* cb_strategy */
	vdc_print,	/* cb_print */
	vdc_dump,	/* cb_dump */
	vdc_read,	/* cb_read */
	vdc_write,	/* cb_write */
	vdc_ioctl,	/* cb_ioctl */
	nodev,		/* cb_devmap */
	nodev,		/* cb_mmap */
	nodev,		/* cb_segmap */
	nochpoll,	/* cb_chpoll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* cb_str */
	D_MP | D_64BIT,	/* cb_flag */
	CB_REV,		/* cb_rev */
	vdc_aread,	/* cb_aread */
	vdc_awrite	/* cb_awrite */
};

static struct dev_ops vdc_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	vdc_getinfo,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vdc_attach,	/* devo_attach */
	vdc_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	&vdc_cb_ops,	/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk client",
	&vdc_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/* -------------------------------------------------------------------------- */

/*
 * Device Driver housekeeping and setup
 */

int
_init(void)
{
	int	status;

	if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0)
		return (status);
	if ((status = mod_install(&modlinkage)) != 0)
		ddi_soft_state_fini(&vdc_state);
	vdc_efi_init(vd_process_ioctl);
	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;

	if ((status = mod_remove(&modlinkage)) != 0)
		return (status);
	vdc_efi_fini();
	ddi_soft_state_fini(&vdc_state);
	return (0);
}

static int
vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	_NOTE(ARGUNUSED(dip))

	int	instance = VDCUNIT((dev_t)arg);
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
			*resultp = NULL;
			return (DDI_FAILURE);
		}
		*resultp = vdc->dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);
	default:
		*resultp = NULL;
		return (DDI_FAILURE);
	}
}

static int
vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	instance;
	int	rv;
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_DETACH:
		/* the real work happens below */
		break;
	case DDI_SUSPEND:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	ASSERT(cmd == DDI_DETACH);
	instance = ddi_get_instance(dip);
	DMSGX(1, "[%d] Entered\n", instance);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	if (vdc_is_opened(vdc)) {
		DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance);
		return (DDI_FAILURE);
	}

	if (vdc->dkio_flush_pending) {
		DMSG(vdc, 0,
		    "[%d] Cannot detach: %d outstanding DKIO flushes\n",
		    instance, vdc->dkio_flush_pending);
		return (DDI_FAILURE);
	}

	if (vdc->validate_pending) {
		DMSG(vdc, 0,
		    "[%d] Cannot detach: %d outstanding validate requests\n",
		    instance, vdc->validate_pending);
		return (DDI_FAILURE);
	}

	DMSG(vdc, 0, "[%d] proceeding...\n", instance);

	/* mark instance as detaching */
	vdc->lifecycle	= VDC_LC_DETACHING;

	/*
	 * try and disable callbacks to prevent another handshake
	 */
	rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
	DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv);

	if (vdc->initialized & VDC_THREAD) {
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET)) {
			vdc->read_state = VDC_READ_RESET;
			cv_signal(&vdc->read_cv);
		}

		mutex_exit(&vdc->read_lock);

		/* wake up any thread waiting for connection to come online */
		mutex_enter(&vdc->lock);
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0,
			    "[%d] write reset - move to resetting state...\n",
			    instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}
		mutex_exit(&vdc->lock);

		/* now wait until state transitions to VDC_STATE_DETACH */
		thread_join(vdc->msg_proc_thr->t_did);
		ASSERT(vdc->state == VDC_STATE_DETACH);
		DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n",
		    vdc->instance);
	}

	mutex_enter(&vdc->lock);

	if (vdc->initialized & VDC_DRING)
		vdc_destroy_descriptor_ring(vdc);

	if (vdc->initialized & VDC_LDC)
		vdc_terminate_ldc(vdc);

	mutex_exit(&vdc->lock);

	if (vdc->initialized & VDC_MINOR) {
		ddi_prop_remove_all(dip);
		ddi_remove_minor_node(dip, NULL);
	}

	if (vdc->initialized & VDC_LOCKS) {
		mutex_destroy(&vdc->lock);
		mutex_destroy(&vdc->read_lock);
		cv_destroy(&vdc->initwait_cv);
		cv_destroy(&vdc->dring_free_cv);
		cv_destroy(&vdc->membind_cv);
		cv_destroy(&vdc->sync_pending_cv);
		cv_destroy(&vdc->sync_blocked_cv);
		cv_destroy(&vdc->read_cv);
		cv_destroy(&vdc->running_cv);
	}

	if (vdc->minfo)
		kmem_free(vdc->minfo, sizeof (struct dk_minfo));

	if (vdc->cinfo)
		kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));

	if (vdc->vtoc)
		kmem_free(vdc->vtoc, sizeof (struct vtoc));

	if (vdc->geom)
		kmem_free(vdc->geom, sizeof (struct dk_geom));

	if (vdc->devid) {
		ddi_devid_unregister(dip);
		ddi_devid_free(vdc->devid);
	}

	if (vdc->initialized & VDC_SOFT_STATE)
		ddi_soft_state_free(vdc_state, instance);

	DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc);

	return (DDI_SUCCESS);
}


static int
vdc_do_attach(dev_info_t *dip)
{
	int		instance;
	vdc_t		*vdc = NULL;
	int		status;
	md_t		*mdp;
	mde_cookie_t	vd_node, vd_port;

	ASSERT(dip != NULL);

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure",
		    instance);
"[%d] Couldn't alloc state structure", 511 instance); 512 return (DDI_FAILURE); 513 } 514 515 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 516 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 517 return (DDI_FAILURE); 518 } 519 520 /* 521 * We assign the value to initialized in this case to zero out the 522 * variable and then set bits in it to indicate what has been done 523 */ 524 vdc->initialized = VDC_SOFT_STATE; 525 526 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 527 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 528 529 vdc->dip = dip; 530 vdc->instance = instance; 531 vdc->vdisk_type = VD_DISK_TYPE_UNK; 532 vdc->vdisk_label = VD_DISK_LABEL_UNK; 533 vdc->state = VDC_STATE_INIT; 534 vdc->lifecycle = VDC_LC_ATTACHING; 535 vdc->ldc_state = 0; 536 vdc->session_id = 0; 537 vdc->block_size = DEV_BSIZE; 538 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 539 540 /* 541 * We assume, for now, that the vDisk server will export 'read' 542 * operations to us at a minimum (this is needed because of checks 543 * in vdc for supported operations early in the handshake process). 544 * The vDisk server will return ENOTSUP if this is not the case. 545 * The value will be overwritten during the attribute exchange with 546 * the bitmask of operations exported by server. 547 */ 548 vdc->operations = VD_OP_MASK_READ; 549 550 vdc->vtoc = NULL; 551 vdc->geom = NULL; 552 vdc->cinfo = NULL; 553 vdc->minfo = NULL; 554 555 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 556 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 557 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 558 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 559 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 560 561 vdc->threads_pending = 0; 562 vdc->sync_op_pending = B_FALSE; 563 vdc->sync_op_blocked = B_FALSE; 564 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 565 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 566 567 /* init blocking msg read functionality */ 568 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 569 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 570 vdc->read_state = VDC_READ_IDLE; 571 572 vdc->initialized |= VDC_LOCKS; 573 574 /* get device and port MD node for this disk instance */ 575 if (vdc_get_md_node(dip, &mdp, &vd_node, &vd_port) != 0) { 576 cmn_err(CE_NOTE, "[%d] Could not get machine description node", 577 instance); 578 return (DDI_FAILURE); 579 } 580 581 /* set the connection timeout */ 582 if (vd_port == NULL || (md_get_prop_val(mdp, vd_port, 583 VDC_MD_TIMEOUT, &vdc->ctimeout) != 0)) { 584 vdc->ctimeout = 0; 585 } 586 587 /* initialise LDC channel which will be used to communicate with vds */ 588 status = vdc_do_ldc_init(vdc, mdp, vd_node); 589 590 (void) md_fini_handle(mdp); 591 592 if (status != 0) { 593 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 594 goto return_status; 595 } 596 597 /* initialize the thread responsible for managing state with server */ 598 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 599 vdc, 0, &p0, TS_RUN, minclsyspri); 600 if (vdc->msg_proc_thr == NULL) { 601 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 602 instance); 603 return (DDI_FAILURE); 604 } 605 606 vdc->initialized |= VDC_THREAD; 607 608 atomic_inc_32(&vdc_instance_count); 609 610 /* 611 * Check the disk label. This will send requests and do the handshake. 612 * We don't really care about the disk label now. 
	 * What we really need is the handshake to be done so that we know the
	 * type of the disk (slice or full disk) and the appropriate device
	 * nodes can be created.
	 */
	vdc->vdisk_label = VD_DISK_LABEL_UNK;
	vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP);
	vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
	vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP);

	mutex_enter(&vdc->lock);
	(void) vdc_validate_geometry(vdc);
	mutex_exit(&vdc->lock);

	/*
	 * Now that we have the device info we can create the
	 * device nodes and properties
	 */
	status = vdc_create_device_nodes(vdc);
	if (status) {
		DMSG(vdc, 0, "[%d] Failed to create device nodes",
		    instance);
		goto return_status;
	}
	status = vdc_create_device_nodes_props(vdc);
	if (status) {
		DMSG(vdc, 0, "[%d] Failed to create device nodes"
		    " properties (%d)", instance, status);
		goto return_status;
	}

	/*
	 * Setup devid
	 */
	if (vdc_setup_devid(vdc)) {
		DMSG(vdc, 0, "[%d] No device id available\n", instance);
	}

	ddi_report_dev(dip);
	vdc->lifecycle	= VDC_LC_ONLINE;
	DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance);

return_status:
	DMSG(vdc, 0, "[%d] Attach completed\n", instance);
	return (status);
}

static int
vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	status;

	switch (cmd) {
	case DDI_ATTACH:
		if ((status = vdc_do_attach(dip)) != 0)
			(void) vdc_detach(dip, DDI_DETACH);
		return (status);
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}

static int
vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node)
{
	int		status = 0;
	ldc_status_t	ldc_state;
	ldc_attr_t	ldc_attr;
	uint64_t	ldc_id = 0;

	ASSERT(vdc != NULL);

	vdc->initialized |= VDC_LDC;

	if ((status = vdc_get_ldc_id(mdp, vd_node, &ldc_id)) != 0) {
		DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property",
		    vdc->instance);
		return (EIO);
	}

	DMSGX(0, "[%d] LDC id is 0x%lx\n", vdc->instance, ldc_id);

	vdc->ldc_id = ldc_id;

	ldc_attr.devclass = LDC_DEV_BLK;
	ldc_attr.instance = vdc->instance;
	ldc_attr.mode = LDC_MODE_UNRELIABLE;	/* unreliable transport */
	ldc_attr.mtu = VD_LDC_MTU;

	if ((vdc->initialized & VDC_LDC_INIT) == 0) {
		status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d",
			    vdc->instance, ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_INIT;
	}
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]",
		    vdc->instance, status);
		return (status);
	}
	vdc->ldc_state = ldc_state;

	if ((vdc->initialized & VDC_LDC_CB) == 0) {
		status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
		    (caddr_t)vdc);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)",
			    vdc->instance, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_CB;
	}

	vdc->initialized |= VDC_LDC;

	/*
	 * At this stage we have initialised LDC, we will now try and open
	 * the connection.
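	 *
	 * (Note: ldc_open() only opens the channel here; the link itself is
	 * brought up later via ldc_up() in vdc_do_ldc_up().)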
	 */
	if (vdc->ldc_state == LDC_INIT) {
		status = ldc_open(vdc->ldc_handle);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d",
			    vdc->instance, vdc->ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_OPEN;
	}

	return (status);
}

static int
vdc_start_ldc_connection(vdc_t *vdc)
{
	int	status = 0;

	ASSERT(vdc != NULL);

	ASSERT(MUTEX_HELD(&vdc->lock));

	status = vdc_do_ldc_up(vdc);

	DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance);

	return (status);
}

static int
vdc_stop_ldc_connection(vdc_t *vdcp)
{
	int	status;

	DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n",
	    vdcp->state);

	status = ldc_down(vdcp->ldc_handle);
	DMSG(vdcp, 0, "ldc_down() = %d\n", status);

	vdcp->initialized &= ~VDC_HANDSHAKE;
	DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized);

	return (status);
}

static int
vdc_create_device_nodes_efi(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "h");
	ddi_remove_minor_node(vdc->dip, "h,raw");

	if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add raw node 'wd,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}

static int
vdc_create_device_nodes_vtoc(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "wd");
	ddi_remove_minor_node(vdc->dip, "wd,raw");

	if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add raw node 'h,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}

/*
 * Function:
 *	vdc_create_device_nodes
 *
 * Description:
 *	This function creates the block and character device nodes under
 *	/devices along with the node properties. It is called as part of
 *	the attach(9E) of the instance during the handshake with vds after
 *	vds has sent the attributes to vdc.
 *
 *	If the device is of type VD_DISK_TYPE_SLICE then minor node 2 is
 *	used, in keeping with the Solaris convention that slice 2 refers
 *	to a whole disk. Slices start at 'a'.
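 *
 *	For example, for a full disk with the usual eight VTOC slices
 *	(V_NUMPAR == 8) this creates minor nodes 'a' through 'h' plus the
 *	matching ',raw' nodes; with an EFI label, the slice-7 nodes are
 *	instead named 'wd' and 'wd,raw'.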
 *
 * Parameters:
 *	vdc		- soft state pointer
 *
 * Return Values
 *	0		- Success
 *	EIO		- Failed to create node
 *	EINVAL		- Unknown type of disk exported
 */
static int
vdc_create_device_nodes(vdc_t *vdc)
{
	char		name[sizeof ("s,raw")];
	dev_info_t	*dip = NULL;
	int		instance, status;
	int		num_slices = 1;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	/*
	 * Minor nodes are different for EFI disks: EFI disks do not have
	 * a minor node 'h' for the minor number corresponding to slice
	 * VD_EFI_WD_SLICE (slice 7); instead they have a minor node 'wd'
	 * representing the whole disk.
	 */
	for (i = 0; i < num_slices; i++) {

		if (i == VD_EFI_WD_SLICE) {
			if (vdc->vdisk_label == VD_DISK_LABEL_EFI)
				status = vdc_create_device_nodes_efi(vdc);
			else
				status = vdc_create_device_nodes_vtoc(vdc);
			if (status != 0)
				return (status);
			continue;
		}

		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
		if (ddi_create_minor_node(dip, name, S_IFBLK,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'",
			    instance, name);
			return (EIO);
		}

		/* if any device node is created we set this flag */
		vdc->initialized |= VDC_MINOR;

		(void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw");

		if (ddi_create_minor_node(dip, name, S_IFCHR,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'",
			    instance, name);
			return (EIO);
		}
	}

	return (0);
}

/*
 * Function:
 *	vdc_create_device_nodes_props
 *
 * Description:
 *	This function creates the size properties for the block and character
 *	device nodes created under /devices. It is called as part of the
 *	attach(9E) of the instance during the handshake with vds after vds
 *	has sent the attributes to vdc.
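 *
 *	As a worked example (assuming a 512-byte v_sectorsz): a slice of
 *	2097152 sectors gets a VDC_SIZE_PROP_NAME property of 1073741824
 *	bytes and a VDC_NBLOCKS_PROP_NAME property of lbtodb(1073741824),
 *	i.e. 2097152 blocks.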
 *
 * Parameters:
 *	vdc		- soft state pointer
 *
 * Return Values
 *	0		- Success
 *	EIO		- Failed to create device node property
 *	EINVAL		- Unknown type of disk exported
 */
static int
vdc_create_device_nodes_props(vdc_t *vdc)
{
	dev_info_t	*dip = NULL;
	int		instance;
	int		num_slices = 1;
	int64_t		size = 0;
	dev_t		dev;
	int		rv;
	int		i;

	ASSERT(vdc != NULL);
	ASSERT(vdc->vtoc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	if (vdc->vdisk_label == VD_DISK_LABEL_UNK) {
		/* remove all properties */
		for (i = 0; i < num_slices; i++) {
			dev = makedevice(ddi_driver_major(dip),
			    VD_MAKE_DEV(instance, i));
			(void) ddi_prop_remove(dev, dip, VDC_SIZE_PROP_NAME);
			(void) ddi_prop_remove(dev, dip, VDC_NBLOCKS_PROP_NAME);
		}
		return (0);
	}

	for (i = 0; i < num_slices; i++) {
		dev = makedevice(ddi_driver_major(dip),
		    VD_MAKE_DEV(instance, i));

		size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz;
		DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n",
		    instance, size, size / (1024 * 1024),
		    vdc->vtoc->v_part[i].p_size);

		rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size);
		if (rv != DDI_PROP_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]",
			    instance, VDC_SIZE_PROP_NAME, size);
			return (EIO);
		}

		rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME,
		    lbtodb(size));
		if (rv != DDI_PROP_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]",
			    instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size));
			return (EIO);
		}
	}

	return (0);
}

/*
 * Function:
 *	vdc_is_opened
 *
 * Description:
 *	This function checks if any slice of a given virtual disk is
 *	currently opened.
 *
 * Parameters:
 *	vdc		- soft state pointer
 *
 * Return Values
 *	B_TRUE		- at least one slice is opened.
 *	B_FALSE		- no slice is opened.
 */
static boolean_t
vdc_is_opened(vdc_t *vdc)
{
	int i, nslices;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		nslices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		nslices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		ASSERT(0);
	}

	/* check if there's any layered open */
	for (i = 0; i < nslices; i++) {
		if (vdc->open_lyr[i] > 0)
			return (B_TRUE);
	}

	/* check if there is any other kind of open */
	for (i = 0; i < OTYPCNT; i++) {
		if (vdc->open[i] != 0)
			return (B_TRUE);
	}

	return (B_FALSE);
}

static int
vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp)
{
	uint8_t slicemask;
	int i;

	ASSERT(otyp < OTYPCNT);
	ASSERT(slice < V_NUMPAR);
	ASSERT(MUTEX_HELD(&vdc->lock));

	slicemask = 1 << slice;

	/* check if slice is already exclusively opened */
	if (vdc->open_excl & slicemask)
		return (EBUSY);

	/* if open exclusive, check if slice is already opened */
	if (flag & FEXCL) {
		if (vdc->open_lyr[slice] > 0)
			return (EBUSY);
		for (i = 0; i < OTYPCNT; i++) {
			if (vdc->open[i] & slicemask)
				return (EBUSY);
		}
		vdc->open_excl |= slicemask;
	}

	/* mark slice as opened */
	if (otyp == OTYP_LYR) {
		vdc->open_lyr[slice]++;
	} else {
		vdc->open[otyp] |= slicemask;
	}

	return (0);
}

static void
vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp)
{
	uint8_t slicemask;

	ASSERT(otyp < OTYPCNT);
	ASSERT(slice < V_NUMPAR);
	ASSERT(MUTEX_HELD(&vdc->lock));

	slicemask = 1 << slice;

	if (otyp == OTYP_LYR) {
		ASSERT(vdc->open_lyr[slice] > 0);
		vdc->open_lyr[slice]--;
	} else {
		vdc->open[otyp] &= ~slicemask;
	}

	if (flag & FEXCL)
		vdc->open_excl &= ~slicemask;
}

static int
vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	int	slice, status = 0;
	vdc_t	*vdc;

	ASSERT(dev != NULL);
	instance = VDCUNIT(*dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n",
	    getminor(*dev), flag, otyp);

	slice = VDCPART(*dev);

	mutex_enter(&vdc->lock);

	status = vdc_mark_opened(vdc, slice, flag, otyp);

	if (status != 0) {
		mutex_exit(&vdc->lock);
		return (status);
	}

	if (flag & (FNDELAY | FNONBLOCK)) {

		/* don't resubmit a validate request if there's already one */
		if (vdc->validate_pending > 0) {
			mutex_exit(&vdc->lock);
			return (0);
		}

		/* call vdc_validate() asynchronously to avoid blocking */
		if (taskq_dispatch(system_taskq, vdc_validate_task,
		    (void *)vdc, TQ_NOSLEEP) == NULL) {
			vdc_mark_closed(vdc, slice, flag, otyp);
			mutex_exit(&vdc->lock);
			return (ENXIO);
		}

		vdc->validate_pending++;
		mutex_exit(&vdc->lock);
		return (0);
	}

	mutex_exit(&vdc->lock);

	vdc_validate(vdc);

	mutex_enter(&vdc->lock);

	if (vdc->vdisk_label == VD_DISK_LABEL_UNK ||
	    vdc->vtoc->v_part[slice].p_size == 0) {
		vdc_mark_closed(vdc, slice, flag, otyp);
		status = EIO;
	}

	mutex_exit(&vdc->lock);

	return (status);
}

static int
vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	int	slice;
	int	rv;
	vdc_t	*vdc;

	instance = VDCUNIT(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp);

	slice = VDCPART(dev);

	/*
	 * Attempt to flush the W$ (write cache) on a close operation. If this
	 * is not a supported IOCTL command or the backing device is read-only
	 * do not fail the close operation.
	 */
	rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL);

	if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) {
		DMSG(vdc, 0, "[%d] flush failed with error %d on close\n",
		    instance, rv);
		return (EIO);
	}

	mutex_enter(&vdc->lock);
	vdc_mark_closed(vdc, slice, flag, otyp);
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	_NOTE(ARGUNUSED(credp))
	_NOTE(ARGUNUSED(rvalp))

	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
}

static int
vdc_print(dev_t dev, char *str)
{
	cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str);
	return (0);
}

static int
vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int	rv;
	size_t	nbytes = nblk * DEV_BSIZE;
	int	instance = VDCUNIT(dev);
	vdc_t	*vdc = NULL;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n",
	    instance, nbytes, blkno, (void *)addr);
	rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes,
	    VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir);
	if (rv) {
		DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv);
		return (rv);
	}

	if (ddi_in_panic())
		(void) vdc_drain_response(vdc);

	DMSG(vdc, 0, "[%d] End\n", instance);

	return (0);
}

/* -------------------------------------------------------------------------- */

/*
 * Disk access routines
 *
 */

/*
 * vdc_strategy()
 *
 * Return Value:
 *	0:	As per strategy(9E), the strategy() function must return 0
 *		[ bioerror(9f) sets b_flags to the proper error code ]
 */
static int
vdc_strategy(struct buf *buf)
{
	int	rv = -1;
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(buf->b_edev);
	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;
	int	slice;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n",
	    instance, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr);
"Read" : "Write", 1315 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1316 DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc); 1317 1318 bp_mapin(buf); 1319 1320 if ((long)buf->b_private == VD_SLICE_NONE) { 1321 /* I/O using an absolute disk offset */ 1322 slice = VD_SLICE_NONE; 1323 } else { 1324 slice = VDCPART(buf->b_edev); 1325 } 1326 1327 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 1328 buf->b_bcount, slice, buf->b_lblkno, 1329 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 1330 VIO_write_dir); 1331 1332 /* 1333 * If the request was successfully sent, the strategy call returns and 1334 * the ACK handler calls the bioxxx functions when the vDisk server is 1335 * done. 1336 */ 1337 if (rv) { 1338 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 1339 bioerror(buf, rv); 1340 biodone(buf); 1341 } 1342 1343 return (0); 1344 } 1345 1346 /* 1347 * Function: 1348 * vdc_min 1349 * 1350 * Description: 1351 * Routine to limit the size of a data transfer. Used in 1352 * conjunction with physio(9F). 1353 * 1354 * Arguments: 1355 * bp - pointer to the indicated buf(9S) struct. 1356 * 1357 */ 1358 static void 1359 vdc_min(struct buf *bufp) 1360 { 1361 vdc_t *vdc = NULL; 1362 int instance = VDCUNIT(bufp->b_edev); 1363 1364 vdc = ddi_get_soft_state(vdc_state, instance); 1365 VERIFY(vdc != NULL); 1366 1367 if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) { 1368 bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size; 1369 } 1370 } 1371 1372 static int 1373 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1374 { 1375 _NOTE(ARGUNUSED(cred)) 1376 1377 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1378 return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 1379 } 1380 1381 static int 1382 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1383 { 1384 _NOTE(ARGUNUSED(cred)) 1385 1386 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1387 return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 1388 } 1389 1390 static int 1391 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1392 { 1393 _NOTE(ARGUNUSED(cred)) 1394 1395 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1396 return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 1397 } 1398 1399 static int 1400 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1401 { 1402 _NOTE(ARGUNUSED(cred)) 1403 1404 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1405 return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 1406 } 1407 1408 1409 /* -------------------------------------------------------------------------- */ 1410 1411 /* 1412 * Handshake support 1413 */ 1414 1415 1416 /* 1417 * Function: 1418 * vdc_init_ver_negotiation() 1419 * 1420 * Description: 1421 * 1422 * Arguments: 1423 * vdc - soft state pointer for this instance of the device driver. 
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
{
	vio_ver_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status = -1;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance);

	/*
	 * set the Session ID to a unique value
	 * (the lower 32 bits of the clock tick)
	 */
	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);
	DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id);

	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_VER_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	pkt.dev_class = VDEV_DISK;
	pkt.ver_major = ver.major;
	pkt.ver_minor = ver.minor;

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n",
	    vdc->instance, status);
	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
		DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: "
		    "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vio_ver_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_ver_negotiation()
 *
 * Description:
 *	Negotiates the vDisk protocol version with the server: sends the
 *	proposed version and processes the server's response.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_ver_negotiation(vdc_t *vdcp)
{
	vio_msg_t vio_msg;
	int status;

	if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0]))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Ver negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg));
}

/*
 * Function:
 *	vdc_init_attr_negotiation()
 *
 * Description:
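 *	Sends a VIO_ATTR_INFO message advertising this client's maximum
 *	transfer size, block size and transfer mode (descriptor ring mode);
 *	the server fills in the operations, type, media and size fields in
 *	its reply.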
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_attr_negotiation(vdc_t *vdc)
{
	vd_attr_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] entered\n", vdc->instance);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.max_xfer_sz = vdc->max_xfer_sz;
	pkt.vdisk_block_size = vdc->block_size;
	pkt.xfer_mode = VIO_DRING_MODE;
	pkt.operations = 0;	/* server will set bits of valid operations */
	pkt.vdisk_type = 0;	/* server will set to valid device type */
	pkt.vdisk_media = 0;	/* server will set to valid media type */
	pkt.vdisk_size = 0;	/* server will set to valid size */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(vdc, 0, "Attr info sent (status = %d)\n", status);

	if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) {
		DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: "
		    "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vd_attr_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_attr_negotiation()
 *
 * Description:
 *	Negotiates the disk attributes with the vDisk server: sends this
 *	client's attributes and processes the server's response.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_attr_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_attr_negotiation(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Attr negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_init_dring_negotiate()
 *
 * Description:
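 *	Initialises the local descriptor ring (retrying on EAGAIN) and sends
 *	a VIO_DRING_REG message registering the ring with the vDisk server.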
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_dring_negotiate(vdc_t *vdc)
{
	vio_dring_reg_msg_t	pkt;
	size_t			msglen = sizeof (pkt);
	int			status = -1;
	int			retry;
	int			nretries = 10;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	for (retry = 0; retry < nretries; retry++) {
		status = vdc_init_descriptor_ring(vdc);
		if (status != EAGAIN)
			break;
		drv_usecwait(vdc_min_timeout_ldc);
	}

	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n",
		    vdc->instance, status);
		return (status);
	}

	DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n",
	    vdc->instance, status);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_DRING_REG;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.dring_ident = 0;
	pkt.num_descriptors = vdc->dring_len;
	pkt.descriptor_size = vdc->dring_entry_size;
	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
	pkt.ncookies = vdc->dring_cookie_count;
	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)",
		    vdc->instance, status);
	}

	return (status);
}


/*
 * Function:
 *	vdc_dring_negotiation()
 *
 * Description:
 *	Registers the descriptor ring with the vDisk server and processes
 *	the server's response.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_dring_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_dring_negotiate(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Dring negotiation response,"
		    " rv(%d)", vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_dring_reg_msg(vdcp,
	    (vio_dring_reg_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_send_rdx()
 *
 * Description:
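 *	Sends an RDX message to the vDisk server to indicate that this
 *	client is ready to transfer data.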
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_send_rdx(vdc_t *vdcp)
{
	vio_msg_t	msg;
	size_t		msglen = sizeof (vio_msg_t);
	int		status;

	/*
	 * Send an RDX message to vds to indicate we are ready
	 * to send data
	 */
	msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	msg.tag.vio_subtype_env = VIO_RDX;
	msg.tag.vio_sid = vdcp->session_id;
	status = vdc_send(vdcp, (caddr_t)&msg, &msglen);
	if (status != 0) {
		DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)",
		    vdcp->instance, status);
	}

	return (status);
}

/*
 * Function:
 *	vdc_handle_rdx()
 *
 * Description:
 *	Handles the RDX ACK message received from the vDisk server, which
 *	completes the handshake.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	msgp	- received msg
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
{
	_NOTE(ARGUNUSED(vdcp))
	_NOTE(ARGUNUSED(msgp))

	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);

	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);

	return (0);
}

/*
 * Function:
 *	vdc_rdx_exchange()
 *
 * Description:
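 *	Performs the RDX exchange with the vDisk server: sends an RDX
 *	message and waits for the matching ACK.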
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_rdx_exchange(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_send_rdx(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
		DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	boolean_t	q_has_pkts = B_FALSE;
	uint64_t	delay_time;
	size_t		len;

	mutex_enter(&vdc->read_lock);

	if (vdc->read_state == VDC_READ_IDLE)
		vdc->read_state = VDC_READ_WAITING;

	while (vdc->read_state != VDC_READ_PENDING) {

		/* detect if the connection has been reset */
		if (vdc->read_state == VDC_READ_RESET) {
			status = ECONNRESET;
			goto done;
		}

		cv_wait(&vdc->read_cv, &vdc->read_lock);
	}

	/*
	 * Until we get a blocking ldc read we have to retry
	 * until the entire LDC message has arrived before
	 * ldc_read() will succeed. Note we also bail out if
	 * the channel is reset or goes away.
	 */
	delay_time = vdc_ldc_read_init_delay;
loop:
	len = *nbytesp;
	status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len);
	switch (status) {
	case EAGAIN:
		delay_time *= 2;
		if (delay_time >= vdc_ldc_read_max_delay)
			delay_time = vdc_ldc_read_max_delay;
		delay(delay_time);
		goto loop;

	case 0:
		if (len == 0) {
			DMSG(vdc, 1, "[%d] ldc_read returned 0 bytes with "
			    "no error!\n", vdc->instance);
			goto loop;
		}

		*nbytesp = len;

		/*
		 * If there are pending messages, leave the
		 * read state as pending. Otherwise, set the state
		 * back to idle.
		 */
		status = ldc_chkq(vdc->ldc_handle, &q_has_pkts);
		if (status == 0 && !q_has_pkts)
			vdc->read_state = VDC_READ_IDLE;

		break;
	default:
		DMSG(vdc, 0, "ldc_read returned %d\n", status);
		break;
	}

done:
	mutex_exit(&vdc->read_lock);

	return (status);
}



#ifdef DEBUG
void
vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg)
{
	char *ms, *ss, *ses;
	switch (msg->tag.vio_msgtype) {
#define	Q(_s)	case _s : ms = #_s; break;
	Q(VIO_TYPE_CTRL)
	Q(VIO_TYPE_DATA)
	Q(VIO_TYPE_ERR)
#undef Q
	default: ms = "unknown"; break;
	}

	switch (msg->tag.vio_subtype) {
#define	Q(_s)	case _s : ss = #_s; break;
	Q(VIO_SUBTYPE_INFO)
	Q(VIO_SUBTYPE_ACK)
	Q(VIO_SUBTYPE_NACK)
#undef Q
	default: ss = "unknown"; break;
	}

	switch (msg->tag.vio_subtype_env) {
#define	Q(_s)	case _s : ses = #_s; break;
	Q(VIO_VER_INFO)
	Q(VIO_ATTR_INFO)
	Q(VIO_DRING_REG)
	Q(VIO_DRING_UNREG)
	Q(VIO_RDX)
	Q(VIO_PKT_DATA)
	Q(VIO_DESC_DATA)
	Q(VIO_DRING_DATA)
#undef Q
	default: ses = "unknown"; break;
	}

	DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, ms, ss, ses);
}
#endif

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write(), otherwise we return the error returned by
 *	LDC.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver,
 *		  which holds the LDC handle for the channel to use.
 *	pkt	- address of LDC message to be sent
 *	msglen	- the size of the message being sent. When the function
 *		  returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0	- Success.
 *	EINVAL	- pkt or msglen were NULL
 *	ECONNRESET - The connection was not up.
 *	EWOULDBLOCK - LDC queue is full
 *	xxx	- other error codes returned by ldc_write
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	status = 0;
	clock_t	delay_ticks;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

#ifdef DEBUG
	vdc_decode_tag(vdc, (vio_msg_t *)(uintptr_t)pkt);
#endif
	/*
	 * Wait indefinitely to send if channel
	 * is busy, but bail out if we succeed or
	 * if the channel closes or is reset.
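	 *
	 * (The wait doubles after each EWOULDBLOCK: with the default
	 * tunables that is 1ms, 2ms, 4ms, ... capped at 100ms per retry.)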
	 */
	delay_ticks = vdc_hz_min_ldc_delay;
	do {
		size = *msglen;
		status = ldc_write(vdc->ldc_handle, pkt, &size);
		if (status == EWOULDBLOCK) {
			delay(delay_ticks);
			/* geometric backoff */
			delay_ticks *= 2;
			if (delay_ticks > vdc_hz_max_ldc_delay)
				delay_ticks = vdc_hz_max_ldc_delay;
		}
	} while (status == EWOULDBLOCK);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET))
			cv_signal(&vdc->read_cv);
		vdc->read_state = VDC_READ_RESET;
		mutex_exit(&vdc->read_lock);

		/* wake up any waiters in the reset thread */
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0, "[%d] write reset - "
			    "vdc is resetting ..\n", vdc->instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}

		return (ECONNRESET);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}

/*
 * Function:
 *	vdc_get_md_node
 *
 * Description:
 *	Get the MD, the device node and the port node for the given
 *	disk instance. The caller is responsible for cleaning up the
 *	reference to the returned MD (mdpp) by calling md_fini_handle().
 *
 * Arguments:
 *	dip	- dev info pointer for this instance of the device driver.
 *	mdpp	- the returned MD.
 *	vd_nodep - the returned device node.
 *	vd_portp - the returned port node. The returned port node is NULL
 *		   if no port node is found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 *	ENXIO	- Unexpected error communicating with MD framework
 */
static int
vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep,
    mde_cookie_t *vd_portp)
{
	int		status = ENOENT;
	char		*node_name = NULL;
	md_t		*mdp = NULL;
	int		num_nodes;
	int		num_vdevs;
	int		num_vports;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	boolean_t	found_inst = B_FALSE;
	int		listsz;
	int		idx;
	uint64_t	md_inst;
	int		obp_inst;
	int		instance = ddi_get_instance(dip);

	/*
	 * Get the OBP instance number for comparison with the MD instance
	 *
	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris. Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance.
	 * If the "reg" property cannot be found, the device tree state is
	 * presumably so broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
		return (ENOENT);
	}
	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    OBP_REG, -1);
	DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst);

	/*
	 * We now walk the MD nodes to find the node for this vdisk.
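	 * A vdisk appears as a VDC_MD_VDEV_NAME node named VDC_MD_DISK_NAME
	 * whose VDC_MD_CFG_HDL property matches the OBP "reg" value
	 * retrieved above.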
2089 */ 2090 if ((mdp = md_get_handle()) == NULL) { 2091 cmn_err(CE_WARN, "unable to init machine description"); 2092 return (ENXIO); 2093 } 2094 2095 num_nodes = md_node_count(mdp); 2096 ASSERT(num_nodes > 0); 2097 2098 listsz = num_nodes * sizeof (mde_cookie_t); 2099 2100 /* allocate memory for nodes */ 2101 listp = kmem_zalloc(listsz, KM_SLEEP); 2102 2103 rootnode = md_root_node(mdp); 2104 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 2105 2106 /* 2107 * Search for all the virtual devices, we will then check to see which 2108 * ones are disk nodes. 2109 */ 2110 num_vdevs = md_scan_dag(mdp, rootnode, 2111 md_find_name(mdp, VDC_MD_VDEV_NAME), 2112 md_find_name(mdp, "fwd"), listp); 2113 2114 if (num_vdevs <= 0) { 2115 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 2116 status = ENOENT; 2117 goto done; 2118 } 2119 2120 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 2121 for (idx = 0; idx < num_vdevs; idx++) { 2122 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 2123 if ((status != 0) || (node_name == NULL)) { 2124 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 2125 ": err %d", VDC_MD_VDEV_NAME, status); 2126 continue; 2127 } 2128 2129 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 2130 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 2131 status = md_get_prop_val(mdp, listp[idx], 2132 VDC_MD_CFG_HDL, &md_inst); 2133 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 2134 instance, md_inst); 2135 if ((status == 0) && (md_inst == obp_inst)) { 2136 found_inst = B_TRUE; 2137 break; 2138 } 2139 } 2140 } 2141 2142 if (!found_inst) { 2143 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 2144 status = ENOENT; 2145 goto done; 2146 } 2147 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 2148 2149 *vd_nodep = listp[idx]; 2150 *mdpp = mdp; 2151 2152 num_vports = md_scan_dag(mdp, *vd_nodep, 2153 md_find_name(mdp, VDC_MD_PORT_NAME), 2154 md_find_name(mdp, "fwd"), listp); 2155 2156 if (num_vports != 1) { 2157 DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2158 VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME, num_vports); 2159 } 2160 2161 *vd_portp = (num_vports == 0)? NULL: listp[0]; 2162 2163 done: 2164 kmem_free(listp, listsz); 2165 return (status); 2166 } 2167 2168 /* 2169 * Function: 2170 * vdc_get_ldc_id() 2171 * 2172 * Description: 2173 * This function gets the 'ldc-id' for this particular instance of vdc. 2174 * The id returned is the guest domain channel endpoint LDC uses for 2175 * communication with vds. 2176 * 2177 * Arguments: 2178 * mdp - pointer to the machine description. 2179 * vd_node - the vdisk element from the MD. 2180 * ldc_id - pointer to variable used to return the 'ldc-id' found. 2181 * 2182 * Return Code: 2183 * 0 - Success. 2184 * ENOENT - Expected node or property did not exist. 
2185 */ 2186 static int 2187 vdc_get_ldc_id(md_t *mdp, mde_cookie_t vd_node, uint64_t *ldc_id) 2188 { 2189 mde_cookie_t *chanp = NULL; 2190 int listsz; 2191 int num_chans; 2192 int num_nodes; 2193 int status = 0; 2194 2195 num_nodes = md_node_count(mdp); 2196 ASSERT(num_nodes > 0); 2197 2198 listsz = num_nodes * sizeof (mde_cookie_t); 2199 2200 /* allocate memory for nodes */ 2201 chanp = kmem_zalloc(listsz, KM_SLEEP); 2202 2203 /* get the channels for this node */ 2204 num_chans = md_scan_dag(mdp, vd_node, 2205 md_find_name(mdp, VDC_MD_CHAN_NAME), 2206 md_find_name(mdp, "fwd"), chanp); 2207 2208 /* expecting at least one channel */ 2209 if (num_chans <= 0) { 2210 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 2211 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 2212 status = ENOENT; 2213 goto done; 2214 2215 } else if (num_chans != 1) { 2216 DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2217 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, num_chans); 2218 } 2219 2220 /* 2221 * We use the first channel found (index 0), irrespective of how 2222 * many are there in total. 2223 */ 2224 if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, ldc_id) != 0) { 2225 cmn_err(CE_NOTE, "Channel '%s' property not found", VDC_MD_ID); 2226 status = ENOENT; 2227 } 2228 2229 done: 2230 kmem_free(chanp, listsz); 2231 return (status); 2232 } 2233 2234 static int 2235 vdc_do_ldc_up(vdc_t *vdc) 2236 { 2237 int status; 2238 ldc_status_t ldc_state; 2239 2240 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 2241 vdc->instance, vdc->ldc_id); 2242 2243 if (vdc->lifecycle == VDC_LC_DETACHING) 2244 return (EINVAL); 2245 2246 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 2247 switch (status) { 2248 case ECONNREFUSED: /* listener not ready at other end */ 2249 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 2250 vdc->instance, vdc->ldc_id, status); 2251 status = 0; 2252 break; 2253 default: 2254 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 2255 "channel=%ld, err=%d", vdc->instance, vdc->ldc_id, 2256 status); 2257 break; 2258 } 2259 } 2260 2261 if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) { 2262 vdc->ldc_state = ldc_state; 2263 if (ldc_state == LDC_UP) { 2264 DMSG(vdc, 0, "[%d] LDC channel already up\n", 2265 vdc->instance); 2266 vdc->seq_num = 1; 2267 vdc->seq_num_reply = 0; 2268 } 2269 } 2270 2271 return (status); 2272 } 2273 2274 /* 2275 * Function: 2276 * vdc_terminate_ldc() 2277 * 2278 * Description: 2279 * 2280 * Arguments: 2281 * vdc - soft state pointer for this instance of the device driver. 
2282 * 2283 * Return Code: 2284 * None 2285 */ 2286 static void 2287 vdc_terminate_ldc(vdc_t *vdc) 2288 { 2289 int instance = ddi_get_instance(vdc->dip); 2290 2291 ASSERT(vdc != NULL); 2292 ASSERT(mutex_owned(&vdc->lock)); 2293 2294 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 2295 2296 if (vdc->initialized & VDC_LDC_OPEN) { 2297 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 2298 (void) ldc_close(vdc->ldc_handle); 2299 } 2300 if (vdc->initialized & VDC_LDC_CB) { 2301 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 2302 (void) ldc_unreg_callback(vdc->ldc_handle); 2303 } 2304 if (vdc->initialized & VDC_LDC) { 2305 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 2306 (void) ldc_fini(vdc->ldc_handle); 2307 vdc->ldc_handle = NULL; 2308 } 2309 2310 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 2311 } 2312 2313 /* -------------------------------------------------------------------------- */ 2314 2315 /* 2316 * Descriptor Ring helper routines 2317 */ 2318 2319 /* 2320 * Function: 2321 * vdc_init_descriptor_ring() 2322 * 2323 * Description: 2324 * 2325 * Arguments: 2326 * vdc - soft state pointer for this instance of the device driver. 2327 * 2328 * Return Code: 2329 * 0 - Success 2330 */ 2331 static int 2332 vdc_init_descriptor_ring(vdc_t *vdc) 2333 { 2334 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 2335 int status = 0; 2336 int i; 2337 2338 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 2339 2340 ASSERT(vdc != NULL); 2341 ASSERT(mutex_owned(&vdc->lock)); 2342 ASSERT(vdc->ldc_handle != NULL); 2343 2344 /* ensure we have enough room to store max sized block */ 2345 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2346 2347 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 2348 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2349 /* 2350 * Calculate the maximum block size we can transmit using one 2351 * Descriptor Ring entry from the attributes returned by the 2352 * vDisk server. This is subject to a minimum of 'maxphys' 2353 * as we do not have the capability to split requests over 2354 * multiple DRing entries. 
2355 */ 2356 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 2357 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2358 vdc->instance); 2359 vdc->dring_max_cookies = maxphys / PAGESIZE; 2360 } else { 2361 vdc->dring_max_cookies = 2362 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2363 } 2364 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2365 (sizeof (ldc_mem_cookie_t) * 2366 (vdc->dring_max_cookies - 1))); 2367 vdc->dring_len = VD_DRING_LEN; 2368 2369 status = ldc_mem_dring_create(vdc->dring_len, 2370 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 2371 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 2372 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2373 vdc->instance); 2374 return (status); 2375 } 2376 vdc->initialized |= VDC_DRING_INIT; 2377 } 2378 2379 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 2380 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 2381 vdc->dring_cookie = 2382 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 2383 2384 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 2385 LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 2386 &vdc->dring_cookie[0], 2387 &vdc->dring_cookie_count); 2388 if (status != 0) { 2389 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 2390 "(%lx) to channel (%lx) status=%d\n", 2391 vdc->instance, vdc->ldc_dring_hdl, 2392 vdc->ldc_handle, status); 2393 return (status); 2394 } 2395 ASSERT(vdc->dring_cookie_count == 1); 2396 vdc->initialized |= VDC_DRING_BOUND; 2397 } 2398 2399 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 2400 if (status != 0) { 2401 DMSG(vdc, 0, 2402 "[%d] Failed to get info for descriptor ring (%lx)\n", 2403 vdc->instance, vdc->ldc_dring_hdl); 2404 return (status); 2405 } 2406 2407 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 2408 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 2409 2410 /* Allocate the local copy of this dring */ 2411 vdc->local_dring = 2412 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 2413 KM_SLEEP); 2414 vdc->initialized |= VDC_DRING_LOCAL; 2415 } 2416 2417 /* 2418 * Mark all DRing entries as free and initialize the private 2419 * descriptor's memory handles. If any entry is initialized, 2420 * we need to free it later so we set the bit in 'initialized' 2421 * at the start. 2422 */ 2423 vdc->initialized |= VDC_DRING_ENTRY; 2424 for (i = 0; i < vdc->dring_len; i++) { 2425 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 2426 dep->hdr.dstate = VIO_DESC_FREE; 2427 2428 status = ldc_mem_alloc_handle(vdc->ldc_handle, 2429 &vdc->local_dring[i].desc_mhdl); 2430 if (status != 0) { 2431 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 2432 " descriptor %d", vdc->instance, i); 2433 return (status); 2434 } 2435 vdc->local_dring[i].is_free = B_TRUE; 2436 vdc->local_dring[i].dep = dep; 2437 } 2438 2439 /* Initialize the starting index */ 2440 vdc->dring_curr_idx = 0; 2441 2442 return (status); 2443 } 2444 2445 /* 2446 * Function: 2447 * vdc_destroy_descriptor_ring() 2448 * 2449 * Description: 2450 * 2451 * Arguments: 2452 * vdc - soft state pointer for this instance of the device driver. 
2453 * 2454 * Return Code: 2455 * None 2456 */ 2457 static void 2458 vdc_destroy_descriptor_ring(vdc_t *vdc) 2459 { 2460 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2461 ldc_mem_handle_t mhdl = NULL; 2462 ldc_mem_info_t minfo; 2463 int status = -1; 2464 int i; /* loop */ 2465 2466 ASSERT(vdc != NULL); 2467 ASSERT(mutex_owned(&vdc->lock)); 2468 2469 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 2470 2471 if (vdc->initialized & VDC_DRING_ENTRY) { 2472 DMSG(vdc, 0, 2473 "[%d] Removing Local DRing entries\n", vdc->instance); 2474 for (i = 0; i < vdc->dring_len; i++) { 2475 ldep = &vdc->local_dring[i]; 2476 mhdl = ldep->desc_mhdl; 2477 2478 if (mhdl == NULL) 2479 continue; 2480 2481 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 2482 DMSG(vdc, 0, 2483 "ldc_mem_info returned an error: %d\n", 2484 status); 2485 2486 /* 2487 * This must mean that the mem handle 2488 * is not valid. Clear it out so that 2489 * no one tries to use it. 2490 */ 2491 ldep->desc_mhdl = NULL; 2492 continue; 2493 } 2494 2495 if (minfo.status == LDC_BOUND) { 2496 (void) ldc_mem_unbind_handle(mhdl); 2497 } 2498 2499 (void) ldc_mem_free_handle(mhdl); 2500 2501 ldep->desc_mhdl = NULL; 2502 } 2503 vdc->initialized &= ~VDC_DRING_ENTRY; 2504 } 2505 2506 if (vdc->initialized & VDC_DRING_LOCAL) { 2507 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 2508 kmem_free(vdc->local_dring, 2509 vdc->dring_len * sizeof (vdc_local_desc_t)); 2510 vdc->initialized &= ~VDC_DRING_LOCAL; 2511 } 2512 2513 if (vdc->initialized & VDC_DRING_BOUND) { 2514 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 2515 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 2516 if (status == 0) { 2517 vdc->initialized &= ~VDC_DRING_BOUND; 2518 } else { 2519 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2520 vdc->instance, status, vdc->ldc_dring_hdl); 2521 } 2522 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 2523 } 2524 2525 if (vdc->initialized & VDC_DRING_INIT) { 2526 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 2527 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 2528 if (status == 0) { 2529 vdc->ldc_dring_hdl = NULL; 2530 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 2531 vdc->initialized &= ~VDC_DRING_INIT; 2532 } else { 2533 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 2534 vdc->instance, status, vdc->ldc_dring_hdl); 2535 } 2536 } 2537 } 2538 2539 /* 2540 * Function: 2541 * vdc_map_to_shared_dring() 2542 * 2543 * Description: 2544 * Copy contents of the local descriptor to the shared 2545 * memory descriptor. 2546 * 2547 * Arguments: 2548 * vdcp - soft state pointer for this instance of the device driver.
2549 * idx - descriptor ring index 2550 * 2551 * Return Code: 2552 * 0 - Success; EAGAIN if the data buffer could not be bound (see vdc_populate_mem_hdl()). 2553 */ 2554 static int 2555 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 2556 { 2557 vdc_local_desc_t *ldep; 2558 vd_dring_entry_t *dep; 2559 int rv; 2560 2561 ldep = &(vdcp->local_dring[idx]); 2562 2563 /* bind the data buffer to the channel if there is data to transfer */ 2564 if (ldep->nbytes > 0) { 2565 rv = vdc_populate_mem_hdl(vdcp, ldep); 2566 if (rv) { 2567 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 2568 vdcp->instance); 2569 return (rv); 2570 } 2571 } 2572 2573 /* 2574 * fill in the data details into the DRing 2575 */ 2576 dep = ldep->dep; 2577 ASSERT(dep != NULL); 2578 2579 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 2580 dep->payload.operation = ldep->operation; 2581 dep->payload.addr = ldep->offset; 2582 dep->payload.nbytes = ldep->nbytes; 2583 dep->payload.status = (uint32_t)-1; /* vds will set valid value */ 2584 dep->payload.slice = ldep->slice; 2585 dep->hdr.dstate = VIO_DESC_READY; 2586 dep->hdr.ack = 1; /* request an ACK for every message */ 2587 2588 return (0); 2589 } 2590 2591 /* 2592 * Function: 2593 * vdc_send_request 2594 * 2595 * Description: 2596 * This routine waits until the driver is in the running state and 2597 * then calls vdc_populate_descriptor() to put the request on the 2598 * descriptor ring, retrying until the request is accepted. 2599 * 2600 * Arguments: 2601 * vdcp - the soft state pointer 2602 * operation - operation we want vds to perform (VD_OP_XXX) 2603 * addr - address of data buf to be read/written. 2604 * nbytes - number of bytes to read/write 2605 * slice - the disk slice this request is for 2606 * offset - relative disk offset 2607 * cb_type - type of call - STRATEGY or SYNC 2608 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2609 * . mode for ioctl(9e) 2610 * . LP64 diskaddr_t (block I/O) 2611 * dir - direction of operation (READ/WRITE/BOTH) 2612 * 2613 * Return Codes: 2614 * 0 - Success 2615 * EIO - The connection timeout was reached * ENXIO - The driver is detaching 2616 */ 2617 static int 2618 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 2619 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2620 void *cb_arg, vio_desc_direction_t dir) 2621 { 2622 ASSERT(vdcp != NULL); 2623 ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 2624 2625 mutex_enter(&vdcp->lock); 2626 2627 do { 2628 while (vdcp->state != VDC_STATE_RUNNING) { 2629 2630 /* return error if detaching */ 2631 if (vdcp->state == VDC_STATE_DETACH) { 2632 mutex_exit(&vdcp->lock); 2633 return (ENXIO); 2634 } 2635 2636 /* fail request if connection timeout is reached */ 2637 if (vdcp->ctimeout_reached) { 2638 mutex_exit(&vdcp->lock); 2639 return (EIO); 2640 } 2641 2642 cv_wait(&vdcp->running_cv, &vdcp->lock); 2643 } 2644 2645 } while (vdc_populate_descriptor(vdcp, operation, addr, 2646 nbytes, slice, offset, cb_type, cb_arg, dir)); 2647 2648 mutex_exit(&vdcp->lock); 2649 return (0); 2650 } 2651 2652 2653 /* 2654 * Function: 2655 * vdc_populate_descriptor 2656 * 2657 * Description: 2658 * This routine writes the request to be transmitted to vds into a 2659 * free descriptor and sends a message to vds notifying it that the 2660 * ring has been updated; the response is handled asynchronously. 2661 * 2662 * Arguments: 2663 * vdcp - the soft state pointer 2664 * operation - operation we want vds to perform (VD_OP_XXX) 2665 * addr - address of data buf to be read/written.
2666 * nbytes - number of bytes to read/write 2667 * slice - the disk slice this request is for 2668 * offset - relative disk offset 2669 * cb_type - type of call - STRATEGY or SYNC 2670 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2671 * . mode for ioctl(9e) 2672 * . LP64 diskaddr_t (block I/O) 2673 * dir - direction of operation (READ/WRITE/BOTH) 2674 * 2675 * Return Codes: 2676 * 0 2677 * EAGAIN 2678 * ECONNRESET 2679 * ENXIO 2680 */ 2681 static int 2682 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 2683 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2684 void *cb_arg, vio_desc_direction_t dir) 2685 { 2686 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 2687 int idx; /* Index of DRing entry used */ 2688 int next_idx; 2689 vio_dring_msg_t dmsg; 2690 size_t msglen; 2691 int rv; 2692 2693 ASSERT(MUTEX_HELD(&vdcp->lock)); 2694 vdcp->threads_pending++; 2695 loop: 2696 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 2697 2698 /* Get next available D-Ring entry */ 2699 idx = vdcp->dring_curr_idx; 2700 local_dep = &(vdcp->local_dring[idx]); 2701 2702 if (!local_dep->is_free) { 2703 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 2704 vdcp->instance); 2705 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 2706 if (vdcp->state == VDC_STATE_RUNNING || 2707 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2708 goto loop; 2709 } 2710 vdcp->threads_pending--; 2711 return (ECONNRESET); 2712 } 2713 2714 next_idx = idx + 1; 2715 if (next_idx >= vdcp->dring_len) 2716 next_idx = 0; 2717 vdcp->dring_curr_idx = next_idx; 2718 2719 ASSERT(local_dep->is_free); 2720 2721 local_dep->operation = operation; 2722 local_dep->addr = addr; 2723 local_dep->nbytes = nbytes; 2724 local_dep->slice = slice; 2725 local_dep->offset = offset; 2726 local_dep->cb_type = cb_type; 2727 local_dep->cb_arg = cb_arg; 2728 local_dep->dir = dir; 2729 2730 local_dep->is_free = B_FALSE; 2731 2732 rv = vdc_map_to_shared_dring(vdcp, idx); 2733 if (rv) { 2734 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 2735 vdcp->instance); 2736 /* free the descriptor */ 2737 local_dep->is_free = B_TRUE; 2738 vdcp->dring_curr_idx = idx; 2739 cv_wait(&vdcp->membind_cv, &vdcp->lock); 2740 if (vdcp->state == VDC_STATE_RUNNING || 2741 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2742 goto loop; 2743 } 2744 vdcp->threads_pending--; 2745 return (ECONNRESET); 2746 } 2747 2748 /* 2749 * Send a msg with the DRing details to vds 2750 */ 2751 VIO_INIT_DRING_DATA_TAG(dmsg); 2752 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 2753 dmsg.dring_ident = vdcp->dring_ident; 2754 dmsg.start_idx = idx; 2755 dmsg.end_idx = idx; 2756 vdcp->seq_num++; 2757 2758 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp); 2759 2760 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 2761 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 2762 2763 /* 2764 * note we're still holding the lock here to 2765 * make sure the message goes out in order !!!... 2766 */ 2767 msglen = sizeof (dmsg); 2768 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 2769 switch (rv) { 2770 case ECONNRESET: 2771 /* 2772 * vdc_send initiates the reset on failure. 2773 * Since the transaction has already been put 2774 * on the local dring, it will automatically get 2775 * retried when the channel is reset. Given that, 2776 * it is ok to just return success even though the 2777 * send failed. 
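* * In other words, the reset path (see vdc_backup_local_dring() and vdc_resubmit_backup_dring()) snapshots the local dring, and any entry still marked !is_free is replayed once the handshake with the server completes, so the request is retried rather than lost.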
2778 */ 2779 rv = 0; 2780 break; 2781 2782 case 0: /* EOK */ 2783 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 2784 break; 2785 2786 default: 2787 goto cleanup_and_exit; 2788 } 2789 2790 vdcp->threads_pending--; 2791 return (rv); 2792 2793 cleanup_and_exit: 2794 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); vdcp->threads_pending--; 2795 return (ENXIO); 2796 } 2797 2798 /* 2799 * Function: 2800 * vdc_do_sync_op 2801 * 2802 * Description: 2803 * Wrapper around vdc_populate_descriptor that blocks until the 2804 * response to the message is available. 2805 * 2806 * Arguments: 2807 * vdcp - the soft state pointer 2808 * operation - operation we want vds to perform (VD_OP_XXX) 2809 * addr - address of data buf to be read/written. 2810 * nbytes - number of bytes to read/write 2811 * slice - the disk slice this request is for 2812 * offset - relative disk offset 2813 * cb_type - type of call - STRATEGY or SYNC 2814 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2815 * . mode for ioctl(9e) 2816 * . LP64 diskaddr_t (block I/O) 2817 * dir - direction of operation (READ/WRITE/BOTH) 2818 * 2819 * Return Codes: 2820 * 0 2821 * EAGAIN 2822 * EFAULT 2823 * ENXIO 2824 * EIO 2825 */ 2826 static int 2827 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 2828 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 2829 vio_desc_direction_t dir) 2830 { 2831 int status; 2832 2833 ASSERT(cb_type == CB_SYNC); 2834 2835 /* 2836 * Grab the lock; if blocked, wait until the server 2837 * response causes us to wake up again. 2838 */ 2839 mutex_enter(&vdcp->lock); 2840 vdcp->sync_op_cnt++; 2841 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 2842 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 2843 2844 if (vdcp->state == VDC_STATE_DETACH) { 2845 cv_broadcast(&vdcp->sync_blocked_cv); 2846 vdcp->sync_op_cnt--; 2847 mutex_exit(&vdcp->lock); 2848 return (ENXIO); 2849 } 2850 2851 /* now block any other thread entering after us */ 2852 vdcp->sync_op_blocked = B_TRUE; 2853 vdcp->sync_op_pending = B_TRUE; 2854 mutex_exit(&vdcp->lock); 2855 2856 status = vdc_send_request(vdcp, operation, addr, 2857 nbytes, slice, offset, cb_type, cb_arg, dir); 2858 2859 mutex_enter(&vdcp->lock); 2860 2861 if (status != 0) { 2862 vdcp->sync_op_pending = B_FALSE; 2863 } else { 2864 /* 2865 * Block until our transaction completes; 2866 * anyone else waiting then gets to go next. 2867 */ 2868 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 2869 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 2870 2871 DMSG(vdcp, 2, ": operation returned %d\n", 2872 vdcp->sync_op_status); 2873 if (vdcp->state == VDC_STATE_DETACH) { 2874 vdcp->sync_op_pending = B_FALSE; 2875 status = ENXIO; 2876 } else { 2877 status = vdcp->sync_op_status; 2878 } 2879 } 2880 2881 vdcp->sync_op_status = 0; 2882 vdcp->sync_op_blocked = B_FALSE; 2883 vdcp->sync_op_cnt--; 2884 2885 /* signal the next waiting thread */ 2886 cv_signal(&vdcp->sync_blocked_cv); 2887 mutex_exit(&vdcp->lock); 2888 2889 return (status); 2890 } 2891 2892 2893 /* 2894 * Function: 2895 * vdc_drain_response() 2896 * 2897 * Description: 2898 * When a guest is panicking, the completion of requests needs to be 2899 * handled differently because interrupts are disabled and vdc 2900 * will not get messages. We have to poll for the messages instead. 2901 * 2902 * Arguments: 2903 * vdc - soft state pointer for this instance of the device driver.
2904 * 2905 * Return Code: 2906 * 0 - Success 2907 */ 2908 static int 2909 vdc_drain_response(vdc_t *vdc) 2910 { 2911 int rv, idx, retries; 2912 size_t msglen; 2913 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2914 vio_dring_msg_t dmsg; 2915 2916 mutex_enter(&vdc->lock); 2917 2918 retries = 0; 2919 for (;;) { 2920 msglen = sizeof (dmsg); 2921 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 2922 if (rv) { 2923 rv = EINVAL; 2924 break; 2925 } 2926 2927 /* 2928 * if there are no packets wait and check again 2929 */ 2930 if ((rv == 0) && (msglen == 0)) { 2931 if (retries++ > vdc_dump_retries) { 2932 rv = EAGAIN; 2933 break; 2934 } 2935 2936 drv_usecwait(vdc_usec_timeout_dump); 2937 continue; 2938 } 2939 2940 /* 2941 * Ignore all messages that are not ACKs/NACKs to 2942 * DRing requests. 2943 */ 2944 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2945 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2946 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 2947 dmsg.tag.vio_msgtype, 2948 dmsg.tag.vio_subtype, 2949 dmsg.tag.vio_subtype_env); 2950 continue; 2951 } 2952 2953 /* 2954 * set the appropriate return value for the current request. 2955 */ 2956 switch (dmsg.tag.vio_subtype) { 2957 case VIO_SUBTYPE_ACK: 2958 rv = 0; 2959 break; 2960 case VIO_SUBTYPE_NACK: 2961 rv = EAGAIN; 2962 break; 2963 default: 2964 continue; 2965 } 2966 2967 idx = dmsg.start_idx; 2968 if (idx >= vdc->dring_len) { 2969 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 2970 vdc->instance, idx); 2971 continue; 2972 } 2973 ldep = &vdc->local_dring[idx]; 2974 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 2975 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 2976 vdc->instance, idx, ldep->dep->hdr.dstate); 2977 continue; 2978 } 2979 2980 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 2981 vdc->instance, idx, ldep->dep->hdr.dstate); 2982 rv = vdc_depopulate_descriptor(vdc, idx); 2983 if (rv) { 2984 DMSG(vdc, 0, 2985 "[%d] Entry @ %d - depopulate failed ..\n", 2986 vdc->instance, idx); 2987 } 2988 2989 /* if this is the last descriptor - break out of loop */ 2990 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 2991 break; 2992 } 2993 2994 mutex_exit(&vdc->lock); 2995 DMSG(vdc, 0, "End idx=%d\n", idx); 2996 2997 return (rv); 2998 } 2999 3000 3001 /* 3002 * Function: 3003 * vdc_depopulate_descriptor() 3004 * 3005 * Description: 3006 * 3007 * Arguments: 3008 * vdc - soft state pointer for this instance of the device driver. 
3009 * idx - Index of the Descriptor Ring entry being modified 3010 * 3011 * Return Code: 3012 * 0 - Success 3013 */ 3014 static int 3015 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 3016 { 3017 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 3018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 3019 int status = ENXIO; 3020 int rv = 0; 3021 3022 ASSERT(vdc != NULL); 3023 ASSERT(idx < vdc->dring_len); 3024 ldep = &vdc->local_dring[idx]; 3025 ASSERT(ldep != NULL); 3026 ASSERT(MUTEX_HELD(&vdc->lock)); 3027 3028 DMSG(vdc, 2, ": idx = %d\n", idx); 3029 dep = ldep->dep; 3030 ASSERT(dep != NULL); 3031 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 3032 (dep->payload.status == ECANCELED)); 3033 3034 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 3035 3036 ldep->is_free = B_TRUE; 3037 status = dep->payload.status; 3038 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 3039 3040 /* 3041 * If no buffers were used to transfer information to the server when 3042 * populating the descriptor then no memory handles need to be unbound 3043 * and we can return now. 3044 */ 3045 if (ldep->nbytes == 0) { 3046 cv_signal(&vdc->dring_free_cv); 3047 return (status); 3048 } 3049 3050 /* 3051 * If the upper layer passed in a misaligned address we copied the 3052 * data into an aligned buffer before sending it to LDC - we now 3053 * copy it back to the original buffer. 3054 */ 3055 if (ldep->align_addr) { 3056 ASSERT(ldep->addr != NULL); 3057 3058 if (dep->payload.nbytes > 0) 3059 bcopy(ldep->align_addr, ldep->addr, 3060 dep->payload.nbytes); 3061 kmem_free(ldep->align_addr, 3062 sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 3063 ldep->align_addr = NULL; 3064 } 3065 3066 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 3067 if (rv != 0) { 3068 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 3069 vdc->instance, ldep->desc_mhdl, idx, rv); 3070 /* 3071 * The error returned by the vDisk server is more informative 3072 * and thus has a higher priority but if it isn't set we ensure 3073 * that this function returns an error. 3074 */ 3075 if (status == 0) 3076 status = EINVAL; 3077 } 3078 3079 cv_signal(&vdc->membind_cv); 3080 cv_signal(&vdc->dring_free_cv); 3081 3082 return (status); 3083 } 3084 3085 /* 3086 * Function: 3087 * vdc_populate_mem_hdl() 3088 * 3089 * Description: 3090 * Bind the data buffer of the given local descriptor ring entry to the LDC channel so that it can be exported to the vDisk server; a misaligned buffer is first copied into an 8-byte aligned bounce buffer. 3091 * Arguments: 3092 * vdcp - soft state pointer for this instance of the device driver. 3093 * ldep - local descriptor ring entry whose buffer is to be bound 3097 * 3098 * Return Code: 3099 * 0 - Success * EAGAIN - the buffer could not be bound to the channel 3100 */ 3101 static int 3102 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 3103 { 3104 vd_dring_entry_t *dep = NULL; 3105 ldc_mem_handle_t mhdl; 3106 caddr_t vaddr; 3107 size_t nbytes; 3108 uint8_t perm = LDC_MEM_RW; 3109 uint8_t maptype; 3110 int rv = 0; 3111 int i; 3112 3113 ASSERT(vdcp != NULL); 3114 3115 dep = ldep->dep; 3116 mhdl = ldep->desc_mhdl; 3117 3118 switch (ldep->dir) { 3119 case VIO_read_dir: 3120 perm = LDC_MEM_W; 3121 break; 3122 3123 case VIO_write_dir: 3124 perm = LDC_MEM_R; 3125 break; 3126 3127 case VIO_both_dir: 3128 perm = LDC_MEM_RW; 3129 break; 3130 3131 default: 3132 ASSERT(0); /* catch bad programming in vdc */ 3133 } 3134 3135 /* 3136 * LDC expects any addresses passed in to be 8-byte aligned.
We need 3137 to copy the contents of any misaligned buffers to a newly allocated 3138 * buffer and bind it instead (and copy the contents back to the 3139 * original buffer passed in when depopulating the descriptor) 3140 */ 3141 vaddr = ldep->addr; 3142 nbytes = ldep->nbytes; 3143 if (((uint64_t)vaddr & 0x7) != 0) { 3144 ASSERT(ldep->align_addr == NULL); 3145 ldep->align_addr = 3146 kmem_alloc(sizeof (caddr_t) * 3147 P2ROUNDUP(nbytes, 8), KM_SLEEP); 3148 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 3149 "(buf=%p nb=%ld op=%d)\n", 3150 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 3151 nbytes, ldep->operation); 3152 if (perm != LDC_MEM_W) 3153 bcopy(vaddr, ldep->align_addr, nbytes); 3154 vaddr = ldep->align_addr; 3155 } 3156 3157 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 3158 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 3159 maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 3160 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 3161 vdcp->instance, dep->payload.ncookies); 3162 if (rv != 0) { 3163 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 3164 "(mhdl=%p, buf=%p, err=%d)\n", 3165 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 3166 if (ldep->align_addr) { 3167 kmem_free(ldep->align_addr, 3168 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 3169 ldep->align_addr = NULL; 3170 } 3171 return (EAGAIN); 3172 } 3173 3174 /* 3175 * Get the other cookies (if any). 3176 */ 3177 for (i = 1; i < dep->payload.ncookies; i++) { 3178 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 3179 if (rv != 0) { 3180 (void) ldc_mem_unbind_handle(mhdl); 3181 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3182 "(mhdl=%lx cnum=%d), err=%d", 3183 vdcp->instance, mhdl, i, rv); 3184 if (ldep->align_addr) { 3185 kmem_free(ldep->align_addr, 3186 sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); /* must match the kmem_alloc() size above */ 3187 ldep->align_addr = NULL; 3188 } 3189 return (EAGAIN); 3190 } 3191 } 3192 3193 return (rv); 3194 } 3195 3196 /* 3197 * Interrupt handlers for messages from LDC 3198 */ 3199 3200 /* 3201 * Function: 3202 * vdc_handle_cb() 3203 * 3204 * Description: 3205 * LDC callback registered for this channel. It handles channel-up, read and reset/down events by updating the driver state and waking any threads waiting on those events. 3206 * Arguments: 3207 * event - Type of event (LDC_EVT_xxx) that triggered the callback 3208 * arg - soft state pointer for this instance of the device driver. 3209 * 3210 * Return Code: 3211 * 0 - Success 3212 */ 3213 static uint_t 3214 vdc_handle_cb(uint64_t event, caddr_t arg) 3215 { 3216 ldc_status_t ldc_state; 3217 int rv = 0; 3218 3219 vdc_t *vdc = (vdc_t *)(void *)arg; 3220 3221 ASSERT(vdc != NULL); 3222 3223 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 3224 3225 /* 3226 * Depending on the type of event that triggered this callback, 3227 * we modify the handshake state or read the data. 3228 * 3229 * NOTE: not done as a switch() as event could be triggered by 3230 * a state change and a read request. Also the ordering of the 3231 * check for the event types is deliberate. 3232 */ 3233 if (event & LDC_EVT_UP) { 3234 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 3235 3236 mutex_enter(&vdc->lock); 3237 3238 /* get LDC state */ 3239 rv = ldc_status(vdc->ldc_handle, &ldc_state); 3240 if (rv != 0) { 3241 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 3242 vdc->instance, rv); mutex_exit(&vdc->lock); 3243 return (LDC_SUCCESS); 3244 } 3245 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 3246 /* 3247 * Reset the transaction sequence numbers when 3248 * LDC comes up. We then kick off the handshake 3249 * negotiation with the vDisk server.
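* * The "kick" is indirect: signalling initwait_cv below wakes vdc_process_msg_thread(), which is blocked in the VDC_STATE_INIT_WAITING state and then moves the state machine on to VDC_STATE_NEGOTIATE.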
3250 */ 3251 vdc->seq_num = 1; 3252 vdc->seq_num_reply = 0; 3253 vdc->ldc_state = ldc_state; 3254 cv_signal(&vdc->initwait_cv); 3255 } 3256 3257 mutex_exit(&vdc->lock); 3258 } 3259 3260 if (event & LDC_EVT_READ) { 3261 DMSG(vdc, 1, "[%d] Received LDC_EVT_READ\n", vdc->instance); 3262 mutex_enter(&vdc->read_lock); 3263 cv_signal(&vdc->read_cv); 3264 vdc->read_state = VDC_READ_PENDING; 3265 mutex_exit(&vdc->read_lock); 3266 3267 /* that's all we have to do - no need to handle DOWN/RESET */ 3268 return (LDC_SUCCESS); 3269 } 3270 3271 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 3272 3273 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 3274 3275 mutex_enter(&vdc->lock); 3276 /* 3277 * Need to wake up any readers so they will 3278 * detect that a reset has occurred. 3279 */ 3280 mutex_enter(&vdc->read_lock); 3281 if ((vdc->read_state == VDC_READ_WAITING) || 3282 (vdc->read_state == VDC_READ_RESET)) 3283 cv_signal(&vdc->read_cv); 3284 vdc->read_state = VDC_READ_RESET; 3285 mutex_exit(&vdc->read_lock); 3286 3287 /* wake up any threads waiting for connection to come up */ 3288 if (vdc->state == VDC_STATE_INIT_WAITING) { 3289 vdc->state = VDC_STATE_RESETTING; 3290 cv_signal(&vdc->initwait_cv); 3291 } 3292 3293 mutex_exit(&vdc->lock); 3294 } 3295 3296 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 3297 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3298 vdc->instance, event); 3299 3300 return (LDC_SUCCESS); 3301 } 3302 3303 /* 3304 * Function: 3305 * vdc_wait_for_response() 3306 * 3307 * Description: 3308 * Block waiting for a response from the server. If there is 3309 * no data, the thread blocks on the read_cv which is signalled 3310 * by the callback when an EVT_READ occurs. 3311 * 3312 * Arguments: 3313 * vdcp - soft state pointer for this instance of the device driver. 3314 * 3315 * Return Code: 3316 * 0 - Success 3317 */ 3318 static int 3319 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3320 { 3321 size_t nbytes = sizeof (*msgp); 3322 int status; 3323 3324 ASSERT(vdcp != NULL); 3325 3326 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3327 3328 status = vdc_recv(vdcp, msgp, &nbytes); 3329 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 3330 status, (int)nbytes); 3331 if (status) { 3332 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3333 vdcp->instance, status); 3334 return (status); 3335 } 3336 3337 if (nbytes < sizeof (vio_msg_tag_t)) { 3338 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3339 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3340 return (ENOMSG); 3341 } 3342 3343 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3344 msgp->tag.vio_msgtype, 3345 msgp->tag.vio_subtype, 3346 msgp->tag.vio_subtype_env); 3347 3348 /* 3349 * Verify the Session ID of the message 3350 * 3351 * Every message after the Version has been negotiated should 3352 * have the correct session ID set. 3353 */ 3354 if ((msgp->tag.vio_sid != vdcp->session_id) && 3355 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3356 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3357 "expected 0x%lx [seq num %lx @ %d]", 3358 vdcp->instance, msgp->tag.vio_sid, 3359 vdcp->session_id, 3360 ((vio_dring_msg_t *)msgp)->seq_num, 3361 ((vio_dring_msg_t *)msgp)->start_idx); 3362 return (ENOMSG); 3363 } 3364 return (0); 3365 } 3366 3367 3368 /* 3369 * Function: 3370 * vdc_resubmit_backup_dring() 3371 * 3372 * Description: 3373 * Resubmit each outstanding descriptor in the backed up dring to the 3374 * vDisk server. The Dring was backed up during connection 3375 * reset.
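* Entries are replayed in ring order starting at the saved backup tail, and each resubmitted request is waited on synchronously (via vdc_wait_for_response()) before the next one is sent.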
3376 * 3377 * Arguments: 3378 * vdcp - soft state pointer for this instance of the device driver. 3379 * 3380 * Return Code: 3381 * 0 - Success 3382 */ 3383 static int 3384 vdc_resubmit_backup_dring(vdc_t *vdcp) 3385 { 3386 int count; 3387 int b_idx; 3388 int rv; 3389 int dring_size; 3390 int status; 3391 vio_msg_t vio_msg; 3392 vdc_local_desc_t *curr_ldep; 3393 3394 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3395 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3396 3397 if (vdcp->local_dring_backup == NULL) { 3398 /* the pending requests have already been processed */ 3399 return (0); 3400 } 3401 3402 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3403 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3404 3405 /* 3406 * Walk the backup copy of the local descriptor ring and 3407 * resubmit all the outstanding transactions. 3408 */ 3409 b_idx = vdcp->local_dring_backup_tail; 3410 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3411 3412 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3413 3414 /* only resubmit outstanding transactions */ 3415 if (!curr_ldep->is_free) { 3416 3417 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3418 mutex_enter(&vdcp->lock); 3419 rv = vdc_populate_descriptor(vdcp, curr_ldep->operation, 3420 curr_ldep->addr, curr_ldep->nbytes, 3421 curr_ldep->slice, curr_ldep->offset, 3422 curr_ldep->cb_type, curr_ldep->cb_arg, 3423 curr_ldep->dir); 3424 mutex_exit(&vdcp->lock); 3425 if (rv) { 3426 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3427 vdcp->instance, b_idx); 3428 return (rv); 3429 } 3430 3431 /* Wait for the response message. */ 3432 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 3433 b_idx); 3434 status = vdc_wait_for_response(vdcp, &vio_msg); 3435 if (status) { 3436 DMSG(vdcp, 1, "[%d] wait_for_response " 3437 "returned err=%d\n", vdcp->instance, 3438 status); 3439 return (status); 3440 } 3441 3442 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 3443 status = vdc_process_data_msg(vdcp, &vio_msg); 3444 if (status) { 3445 DMSG(vdcp, 1, "[%d] process_data_msg " 3446 "returned err=%d\n", vdcp->instance, 3447 status); 3448 return (status); 3449 } 3450 } 3451 3452 /* get the next element to submit */ 3453 if (++b_idx >= vdcp->local_dring_backup_len) 3454 b_idx = 0; 3455 } 3456 3457 /* all done - now clear up pending dring copy */ 3458 dring_size = vdcp->local_dring_backup_len * 3459 sizeof (vdcp->local_dring_backup[0]); 3460 3461 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3462 3463 vdcp->local_dring_backup = NULL; 3464 3465 return (0); 3466 } 3467 3468 /* 3469 * Function: 3470 * vdc_cancel_backup_ring 3471 * 3472 * Description: 3473 * Cancel each outstanding descriptor in the backed up dring and notify 3474 * waiting threads that the request has failed. The Dring was backed up during connection reset. 3475 * 3476 * Arguments: 3477 * vdcp - soft state pointer for this instance of the device driver.
3478 * 3479 * Return Code: 3480 * None 3481 */ 3482 void 3483 vdc_cancel_backup_ring(vdc_t *vdcp) 3484 { 3485 vdc_local_desc_t *ldep; 3486 struct buf *bufp; 3487 int count; 3488 int b_idx; 3489 int dring_size; 3490 3491 ASSERT(MUTEX_HELD(&vdcp->lock)); 3492 ASSERT(vdcp->state == VDC_STATE_INIT || 3493 vdcp->state == VDC_STATE_INIT_WAITING || 3494 vdcp->state == VDC_STATE_NEGOTIATE || 3495 vdcp->state == VDC_STATE_RESETTING); 3496 3497 if (vdcp->local_dring_backup == NULL) { 3498 /* the pending requests have already been processed */ 3499 return; 3500 } 3501 3502 DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3503 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3504 3505 /* 3506 * Walk the backup copy of the local descriptor ring and 3507 * cancel all the outstanding transactions. 3508 */ 3509 b_idx = vdcp->local_dring_backup_tail; 3510 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3511 3512 ldep = &(vdcp->local_dring_backup[b_idx]); 3513 3514 /* only cancel outstanding transactions */ 3515 if (!ldep->is_free) { 3516 3517 DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 3518 3519 /* 3520 * All requests have already been cleared from the 3521 * local descriptor ring and the LDC channel has been 3522 * reset so we will never get any reply for these 3523 * requests. Now we just have to notify threads waiting 3524 * for replies that the request has failed. 3525 */ 3526 switch (ldep->cb_type) { 3527 case CB_SYNC: 3528 ASSERT(vdcp->sync_op_pending); 3529 vdcp->sync_op_status = EIO; 3530 vdcp->sync_op_pending = B_FALSE; 3531 cv_signal(&vdcp->sync_pending_cv); 3532 break; 3533 3534 case CB_STRATEGY: 3535 bufp = ldep->cb_arg; 3536 ASSERT(bufp != NULL); 3537 bufp->b_resid = bufp->b_bcount; 3538 bioerror(bufp, EIO); 3539 biodone(bufp); 3540 break; 3541 3542 default: 3543 ASSERT(0); 3544 } 3545 3546 } 3547 3548 /* get the next element to cancel */ 3549 if (++b_idx >= vdcp->local_dring_backup_len) 3550 b_idx = 0; 3551 } 3552 3553 /* all done - now clear up pending dring copy */ 3554 dring_size = vdcp->local_dring_backup_len * 3555 sizeof (vdcp->local_dring_backup[0]); 3556 3557 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3558 3559 vdcp->local_dring_backup = NULL; 3560 3561 DTRACE_IO2(processed, int, count, vdc_t *, vdcp); 3562 } 3563 3564 /* 3565 * Function: 3566 * vdc_connection_timeout 3567 * 3568 * Description: 3569 * This function is invoked if the timeout set to establish the connection 3570 * with vds expires. This will happen if we spend too much time in the 3571 * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states. We will then 3572 * cancel any pending requests and mark them as failed. 3573 * 3574 * If the timeout does not expire, it will be cancelled when we reach the 3575 * VDC_STATE_HANDLE_PENDING or VDC_STATE_RESETTING state. This function can 3576 * be invoked while we are in the VDC_STATE_HANDLE_PENDING or 3577 * VDC_STATE_RESETTING state in which case we do nothing because the 3578 * timeout is being cancelled. 3579 * 3580 * Arguments: 3581 * arg - argument of the timeout function; actually a soft state 3582 * pointer for this instance of the device driver. 3583 * 3584 * Return Code: 3585 * None 3586 */ 3587 void 3588 vdc_connection_timeout(void *arg) 3589 { 3590 vdc_t *vdcp = (vdc_t *)arg; 3591 3592 mutex_enter(&vdcp->lock); 3593 3594 if (vdcp->state == VDC_STATE_HANDLE_PENDING || 3595 vdcp->state == VDC_STATE_DETACH) { 3596 /* 3597 * The connection has just been re-established or 3598 * we are detaching.
3599 */ 3600 vdcp->ctimeout_reached = B_FALSE; 3601 mutex_exit(&vdcp->lock); 3602 return; 3603 } 3604 3605 vdcp->ctimeout_reached = B_TRUE; 3606 3607 /* notify requests waiting for sending */ 3608 cv_broadcast(&vdcp->running_cv); 3609 3610 /* cancel requests waiting for a result */ 3611 vdc_cancel_backup_ring(vdcp); 3612 3613 mutex_exit(&vdcp->lock); 3614 3615 cmn_err(CE_NOTE, "[%d] connection to service domain timeout", 3616 vdcp->instance); 3617 } 3618 3619 /* 3620 * Function: 3621 * vdc_backup_local_dring() 3622 * 3623 * Description: 3624 * Backup the current dring in the event of a reset. The Dring 3625 * transactions will be resubmitted to the server when the 3626 * connection is restored. 3627 * 3628 * Arguments: 3629 * vdcp - soft state pointer for this instance of the device driver. 3630 * 3631 * Return Code: 3632 * None 3633 */ 3634 static void 3635 vdc_backup_local_dring(vdc_t *vdcp) 3636 { 3637 int dring_size; 3638 3639 ASSERT(MUTEX_HELD(&vdcp->lock)); 3640 ASSERT(vdcp->state == VDC_STATE_RESETTING); 3641 3642 /* 3643 * If the backup dring is still around, it means 3644 * that the last restore did not complete. However, 3645 * since we never got back into the running state, 3646 * the backup copy we have is still valid. 3647 */ 3648 if (vdcp->local_dring_backup != NULL) { 3649 DMSG(vdcp, 1, "reusing local descriptor ring backup " 3650 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 3651 vdcp->local_dring_backup_tail); 3652 return; 3653 } 3654 3655 /* 3656 * The backup dring can be NULL and the local dring may not be 3657 * initialized. This can happen if we had a reset while establishing 3658 * a new connection but after the connection has timed out. In that 3659 * case the backup dring is NULL because the requests have been 3660 * cancelled and the reset occurred before the local dring was 3661 * initialized. 3662 */ 3663 if (!(vdcp->initialized & VDC_DRING_LOCAL)) 3664 return; 3665 3666 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 3667 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 3668 3669 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 3670 3671 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 3672 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 3673 3674 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 3675 vdcp->local_dring_backup_len = vdcp->dring_len; 3676 } 3677 3678 /* -------------------------------------------------------------------------- */ 3679 3680 /* 3681 * The following functions process the incoming messages from vds 3682 */ 3683 3684 /* 3685 * Function: 3686 * vdc_process_msg_thread() 3687 * 3688 * Description: 3689 * 3690 * Main VDC message processing thread. Each vDisk instance 3691 * has its own copy of this thread. This thread triggers 3692 * all the handshakes and data exchange with the server. It 3693 * also handles all channel resets. 3694 * 3695 * Arguments: 3696 * vdc - soft state pointer for this instance of the device driver. 3697 * 3698 * Return Code: 3699 * None 3700 */ 3701 static void 3702 vdc_process_msg_thread(vdc_t *vdcp) 3703 { 3704 int status; 3705 int ctimeout; 3706 timeout_id_t tmid = 0; 3707 3708 mutex_enter(&vdcp->lock); 3709 3710 for (;;) { 3711 3712 #define Q(_s) (vdcp->state == _s) ?
#_s : 3713 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 3714 Q(VDC_STATE_INIT) 3715 Q(VDC_STATE_INIT_WAITING) 3716 Q(VDC_STATE_NEGOTIATE) 3717 Q(VDC_STATE_HANDLE_PENDING) 3718 Q(VDC_STATE_RUNNING) 3719 Q(VDC_STATE_RESETTING) 3720 Q(VDC_STATE_DETACH) 3721 "UNKNOWN"); 3722 3723 switch (vdcp->state) { 3724 case VDC_STATE_INIT: 3725 3726 /* 3727 * If requested, start a timeout to check if the 3728 * connection with vds is established in the 3729 * specified delay. If the timeout expires, we 3730 * will cancel any pending request. 3731 * 3732 * If a reset has occurred while establishing 3733 * the connection, we already have a timeout armed 3734 * and in that case we don't need to arm a new one. 3735 */ 3736 ctimeout = (vdc_timeout != 0)? 3737 vdc_timeout : vdcp->ctimeout; 3738 3739 if (ctimeout != 0 && tmid == 0) { 3740 tmid = timeout(vdc_connection_timeout, vdcp, 3741 ctimeout * drv_usectohz(1000000)); 3742 } 3743 3744 /* Check if we have been re-initializing repeatedly */ 3745 if (vdcp->hshake_cnt++ > vdc_hshake_retries && 3746 vdcp->lifecycle != VDC_LC_ONLINE) { 3747 cmn_err(CE_NOTE, "[%d] disk access failed.\n", 3748 vdcp->instance); 3749 vdcp->state = VDC_STATE_DETACH; 3750 break; 3751 } 3752 3753 /* Bring up connection with vds via LDC */ 3754 status = vdc_start_ldc_connection(vdcp); 3755 if (status == EINVAL) { 3756 DMSG(vdcp, 0, "[%d] Could not start LDC", 3757 vdcp->instance); 3758 vdcp->state = VDC_STATE_DETACH; 3759 } else { 3760 vdcp->state = VDC_STATE_INIT_WAITING; 3761 } 3762 break; 3763 3764 case VDC_STATE_INIT_WAITING: 3765 3766 /* 3767 * Let the callback event move us on 3768 * when the channel is open to the server 3769 */ 3770 while (vdcp->ldc_state != LDC_UP) { 3771 cv_wait(&vdcp->initwait_cv, &vdcp->lock); 3772 if (vdcp->state != VDC_STATE_INIT_WAITING) { 3773 DMSG(vdcp, 0, 3774 "state moved to %d out from under us...\n", 3775 vdcp->state); 3776 3777 break; 3778 } 3779 } 3780 if (vdcp->state == VDC_STATE_INIT_WAITING && 3781 vdcp->ldc_state == LDC_UP) { 3782 vdcp->state = VDC_STATE_NEGOTIATE; 3783 } 3784 break; 3785 3786 case VDC_STATE_NEGOTIATE: 3787 switch (status = vdc_ver_negotiation(vdcp)) { 3788 case 0: 3789 break; 3790 default: 3791 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 3792 status); 3793 goto reset; 3794 } 3795 3796 switch (status = vdc_attr_negotiation(vdcp)) { 3797 case 0: 3798 break; 3799 default: 3800 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 3801 status); 3802 goto reset; 3803 } 3804 3805 switch (status = vdc_dring_negotiation(vdcp)) { 3806 case 0: 3807 break; 3808 default: 3809 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 3810 status); 3811 goto reset; 3812 } 3813 3814 switch (status = vdc_rdx_exchange(vdcp)) { 3815 case 0: 3816 vdcp->state = VDC_STATE_HANDLE_PENDING; 3817 goto done; 3818 default: 3819 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 3820 status); 3821 goto reset; 3822 } 3823 reset: 3824 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 3825 status); 3826 vdcp->state = VDC_STATE_RESETTING; 3827 vdcp->self_reset = B_TRUE; 3828 done: 3829 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 3830 vdcp->state); 3831 break; 3832 3833 case VDC_STATE_HANDLE_PENDING: 3834 3835 if (vdcp->ctimeout_reached) { 3836 /* 3837 * The connection timeout had been reached so 3838 * pending requests have been cancelled. Now 3839 * that the connection is back we can reset 3840 * the timeout.
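* * Note that no untimeout(9F) call is needed in this case: ctimeout_reached means the timeout has already fired, so clearing tmid is sufficient.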
3841 */ 3842 ASSERT(vdcp->local_dring_backup == NULL); 3843 ASSERT(tmid != 0); 3844 tmid = 0; 3845 vdcp->ctimeout_reached = B_FALSE; 3846 vdcp->state = VDC_STATE_RUNNING; 3847 DMSG(vdcp, 0, "[%d] connection to service " 3848 "domain is up", vdcp->instance); 3849 break; 3850 } 3851 3852 mutex_exit(&vdcp->lock); 3853 if (tmid != 0) { 3854 (void) untimeout(tmid); 3855 tmid = 0; 3856 } 3857 status = vdc_resubmit_backup_dring(vdcp); 3858 mutex_enter(&vdcp->lock); 3859 3860 if (status) 3861 vdcp->state = VDC_STATE_RESETTING; 3862 else 3863 vdcp->state = VDC_STATE_RUNNING; 3864 3865 break; 3866 3867 /* enter running state */ 3868 case VDC_STATE_RUNNING: 3869 /* 3870 * Signal anyone waiting for the connection 3871 * to come on line. 3872 */ 3873 vdcp->hshake_cnt = 0; 3874 cv_broadcast(&vdcp->running_cv); 3875 mutex_exit(&vdcp->lock); 3876 3877 for (;;) { 3878 vio_msg_t msg; 3879 status = vdc_wait_for_response(vdcp, &msg); 3880 if (status) break; 3881 3882 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 3883 vdcp->instance); 3884 status = vdc_process_data_msg(vdcp, &msg); 3885 if (status) { 3886 DMSG(vdcp, 1, "[%d] process_data_msg " 3887 "returned err=%d\n", vdcp->instance, 3888 status); 3889 break; 3890 } 3891 3892 } 3893 3894 mutex_enter(&vdcp->lock); 3895 3896 vdcp->state = VDC_STATE_RESETTING; 3897 vdcp->self_reset = B_TRUE; 3898 break; 3899 3900 case VDC_STATE_RESETTING: 3901 /* 3902 * When we reach this state, we either come from the 3903 * VDC_STATE_RUNNING state and we can have pending 3904 * requests but no timeout is armed; or we come from 3905 * the VDC_STATE_INIT_WAITING, VDC_STATE_NEGOTIATE or 3906 * VDC_STATE_HANDLE_PENDING state and there is no pending 3907 * request or pending requests have already been copied 3908 * into the backup dring. So we can safely keep the 3909 * connection timeout armed while we are in this state. 3910 */ 3911 3912 DMSG(vdcp, 0, "Initiating channel reset " 3913 "(pending = %d)\n", (int)vdcp->threads_pending); 3914 3915 if (vdcp->self_reset) { 3916 DMSG(vdcp, 0, 3917 "[%d] calling stop_ldc_connection.\n", 3918 vdcp->instance); 3919 status = vdc_stop_ldc_connection(vdcp); 3920 vdcp->self_reset = B_FALSE; 3921 } 3922 3923 /* 3924 * Wait for all threads that are currently waiting 3925 * for a free dring entry.
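* * The broadcasts below wake any threads blocked in vdc_populate_descriptor() on dring_free_cv or membind_cv; because the state is no longer VDC_STATE_RUNNING or VDC_STATE_HANDLE_PENDING, they return ECONNRESET and drop threads_pending to zero.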
3926 */ 3927 while (vdcp->threads_pending) { 3928 cv_broadcast(&vdcp->membind_cv); 3929 cv_broadcast(&vdcp->dring_free_cv); 3930 mutex_exit(&vdcp->lock); 3931 /* give the waiters enough time to wake up */ 3932 delay(vdc_hz_min_ldc_delay); 3933 mutex_enter(&vdcp->lock); 3934 } 3935 3936 ASSERT(vdcp->threads_pending == 0); 3937 3938 /* Sanity check that no thread is receiving */ 3939 ASSERT(vdcp->read_state != VDC_READ_WAITING); 3940 3941 vdcp->read_state = VDC_READ_IDLE; 3942 3943 vdc_backup_local_dring(vdcp); 3944 3945 /* cleanup the old d-ring */ 3946 vdc_destroy_descriptor_ring(vdcp); 3947 3948 /* go and start again */ 3949 vdcp->state = VDC_STATE_INIT; 3950 3951 break; 3952 3953 case VDC_STATE_DETACH: 3954 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 3955 vdcp->instance); 3956 3957 /* cancel any pending timeout */ 3958 mutex_exit(&vdcp->lock); 3959 if (tmid != 0) { 3960 (void) untimeout(tmid); 3961 tmid = 0; 3962 } 3963 mutex_enter(&vdcp->lock); 3964 3965 /* 3966 * Signal anyone waiting for connection 3967 * to come online 3968 */ 3969 cv_broadcast(&vdcp->running_cv); 3970 3971 while (vdcp->sync_op_pending) { 3972 cv_signal(&vdcp->sync_pending_cv); 3973 cv_signal(&vdcp->sync_blocked_cv); 3974 mutex_exit(&vdcp->lock); 3975 /* give the waiters enough time to wake up */ 3976 delay(vdc_hz_min_ldc_delay); 3977 mutex_enter(&vdcp->lock); 3978 } 3979 3980 mutex_exit(&vdcp->lock); 3981 3982 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 3983 vdcp->instance); 3984 thread_exit(); 3985 break; 3986 } 3987 } 3988 } 3989 3990 3991 /* 3992 * Function: 3993 * vdc_process_data_msg() 3994 * 3995 * Description: 3996 * This function is called by the message processing thread each time 3997 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 3998 * be an ACK or NACK from vds[1] which vdc handles as follows. 3999 * ACK - wake up the waiting thread 4000 * NACK - resend any messages necessary 4001 * 4002 * [1] Although the message format allows it, vds should not send a 4003 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 4004 * some bizarre reason it does, vdc will reset the connection. 4005 * 4006 * Arguments: 4007 * vdc - soft state pointer for this instance of the device driver. 4008 * msg - the LDC message sent by vds 4009 * 4010 * Return Code: 4011 * 0 - Success. 4012 * > 0 - error value returned by LDC 4013 */ 4014 static int 4015 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 4016 { 4017 int status = 0; 4018 vio_dring_msg_t *dring_msg; 4019 vdc_local_desc_t *ldep = NULL; 4020 int start, end; 4021 int idx; 4022 4023 dring_msg = (vio_dring_msg_t *)msg; 4024 4025 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 4026 ASSERT(vdcp != NULL); 4027 4028 mutex_enter(&vdcp->lock); 4029 4030 /* 4031 * Check to see if the message has bogus data 4032 */ 4033 idx = start = dring_msg->start_idx; 4034 end = dring_msg->end_idx; 4035 if ((start >= vdcp->dring_len) || 4036 (end >= vdcp->dring_len) || (end < -1)) { 4037 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 4038 vdcp->instance, start, end); 4039 mutex_exit(&vdcp->lock); 4040 return (EINVAL); 4041 } 4042 4043 /* 4044 * Verify that the sequence number is what vdc expects. 
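* * vdc_verify_seq_num() (not shown here) classifies the message: VDC_SEQ_NUM_TODO means process it, VDC_SEQ_NUM_SKIP means it can safely be ignored (for example, it was already accounted for), and VDC_SEQ_NUM_INVALID means the sequencing is broken and the connection must be reset.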
4045 */ 4046 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 4047 case VDC_SEQ_NUM_TODO: 4048 break; /* keep processing this message */ 4049 case VDC_SEQ_NUM_SKIP: 4050 mutex_exit(&vdcp->lock); 4051 return (0); 4052 case VDC_SEQ_NUM_INVALID: 4053 mutex_exit(&vdcp->lock); 4054 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 4055 return (ENXIO); 4056 } 4057 4058 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 4059 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 4060 VDC_DUMP_DRING_MSG(dring_msg); 4061 mutex_exit(&vdcp->lock); 4062 return (EIO); 4063 4064 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 4065 mutex_exit(&vdcp->lock); 4066 return (EPROTO); 4067 } 4068 4069 DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp); 4070 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 4071 ASSERT(start == end); 4072 4073 ldep = &vdcp->local_dring[idx]; 4074 4075 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 4076 ldep->dep->hdr.dstate, ldep->cb_type); 4077 4078 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 4079 struct buf *bufp; 4080 4081 switch (ldep->cb_type) { 4082 case CB_SYNC: 4083 ASSERT(vdcp->sync_op_pending); 4084 4085 status = vdc_depopulate_descriptor(vdcp, idx); 4086 vdcp->sync_op_status = status; 4087 vdcp->sync_op_pending = B_FALSE; 4088 cv_signal(&vdcp->sync_pending_cv); 4089 break; 4090 4091 case CB_STRATEGY: 4092 bufp = ldep->cb_arg; 4093 ASSERT(bufp != NULL); 4094 bufp->b_resid = 4095 bufp->b_bcount - ldep->dep->payload.nbytes; 4096 status = ldep->dep->payload.status; /* Future:ntoh */ 4097 if (status != 0) { 4098 DMSG(vdcp, 1, "strategy status=%d\n", status); 4099 bioerror(bufp, status); 4100 } 4101 status = vdc_depopulate_descriptor(vdcp, idx); 4102 biodone(bufp); 4103 4104 DMSG(vdcp, 1, 4105 "strategy complete req=%ld bytes resp=%ld bytes\n", 4106 bufp->b_bcount, ldep->dep->payload.nbytes); 4107 break; 4108 4109 default: 4110 ASSERT(0); 4111 } 4112 } 4113 4114 /* let the arrival signal propagate */ 4115 mutex_exit(&vdcp->lock); 4116 4117 /* probe gives the count of how many entries were processed */ 4118 DTRACE_IO2(processed, int, 1, vdc_t *, vdcp); 4119 4120 return (0); 4121 } 4122 4123 4124 /* 4125 * Function: 4126 * vdc_handle_ver_msg() 4127 * 4128 * Description: 4129 * Handle the response (ACK/NACK/INFO) to the version negotiation message that vdc sent to the vDisk server: record the agreed version on an ACK, or retry with the next supported version on a NACK. 4130 * Arguments: 4131 * vdc - soft state pointer for this instance of the device driver.
4132 * ver_msg - LDC message sent by vDisk server 4133 * 4134 * Return Code: 4135 * 0 - Success 4136 */ 4137 static int 4138 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 4139 { 4140 int status = 0; 4141 4142 ASSERT(vdc != NULL); 4143 ASSERT(mutex_owned(&vdc->lock)); 4144 4145 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 4146 return (EPROTO); 4147 } 4148 4149 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 4150 return (EINVAL); 4151 } 4152 4153 switch (ver_msg->tag.vio_subtype) { 4154 case VIO_SUBTYPE_ACK: 4155 /* 4156 * We check to see if the version returned is indeed supported 4157 * (the server may have adjusted the minor number downwards, 4158 * in which case 'ver_msg' contains the actual version agreed). 4159 */ 4160 if (vdc_is_supported_version(ver_msg)) { 4161 vdc->ver.major = ver_msg->ver_major; 4162 vdc->ver.minor = ver_msg->ver_minor; 4163 ASSERT(vdc->ver.major > 0); 4164 } else { 4165 status = EPROTO; 4166 } 4167 break; 4168 4169 case VIO_SUBTYPE_NACK: 4170 /* 4171 * call vdc_is_supported_version() which will return the next 4172 * supported version (if any) in 'ver_msg' 4173 */ 4174 (void) vdc_is_supported_version(ver_msg); 4175 if (ver_msg->ver_major > 0) { 4176 size_t len = sizeof (*ver_msg); 4177 4178 ASSERT(vdc->ver.major > 0); 4179 4180 /* reset the necessary fields and resend */ 4181 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 4182 ver_msg->dev_class = VDEV_DISK; 4183 4184 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 4185 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 4186 vdc->instance, status); 4187 if (len != sizeof (*ver_msg)) 4188 status = EBADMSG; 4189 } else { 4190 DMSG(vdc, 0, "[%d] No common version with vDisk server", 4191 vdc->instance); 4192 status = ENOTSUP; 4193 } 4194 4195 break; 4196 case VIO_SUBTYPE_INFO: 4197 /* 4198 * Handle the case where vds starts the handshake 4199 * (for now only vdc is the instigator) 4200 */ 4201 status = ENOTSUP; 4202 break; 4203 4204 default: 4205 status = EINVAL; 4206 break; 4207 } 4208 4209 return (status); 4210 } 4211 4212 /* 4213 * Function: 4214 * vdc_handle_attr_msg() 4215 * 4216 * Description: 4217 * Handle an attribute negotiation (VIO_ATTR_INFO) message from the vDisk server. 4218 * Arguments: 4219 * vdc - soft state pointer for this instance of the device driver. 4220 * attr_msg - LDC message sent by vDisk server 4221 * 4222 * Return Code: 4223 * 0 - Success 4224 */ 4225 static int 4226 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 4227 { 4228 int status = 0; 4229 4230 ASSERT(vdc != NULL); 4231 ASSERT(mutex_owned(&vdc->lock)); 4232 4233 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 4234 return (EPROTO); 4235 } 4236 4237 switch (attr_msg->tag.vio_subtype) { 4238 case VIO_SUBTYPE_ACK: 4239 /* 4240 * We now verify the attributes sent by vds. 4241 */ 4242 if (attr_msg->vdisk_size == 0) { 4243 DMSG(vdc, 0, "[%d] Invalid disk size from vds", 4244 vdc->instance); 4245 status = EINVAL; 4246 break; 4247 } 4248 4249 if (attr_msg->max_xfer_sz == 0) { 4250 DMSG(vdc, 0, "[%d] Invalid transfer size from vds", 4251 vdc->instance); 4252 status = EINVAL; 4253 break; 4254 } 4255 4256 /* 4257 * If the disk size is already set, check that it hasn't changed.
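 * (The check below compares against the cached vdc->vdisk_size; a vds * that reconnects exporting a different-sized backing device fails the * handshake with EINVAL rather than silently changing the geometry * under the upper layers.)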
4258 */ 4259 if ((vdc->vdisk_size != 0) && 4260 (vdc->vdisk_size != attr_msg->vdisk_size)) { 4261 DMSG(vdc, 0, "[%d] Different disk size from vds " 4262 "(old=0x%lx - new=0x%lx)", vdc->instance, 4263 vdc->vdisk_size, attr_msg->vdisk_size); 4264 status = EINVAL; 4265 break; 4266 } 4267 4268 vdc->vdisk_size = attr_msg->vdisk_size; 4269 vdc->vdisk_type = attr_msg->vdisk_type; 4270 vdc->operations = attr_msg->operations; 4271 if (vio_ver_is_supported(vdc->ver, 1, 1)) 4272 vdc->vdisk_media = attr_msg->vdisk_media; 4273 else 4274 vdc->vdisk_media = 0; 4275 4276 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 4277 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 4278 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 4279 vdc->instance, vdc->block_size, 4280 attr_msg->vdisk_block_size); 4281 4282 /* 4283 * We don't know at compile time what the vDisk server will 4284 * think are good values but we apply a large (arbitrary) 4285 * upper bound to prevent memory exhaustion in vdc if it were 4286 * allocating a DRing based on huge values sent by the server. 4287 * We should never exceed this bound unless the message 4288 * was garbage. 4289 */ 4290 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 4291 (PAGESIZE * DEV_BSIZE)) { 4292 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 4293 vdc->block_size = attr_msg->vdisk_block_size; 4294 } else { 4295 DMSG(vdc, 0, "[%d] vds block transfer size too big;" 4296 " using max supported by vdc", vdc->instance); 4297 } 4298 4299 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 4300 (attr_msg->vdisk_size > INT64_MAX) || 4301 (attr_msg->operations == 0) || 4302 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 4303 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 4304 vdc->instance); 4305 status = EINVAL; 4306 break; 4307 } 4308 4309 /* 4310 * Now that we have received all attributes we can create a 4311 * fake geometry for the disk. 4312 */ 4313 vdc_create_fake_geometry(vdc); 4314 break; 4315 4316 case VIO_SUBTYPE_NACK: 4317 /* 4318 * vds could not handle the attributes we sent so we 4319 * stop negotiating. 4320 */ 4321 status = EPROTO; 4322 break; 4323 4324 case VIO_SUBTYPE_INFO: 4325 /* 4326 * Handle the case where vds starts the handshake 4327 * (for now, vdc is the only supported instigator) 4328 */ 4329 status = ENOTSUP; 4330 break; 4331 4332 default: 4333 status = ENOTSUP; 4334 break; 4335 } 4336 4337 return (status); 4338 } 4339 4340 /* 4341 * Function: 4342 * vdc_handle_dring_reg_msg() 4343 * 4344 * Description: 4345 * Handle a descriptor ring registration (VIO_DRING_REG) message from the vDisk server. 4346 * Arguments: 4347 * vdc - soft state pointer for this instance of the driver. 4348 * dring_msg - LDC message sent by vDisk server 4349 * 4350 * Return Code: 4351 * 0 - Success 4352 */ 4353 static int 4354 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 4355 { 4356 int status = 0; 4357 4358 ASSERT(vdc != NULL); 4359 ASSERT(mutex_owned(&vdc->lock)); 4360 4361 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 4362 return (EPROTO); 4363 } 4364 4365 switch (dring_msg->tag.vio_subtype) { 4366 case VIO_SUBTYPE_ACK: 4367 /* save the received dring_ident */ 4368 vdc->dring_ident = dring_msg->dring_ident; 4369 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 4370 vdc->instance, vdc->dring_ident); 4371 break; 4372 4373 case VIO_SUBTYPE_NACK: 4374 /* 4375 * vds could not handle the DRing info we sent so we 4376 * stop negotiating.
4377 */ 4378 DMSG(vdc, 0, "[%d] server could not register DRing\n", 4379 vdc->instance); 4380 status = EPROTO; 4381 break; 4382 4383 case VIO_SUBTYPE_INFO: 4384 /* 4385 * Handle the case where vds starts the handshake 4386 * (for now only vdc is the instigator) 4387 */ 4388 status = ENOTSUP; 4389 break; 4390 default: 4391 status = ENOTSUP; 4392 } 4393 4394 return (status); 4395 } 4396 4397 /* 4398 * Function: 4399 * vdc_verify_seq_num() 4400 * 4401 * Description: 4402 * This function verifies that the sequence number sent back by the vDisk 4403 * server with the latest message is what is expected (i.e. it is greater 4404 * than the last seq num sent by the vDisk server and less than or equal 4405 * to the last seq num generated by vdc). 4406 * 4407 * It then checks the request ID to see if any requests need processing 4408 * in the DRing. 4409 * 4410 * Arguments: 4411 * vdc - soft state pointer for this instance of the driver. 4412 * dring_msg - pointer to the LDC message sent by vds 4413 * 4414 * Return Code: 4415 * VDC_SEQ_NUM_TODO - Message needs to be processed 4416 * VDC_SEQ_NUM_SKIP - Message has already been processed 4417 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync that 4418 * vdc cannot deal with them 4419 */ 4420 static int 4421 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 4422 { 4423 ASSERT(vdc != NULL); 4424 ASSERT(dring_msg != NULL); 4425 ASSERT(mutex_owned(&vdc->lock)); 4426 4427 /* 4428 * Check to see if the messages were responded to in the correct 4429 * order by vds. 4430 */ 4431 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 4432 (dring_msg->seq_num > vdc->seq_num)) { 4433 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 4434 "must be > %lu and <= %lu (last proc req %lu sent %lu)\n", 4435 vdc->instance, dring_msg->seq_num, 4436 vdc->seq_num_reply, vdc->seq_num, 4437 vdc->req_id_proc, vdc->req_id); 4438 return (VDC_SEQ_NUM_INVALID); 4439 } 4440 vdc->seq_num_reply = dring_msg->seq_num; 4441 4442 if (vdc->req_id_proc < vdc->req_id) 4443 return (VDC_SEQ_NUM_TODO); 4444 else 4445 return (VDC_SEQ_NUM_SKIP); 4446 } 4447 4448 4449 /* 4450 * Function: 4451 * vdc_is_supported_version() 4452 * 4453 * Description: 4454 * This routine checks if the major/minor version numbers specified in 4455 * 'ver_msg' are supported. If not, it finds the next version that is 4456 * in the supported version list 'vdc_version[]' and sets the fields in 4457 * 'ver_msg' to those values. 4458 * 4459 * Arguments: 4460 * ver_msg - LDC message sent by vDisk server 4461 * 4462 * Return Code: 4463 * B_TRUE - Success 4464 * B_FALSE - Version not supported 4465 */ 4466 static boolean_t 4467 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 4468 { 4469 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 4470 4471 for (int i = 0; i < vdc_num_versions; i++) { 4472 ASSERT(vdc_version[i].major > 0); 4473 ASSERT((i == 0) || 4474 (vdc_version[i].major < vdc_version[i-1].major)); 4475 4476 /* 4477 * If the major versions match, adjust the minor version, if 4478 * necessary, down to the highest value supported by this 4479 * client.
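 * For example (a sketch): with vdc_version[] = {{1, 0}}, a message * carrying version 1.2 has its minor pulled down to 1.0 here and is * accepted.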
The server should support all minor versions lower 4480 * than the value it sent. 4481 */ 4482 if (ver_msg->ver_major == vdc_version[i].major) { 4483 if (ver_msg->ver_minor > vdc_version[i].minor) { 4484 DMSGX(0, 4485 "Adjusting minor version from %u to %u", 4486 ver_msg->ver_minor, vdc_version[i].minor); 4487 ver_msg->ver_minor = vdc_version[i].minor; 4488 } 4489 return (B_TRUE); 4490 } 4491 4492 /* 4493 * If the message contains a higher major version number, set 4494 * the message's major/minor versions to the current values 4495 * and return false, so this message will get resent with 4496 * these values, and the server will potentially try again 4497 * with the same or a lower version. 4498 */ 4499 if (ver_msg->ver_major > vdc_version[i].major) { 4500 ver_msg->ver_major = vdc_version[i].major; 4501 ver_msg->ver_minor = vdc_version[i].minor; 4502 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 4503 ver_msg->ver_major, ver_msg->ver_minor); 4504 4505 return (B_FALSE); 4506 } 4507 4508 /* 4509 * Otherwise, the message's major version is less than the 4510 * current major version, so continue the loop to the next 4511 * (lower) supported version. 4512 */ 4513 } 4514 4515 /* 4516 * No common version was found; "ground" the version pair in the 4517 * message to terminate negotiation. 4518 */ 4519 ver_msg->ver_major = 0; 4520 ver_msg->ver_minor = 0; 4521 4522 return (B_FALSE); 4523 } 4524 /* -------------------------------------------------------------------------- */ 4525 4526 /* 4527 * DKIO(7I) support 4528 */ 4529 4530 typedef struct vdc_dk_arg { 4531 struct dk_callback dkc; 4532 int mode; 4533 dev_t dev; 4534 vdc_t *vdc; 4535 } vdc_dk_arg_t; 4536 4537 /* 4538 * Function: 4539 * vdc_dkio_flush_cb() 4540 * 4541 * Description: 4542 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 4543 * by kernel code. 4544 * 4545 * Arguments: 4546 * arg - a pointer to a vdc_dk_arg_t structure. 4547 */ 4548 void 4549 vdc_dkio_flush_cb(void *arg) 4550 { 4551 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 4552 struct dk_callback *dkc = NULL; 4553 vdc_t *vdc = NULL; 4554 int rv; 4555 4556 if (dk_arg == NULL) { 4557 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 4558 return; 4559 } 4560 dkc = &dk_arg->dkc; 4561 vdc = dk_arg->vdc; 4562 ASSERT(vdc != NULL); 4563 4564 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 4565 VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir); 4566 if (rv != 0) { 4567 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 4568 vdc->instance, rv, 4569 ddi_model_convert_from(dk_arg->mode & FMODELS)); 4570 } 4571 4572 /* 4573 * Trigger the callback to notify the caller that the ioctl call has 4574 * been completed. 4575 */ 4576 if ((dk_arg->mode & FKIOCTL) && 4577 (dkc != NULL) && 4578 (dkc->dkc_callback != NULL)) { 4579 ASSERT(dkc->dkc_cookie != NULL); 4580 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 4581 } 4582 4583 /* Indicate that one less DKIO write flush is outstanding */ 4584 mutex_enter(&vdc->lock); 4585 vdc->dkio_flush_pending--; 4586 ASSERT(vdc->dkio_flush_pending >= 0); 4587 mutex_exit(&vdc->lock); 4588 4589 /* free the mem that was allocated when the callback was dispatched */ 4590 kmem_free(arg, sizeof (vdc_dk_arg_t)); 4591 } 4592 4593 /* 4594 * Function: 4595 * vdc_dkio_get_partition() 4596 * 4597 * Description: 4598 * This function implements the DKIOCGAPART ioctl.
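 * (DKIOCGAPART reports, for each of the NDKMAP slices, its starting * cylinder and its size in blocks; the cylinder is derived below as * p_start / (dkg_nhead * dkg_nsect).)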
4599 * 4600 * Arguments: 4601 * vdc - soft state pointer 4602 * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 4603 * flag - ioctl flags 4604 */ 4605 static int 4606 vdc_dkio_get_partition(vdc_t *vdc, caddr_t arg, int flag) 4607 { 4608 struct dk_geom *geom; 4609 struct vtoc *vtoc; 4610 union { 4611 struct dk_map map[NDKMAP]; 4612 struct dk_map32 map32[NDKMAP]; 4613 } data; 4614 int i, rv, size; 4615 4616 mutex_enter(&vdc->lock); 4617 4618 if ((rv = vdc_validate_geometry(vdc)) != 0) { 4619 mutex_exit(&vdc->lock); 4620 return (rv); 4621 } 4622 4623 vtoc = vdc->vtoc; 4624 geom = vdc->geom; 4625 4626 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 4627 4628 for (i = 0; i < vtoc->v_nparts; i++) { 4629 data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 4630 (geom->dkg_nhead * geom->dkg_nsect); 4631 data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 4632 } 4633 size = NDKMAP * sizeof (struct dk_map32); 4634 4635 } else { 4636 4637 for (i = 0; i < vtoc->v_nparts; i++) { 4638 data.map[i].dkl_cylno = vtoc->v_part[i].p_start / 4639 (geom->dkg_nhead * geom->dkg_nsect); 4640 data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 4641 } 4642 size = NDKMAP * sizeof (struct dk_map); 4643 4644 } 4645 4646 mutex_exit(&vdc->lock); 4647 4648 if (ddi_copyout(&data, arg, size, flag) != 0) 4649 return (EFAULT); 4650 4651 return (0); 4652 } 4653 4654 /* 4655 * Function: 4656 * vdc_dioctl_rwcmd() 4657 * 4658 * Description: 4659 * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 4660 * for DKC_DIRECT disks to read or write at an absolute disk offset. 4661 * 4662 * Arguments: 4663 * dev - device 4664 * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 4665 * flag - ioctl flags 4666 */ 4667 static int 4668 vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) 4669 { 4670 struct dadkio_rwcmd32 rwcmd32; 4671 struct dadkio_rwcmd rwcmd; 4672 struct iovec aiov; 4673 struct uio auio; 4674 int rw, status; 4675 struct buf *buf; 4676 4677 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 4678 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 4679 sizeof (struct dadkio_rwcmd32), flag)) { 4680 return (EFAULT); 4681 } 4682 rwcmd.cmd = rwcmd32.cmd; 4683 rwcmd.flags = rwcmd32.flags; 4684 rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 4685 rwcmd.buflen = rwcmd32.buflen; 4686 rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 4687 } else { 4688 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 4689 sizeof (struct dadkio_rwcmd), flag)) { 4690 return (EFAULT); 4691 } 4692 } 4693 4694 switch (rwcmd.cmd) { 4695 case DADKIO_RWCMD_READ: 4696 rw = B_READ; 4697 break; 4698 case DADKIO_RWCMD_WRITE: 4699 rw = B_WRITE; 4700 break; 4701 default: 4702 return (EINVAL); 4703 } 4704 4705 bzero((caddr_t)&aiov, sizeof (struct iovec)); 4706 aiov.iov_base = rwcmd.bufaddr; 4707 aiov.iov_len = rwcmd.buflen; 4708 4709 bzero((caddr_t)&auio, sizeof (struct uio)); 4710 auio.uio_iov = &aiov; 4711 auio.uio_iovcnt = 1; 4712 auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; 4713 auio.uio_resid = rwcmd.buflen; 4714 auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 4715 4716 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4717 bioinit(buf); 4718 /* 4719 * We use the private field of buf to specify that this is an 4720 * I/O using an absolute offset. 
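 * (VD_SLICE_NONE is the same marker vdc_validate_geometry() uses when * it reads the label from block 0 of the raw disk.)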
4721 */ 4722 buf->b_private = (void *)VD_SLICE_NONE; 4723 4724 status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); 4725 4726 biofini(buf); 4727 kmem_free(buf, sizeof (buf_t)); 4728 4729 return (status); 4730 } 4731 4732 /* 4733 * This structure is used in the DKIO(7I) array below. 4734 */ 4735 typedef struct vdc_dk_ioctl { 4736 uint8_t op; /* VD_OP_XXX value */ 4737 int cmd; /* Solaris ioctl operation number */ 4738 size_t nbytes; /* size of structure to be copied */ 4739 4740 /* function to convert between vDisk and Solaris structure formats */ 4741 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 4742 int mode, int dir); 4743 } vdc_dk_ioctl_t; 4744 4745 /* 4746 * Subset of DKIO(7I) operations currently supported 4747 */ 4748 static vdc_dk_ioctl_t dk_ioctl[] = { 4749 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 4750 vdc_null_copy_func}, 4751 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 4752 vdc_get_wce_convert}, 4753 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 4754 vdc_set_wce_convert}, 4755 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 4756 vdc_get_vtoc_convert}, 4757 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 4758 vdc_set_vtoc_convert}, 4759 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 4760 vdc_get_geom_convert}, 4761 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 4762 vdc_get_geom_convert}, 4763 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 4764 vdc_get_geom_convert}, 4765 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 4766 vdc_set_geom_convert}, 4767 {VD_OP_GET_EFI, DKIOCGETEFI, 0, 4768 vdc_get_efi_convert}, 4769 {VD_OP_SET_EFI, DKIOCSETEFI, 0, 4770 vdc_set_efi_convert}, 4771 4772 /* DIOCTL_RWCMD is converted to a read or a write */ 4773 {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 4774 4775 /* 4776 * These particular ioctls are not sent to the server - vdc fakes up 4777 * the necessary info. 
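 * (For instance, DKIOCINFO and DKIOCGMEDIAINFO are answered from the * cinfo/minfo structures built by vdc_create_fake_geometry(); see the * switch in vd_process_ioctl() below.)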
4778 */ 4779 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 4780 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 4781 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 4782 {0, DKIOCGAPART, 0, vdc_null_copy_func }, 4783 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 4784 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 4785 }; 4786 4787 /* 4788 * Function: 4789 * vd_process_ioctl() 4790 * 4791 * Description: 4792 * This routine processes disk specific ioctl calls 4793 * 4794 * Arguments: 4795 * dev - the device number 4796 * cmd - the operation [dkio(7I)] to be processed 4797 * arg - pointer to user provided structure 4798 * (contains data to be set or reference parameter for get) 4799 * mode - bit flag, indicating open settings, 32/64 bit type, etc 4800 * 4801 * Return Code: 4802 * 0 4803 * EFAULT 4804 * ENXIO 4805 * EIO 4806 * ENOTSUP 4807 */ 4808 static int 4809 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 4810 { 4811 int instance = VDCUNIT(dev); 4812 vdc_t *vdc = NULL; 4813 int rv = -1; 4814 int idx = 0; /* index into dk_ioctl[] */ 4815 size_t len = 0; /* #bytes to send to vds */ 4816 size_t alloc_len = 0; /* #bytes to allocate mem for */ 4817 caddr_t mem_p = NULL; 4818 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 4819 vdc_dk_ioctl_t *iop; 4820 4821 vdc = ddi_get_soft_state(vdc_state, instance); 4822 if (vdc == NULL) { 4823 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 4824 instance); 4825 return (ENXIO); 4826 } 4827 4828 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 4829 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 4830 4831 /* 4832 * Validate the ioctl operation to be performed. 4833 * 4834 * If we have looped through the array without finding a match then we 4835 * don't support this ioctl. 4836 */ 4837 for (idx = 0; idx < nioctls; idx++) { 4838 if (cmd == dk_ioctl[idx].cmd) 4839 break; 4840 } 4841 4842 if (idx >= nioctls) { 4843 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 4844 vdc->instance, cmd); 4845 return (ENOTSUP); 4846 } 4847 4848 iop = &(dk_ioctl[idx]); 4849 4850 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 4851 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 4852 dk_efi_t dk_efi; 4853 4854 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 4855 if (rv != 0) 4856 return (EFAULT); 4857 4858 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 4859 } else { 4860 len = iop->nbytes; 4861 } 4862 4863 /* 4864 * Deal with the ioctls which the server does not provide. 
vdc can 4865 * fake these up and return immediately 4866 */ 4867 switch (cmd) { 4868 case CDROMREADOFFSET: 4869 case DKIOCREMOVABLE: 4870 case USCSICMD: 4871 return (ENOTTY); 4872 4873 case DIOCTL_RWCMD: 4874 { 4875 if (vdc->cinfo == NULL) 4876 return (ENXIO); 4877 4878 if (vdc->cinfo->dki_ctype != DKC_DIRECT) 4879 return (ENOTTY); 4880 4881 return (vdc_dioctl_rwcmd(dev, arg, mode)); 4882 } 4883 4884 case DKIOCGAPART: 4885 { 4886 return (vdc_dkio_get_partition(vdc, arg, mode)); 4887 } 4888 4889 case DKIOCINFO: 4890 { 4891 struct dk_cinfo cinfo; 4892 if (vdc->cinfo == NULL) 4893 return (ENXIO); 4894 4895 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 4896 cinfo.dki_partition = VDCPART(dev); 4897 4898 rv = ddi_copyout(&cinfo, (void *)arg, 4899 sizeof (struct dk_cinfo), mode); 4900 if (rv != 0) 4901 return (EFAULT); 4902 4903 return (0); 4904 } 4905 4906 case DKIOCGMEDIAINFO: 4907 { 4908 if (vdc->minfo == NULL) 4909 return (ENXIO); 4910 4911 rv = ddi_copyout(vdc->minfo, (void *)arg, 4912 sizeof (struct dk_minfo), mode); 4913 if (rv != 0) 4914 return (EFAULT); 4915 4916 return (0); 4917 } 4918 4919 case DKIOCFLUSHWRITECACHE: 4920 { 4921 struct dk_callback *dkc = 4922 (struct dk_callback *)(uintptr_t)arg; 4923 vdc_dk_arg_t *dkarg = NULL; 4924 4925 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 4926 instance, mode); 4927 4928 /* 4929 * If arg is NULL, then there is no callback function 4930 * registered and the call operates synchronously; we 4931 * break and continue with the rest of the function and 4932 * wait for vds to return (i.e. after the request to 4933 * vds returns successfully, all writes completed prior 4934 * to the ioctl will have been flushed from the disk 4935 * write cache to persistent media). 4936 * 4937 * If a callback function is registered, we dispatch 4938 * the request on a task queue and return immediately. 4939 * The callback will deal with informing the calling 4940 * thread that the flush request is completed. 4941 */ 4942 if (dkc == NULL) 4943 break; 4944 4945 /* 4946 * the asynchronous callback is only supported if 4947 * invoked from within the kernel 4948 */ 4949 if ((mode & FKIOCTL) == 0) 4950 return (ENOTSUP); 4951 4952 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 4953 4954 dkarg->mode = mode; 4955 dkarg->dev = dev; 4956 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 4957 4958 mutex_enter(&vdc->lock); 4959 vdc->dkio_flush_pending++; 4960 dkarg->vdc = vdc; 4961 mutex_exit(&vdc->lock); 4962 4963 /* put the request on a task queue */ 4964 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 4965 (void *)dkarg, DDI_SLEEP); 4966 if (rv == NULL) { 4967 /* clean up if dispatch fails */ 4968 mutex_enter(&vdc->lock); 4969 vdc->dkio_flush_pending--; 4970 mutex_exit(&vdc->lock); 4971 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 4972 } 4973 4974 return (rv == NULL ?
ENOMEM : 0); 4975 } 4976 } 4977 4978 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 4979 ASSERT(iop->op != 0); 4980 4981 /* check if the vDisk server handles the operation for this vDisk */ 4982 if (VD_OP_SUPPORTED(vdc->operations, iop->op) == B_FALSE) { 4983 DMSG(vdc, 0, "[%d] Unsupported VD_OP operation (0x%x)\n", 4984 vdc->instance, iop->op); 4985 return (ENOTSUP); 4986 } 4987 4988 /* LDC requires that the memory being mapped is 8-byte aligned */ 4989 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 4990 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 4991 instance, len, alloc_len); 4992 4993 if (alloc_len > 0) 4994 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 4995 4996 /* 4997 * Call the conversion function for this ioctl which, if necessary, 4998 * converts from the Solaris format to the format ARC'ed 4999 * as part of the vDisk protocol (FWARC 2006/195) 5000 */ 5001 ASSERT(iop->convert != NULL); 5002 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 5003 if (rv != 0) { 5004 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 5005 instance, rv, cmd); 5006 if (mem_p != NULL) 5007 kmem_free(mem_p, alloc_len); 5008 return (rv); 5009 } 5010 5011 /* 5012 * send request to vds to service the ioctl. 5013 */ 5014 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 5015 VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 5016 VIO_both_dir); 5017 5018 if (cmd == DKIOCSVTOC || cmd == DKIOCSETEFI) { 5019 /* 5020 * The disk label may have changed. Revalidate the disk 5021 * geometry. This will also update the device nodes and 5022 * properties. 5023 */ 5024 vdc_validate(vdc); 5025 } 5026 5027 if (rv != 0) { 5028 /* 5029 * This is not necessarily an error. The ioctl could 5030 * be returning a value such as ENOTTY to indicate 5031 * that the ioctl is not applicable. 5032 */ 5033 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 5034 instance, rv, cmd); 5035 if (mem_p != NULL) 5036 kmem_free(mem_p, alloc_len); 5037 5038 return (rv); 5039 } 5040 5041 /* 5042 * Call the conversion function (if it exists) for this ioctl 5043 * which converts from the format ARC'ed as part of the vDisk 5044 * protocol (FWARC 2006/195) back to a format understood by 5045 * the rest of Solaris. 
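 * (For example, a DKIOCGVTOC reply goes through vdc_get_vtoc_convert() * below, which turns the vd_vtoc_t wire format back into a struct vtoc, * or a struct vtoc32 for ILP32 callers.)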
5046 */ 5047 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 5048 if (rv != 0) { 5049 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 5050 instance, rv, cmd); 5051 if (mem_p != NULL) 5052 kmem_free(mem_p, alloc_len); 5053 return (rv); 5054 } 5055 5056 if (mem_p != NULL) 5057 kmem_free(mem_p, alloc_len); 5058 5059 return (rv); 5060 } 5061 5062 /* 5063 * Function: 5064 * vdc_null_copy_func() 5065 * Description: 5066 * This is an empty conversion function used by ioctl calls which 5067 * do not need to convert the data being passed in/out to userland 5068 */ 5069 static int 5070 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 5071 { 5072 _NOTE(ARGUNUSED(vdc)) 5073 _NOTE(ARGUNUSED(from)) 5074 _NOTE(ARGUNUSED(to)) 5075 _NOTE(ARGUNUSED(mode)) 5076 _NOTE(ARGUNUSED(dir)) 5077 5078 return (0); 5079 } 5080 5081 static int 5082 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 5083 int mode, int dir) 5084 { 5085 _NOTE(ARGUNUSED(vdc)) 5086 5087 if (dir == VD_COPYIN) 5088 return (0); /* nothing to do */ 5089 5090 if (ddi_copyout(from, to, sizeof (int), mode) != 0) 5091 return (EFAULT); 5092 5093 return (0); 5094 } 5095 5096 static int 5097 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 5098 int mode, int dir) 5099 { 5100 _NOTE(ARGUNUSED(vdc)) 5101 5102 if (dir == VD_COPYOUT) 5103 return (0); /* nothing to do */ 5104 5105 if (ddi_copyin(from, to, sizeof (int), mode) != 0) 5106 return (EFAULT); 5107 5108 return (0); 5109 } 5110 5111 /* 5112 * Function: 5113 * vdc_get_vtoc_convert() 5114 * 5115 * Description: 5116 * This routine performs the necessary conversions from the DKIOCGVTOC 5117 * Solaris structure to the format defined in FWARC 2006/195. 5118 * 5119 * In the struct vtoc definition, the timestamp field is marked as not 5120 * supported so it is not part of the vDisk protocol (FWARC 2006/195). 5121 * However SVM uses that field to check that it can write into the VTOC, 5122 * so we fake up that field's info. 5123 * 5124 * Arguments: 5125 * vdc - the vDisk client 5126 * from - the buffer containing the data to be copied from 5127 * to - the buffer to be copied to 5128 * mode - flags passed to ioctl() call 5129 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 5130 * 5131 * Return Code: 5132 * 0 - Success 5133 * ENXIO - incorrect buffer passed in. 5134 * EFAULT - ddi_copyout routine encountered an error.
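 * (As with the other *_convert routines, the 'dir' argument makes this * a no-op for the direction that needs no work; only the VD_COPYOUT leg * does any conversion here.)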
5135 */ 5136 static int 5137 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5138 { 5139 int i; 5140 void *tmp_mem = NULL; 5141 void *tmp_memp; 5142 struct vtoc vt; 5143 struct vtoc32 vt32; 5144 int copy_len = 0; 5145 int rv = 0; 5146 5147 if (dir != VD_COPYOUT) 5148 return (0); /* nothing to do */ 5149 5150 if ((from == NULL) || (to == NULL)) 5151 return (ENXIO); 5152 5153 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 5154 copy_len = sizeof (struct vtoc32); 5155 else 5156 copy_len = sizeof (struct vtoc); 5157 5158 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5159 5160 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 5161 5162 /* fake the VTOC timestamp field */ 5163 for (i = 0; i < V_NUMPAR; i++) { 5164 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 5165 } 5166 5167 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5168 /* LINTED E_ASSIGN_NARROW_CONV */ 5169 vtoctovtoc32(vt, vt32); 5170 tmp_memp = &vt32; 5171 } else { 5172 tmp_memp = &vt; 5173 } 5174 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 5175 if (rv != 0) 5176 rv = EFAULT; 5177 5178 kmem_free(tmp_mem, copy_len); 5179 return (rv); 5180 } 5181 5182 /* 5183 * Function: 5184 * vdc_set_vtoc_convert() 5185 * 5186 * Description: 5187 * This routine performs the necessary conversions from the DKIOCSVTOC 5188 * Solaris structure to the format defined in FWARC 2006/195. 5189 * 5190 * Arguments: 5191 * vdc - the vDisk client 5192 * from - Buffer with data 5193 * to - Buffer where data is to be copied to 5194 * mode - flags passed to ioctl 5195 * dir - direction of copy (in or out) 5196 * 5197 * Return Code: 5198 * 0 - Success 5199 * ENXIO - Invalid buffer passed in 5200 * EFAULT - ddi_copyin of data failed 5201 */ 5202 static int 5203 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5204 { 5205 _NOTE(ARGUNUSED(vdc)) 5206 5207 void *tmp_mem = NULL; 5208 struct vtoc vt; 5209 struct vtoc *vtp = &vt; 5210 vd_vtoc_t vtvd; 5211 int copy_len = 0; 5212 int rv = 0; 5213 5214 if (dir != VD_COPYIN) 5215 return (0); /* nothing to do */ 5216 5217 if ((from == NULL) || (to == NULL)) 5218 return (ENXIO); 5219 5220 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 5221 copy_len = sizeof (struct vtoc32); 5222 else 5223 copy_len = sizeof (struct vtoc); 5224 5225 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5226 5227 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 5228 if (rv != 0) { 5229 kmem_free(tmp_mem, copy_len); 5230 return (EFAULT); 5231 } 5232 5233 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5234 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 5235 } else { 5236 vtp = tmp_mem; 5237 } 5238 5239 VTOC2VD_VTOC(vtp, &vtvd); 5240 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 5241 kmem_free(tmp_mem, copy_len); 5242 5243 return (0); 5244 } 5245 5246 /* 5247 * Function: 5248 * vdc_get_geom_convert() 5249 * 5250 * Description: 5251 * This routine performs the necessary conversions from the DKIOCGGEOM, 5252 * DKIOCG_PHYGEOM and DKIOCG_VIRTGEOM Solaris structures to the format 5253 * defined in FWARC 2006/195. 5254 * 5255 * Arguments: 5256 * vdc - the vDisk client 5257 * from - Buffer with data 5258 * to - Buffer where data is to be copied to 5259 * mode - flags passed to ioctl 5260 * dir - direction of copy (in or out) 5261 * 5262 * Return Code: 5263 * 0 - Success 5264 * ENXIO - Invalid buffer passed in 5265 * EFAULT - ddi_copyout of data failed 5266 */ 5267 static int 5268 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5269 {
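	/* no bounce buffer is needed: the geometry is converted on the stack and copied straight out */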
5270 _NOTE(ARGUNUSED(vdc)) 5271 5272 struct dk_geom geom; 5273 int copy_len = sizeof (struct dk_geom); 5274 int rv = 0; 5275 5276 if (dir != VD_COPYOUT) 5277 return (0); /* nothing to do */ 5278 5279 if ((from == NULL) || (to == NULL)) 5280 return (ENXIO); 5281 5282 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 5283 rv = ddi_copyout(&geom, to, copy_len, mode); 5284 if (rv != 0) 5285 rv = EFAULT; 5286 5287 return (rv); 5288 } 5289 5290 /* 5291 * Function: 5292 * vdc_set_geom_convert() 5293 * 5294 * Description: 5295 * This routine performs the necessary conversions from the DKIOCSGEOM 5296 * Solaris structure to the format defined in FWARC 2006/195. 5297 * 5298 * Arguments: 5299 * vdc - the vDisk client 5300 * from - Buffer with data 5301 * to - Buffer where data is to be copied to 5302 * mode - flags passed to ioctl 5303 * dir - direction of copy (in or out) 5304 * 5305 * Return Code: 5306 * 0 - Success 5307 * ENXIO - Invalid buffer passed in 5308 * EFAULT - ddi_copyin of data failed 5309 */ 5310 static int 5311 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5312 { 5313 _NOTE(ARGUNUSED(vdc)) 5314 5315 vd_geom_t vdgeom; 5316 void *tmp_mem = NULL; 5317 int copy_len = sizeof (struct dk_geom); 5318 int rv = 0; 5319 5320 if (dir != VD_COPYIN) 5321 return (0); /* nothing to do */ 5322 5323 if ((from == NULL) || (to == NULL)) 5324 return (ENXIO); 5325 5326 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5327 5328 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 5329 if (rv != 0) { 5330 kmem_free(tmp_mem, copy_len); 5331 return (EFAULT); 5332 } 5333 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 5334 bcopy(&vdgeom, to, sizeof (vdgeom)); 5335 kmem_free(tmp_mem, copy_len); 5336 5337 return (0); 5338 } 5339 5340 static int 5341 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5342 { 5343 _NOTE(ARGUNUSED(vdc)) 5344 5345 vd_efi_t *vd_efi; 5346 dk_efi_t dk_efi; 5347 int rv = 0; 5348 void *uaddr; 5349 5350 if ((from == NULL) || (to == NULL)) 5351 return (ENXIO); 5352 5353 if (dir == VD_COPYIN) { 5354 5355 vd_efi = (vd_efi_t *)to; 5356 5357 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 5358 if (rv != 0) 5359 return (EFAULT); 5360 5361 vd_efi->lba = dk_efi.dki_lba; 5362 vd_efi->length = dk_efi.dki_length; 5363 bzero(vd_efi->data, vd_efi->length); 5364 5365 } else { 5366 5367 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 5368 if (rv != 0) 5369 return (EFAULT); 5370 5371 uaddr = dk_efi.dki_data; 5372 5373 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 5374 5375 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 5376 5377 rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length, 5378 mode); 5379 /* free the bounce buffer before the error check so it cannot leak */ 5380 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5381 if (rv != 0) 5382 return (EFAULT); 5383 } 5384 5385 return (0); 5386 } 5387 5388 static int 5389 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5390 { 5391 _NOTE(ARGUNUSED(vdc)) 5392 5393 dk_efi_t dk_efi; 5394 void *uaddr; 5395 5396 if (dir == VD_COPYOUT) 5397 return (0); /* nothing to do */ 5398 5399 if ((from == NULL) || (to == NULL)) 5400 return (ENXIO); 5401 5402 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 5403 return (EFAULT); 5404 5405 uaddr = dk_efi.dki_data; 5406 5407 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 5408 5409 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) { 5410 /* free the bounce buffer on failure to avoid a leak */ kmem_free(dk_efi.dki_data, dk_efi.dki_length); return (EFAULT); 5411 } 5412 5413 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 5414 5415 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5416
return (0); 5417 } 5418 5419 5420 /* -------------------------------------------------------------------------- */ 5421 5422 /* 5423 * Function: 5424 * vdc_create_fake_geometry() 5425 * 5426 * Description: 5427 * This routine fakes up the disk info needed for some DKIO ioctls such 5428 * as DKIOCINFO and DKIOCGMEDIAINFO [just like lofi(7D) and ramdisk(7D) do] 5429 * 5430 * Note: This function must not be called until the vDisk attributes have 5431 * been exchanged as part of the handshake with the vDisk server. 5432 * 5433 * Arguments: 5434 * vdc - soft state pointer for this instance of the device driver. 5435 * 5436 * Return Code: 5437 * none. 5438 */ 5439 static void 5440 vdc_create_fake_geometry(vdc_t *vdc) 5441 { 5442 ASSERT(vdc != NULL); 5443 ASSERT(vdc->vdisk_size != 0); 5444 ASSERT(vdc->max_xfer_sz != 0); 5445 5446 /* 5447 * DKIOCINFO support 5448 */ 5449 if (vdc->cinfo == NULL) 5450 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 5451 5452 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 5453 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 5454 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 5455 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 5456 /* 5457 * We currently set the controller type to DKC_DIRECT for any disk. 5458 * When SCSI support is implemented, we will eventually change this 5459 * type to DKC_SCSI_CCS for disks supporting the SCSI protocol. 5460 * If the virtual disk is backed by a physical CD/DVD device or 5461 * an ISO image, modify the controller type to indicate this 5462 */ 5463 switch (vdc->vdisk_media) { 5464 case VD_MEDIA_CD: 5465 case VD_MEDIA_DVD: 5466 vdc->cinfo->dki_ctype = DKC_CDROM; 5467 break; 5468 case VD_MEDIA_FIXED: 5469 vdc->cinfo->dki_ctype = DKC_DIRECT; 5470 break; 5471 default: 5472 /* in the case of v1.0 we default to a fixed disk */ 5473 vdc->cinfo->dki_ctype = DKC_DIRECT; 5474 break; 5475 } 5476 vdc->cinfo->dki_flags = DKI_FMTVOL; 5477 vdc->cinfo->dki_cnum = 0; 5478 vdc->cinfo->dki_addr = 0; 5479 vdc->cinfo->dki_space = 0; 5480 vdc->cinfo->dki_prio = 0; 5481 vdc->cinfo->dki_vec = 0; 5482 vdc->cinfo->dki_unit = vdc->instance; 5483 vdc->cinfo->dki_slave = 0; 5484 /* 5485 * The partition number will be created on the fly depending on the 5486 * actual slice (i.e. minor node) that is used to request the data. 5487 */ 5488 vdc->cinfo->dki_partition = 0; 5489 5490 /* 5491 * DKIOCGMEDIAINFO support 5492 */ 5493 if (vdc->minfo == NULL) 5494 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 5495 5496 if (vio_ver_is_supported(vdc->ver, 1, 1)) { 5497 vdc->minfo->dki_media_type = 5498 VD_MEDIATYPE2DK_MEDIATYPE(vdc->vdisk_media); 5499 } else { 5500 vdc->minfo->dki_media_type = DK_FIXED_DISK; 5501 } 5502 5503 vdc->minfo->dki_capacity = vdc->vdisk_size; 5504 vdc->minfo->dki_lbsize = vdc->block_size; 5505 } 5506 5507 static ushort_t 5508 vdc_lbl2cksum(struct dk_label *label) 5509 { 5510 int count; 5511 ushort_t sum, *sp; 5512 5513 count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 5514 sp = (ushort_t *)label; 5515 sum = 0; 5516 while (count--) { 5517 sum ^= *sp++; 5518 } 5519 5520 return (sum); 5521 } 5522 5523 /* 5524 * Function: 5525 * vdc_validate_geometry 5526 * 5527 * Description: 5528 * This routine discovers the label and geometry of the disk. It stores 5529 * the disk label and related information in the vdc structure. If it 5530 * fails to validate the geometry or to discover the disk label then 5531 * the label is marked as unknown (VD_DISK_LABEL_UNK). 
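 * (Discovery order, per the code below: DKIOCGGEOM and DKIOCGVTOC * first; on ENOTSUP fall back to reading an EFI label; then, for full * disks, read block 0 and verify DKL_MAGIC and the label checksum * before trusting the VTOC.)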
5532 * 5533 * Arguments: 5534 * vdc - soft state pointer for this instance of the device driver. 5535 * 5536 * Return Code: 5537 * 0 - success. 5538 * EINVAL - unknown disk label. 5539 * ENOTSUP - geometry not applicable (EFI label). 5540 * EIO - error accessing the disk. 5541 */ 5542 static int 5543 vdc_validate_geometry(vdc_t *vdc) 5544 { 5545 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 5546 dev_t dev; 5547 int rv; 5548 struct dk_label label; 5549 struct dk_geom geom; 5550 struct vtoc vtoc; 5551 5552 ASSERT(vdc != NULL); 5553 ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 5554 ASSERT(MUTEX_HELD(&vdc->lock)); 5555 5556 mutex_exit(&vdc->lock); 5557 5558 dev = makedevice(ddi_driver_major(vdc->dip), 5559 VD_MAKE_DEV(vdc->instance, 0)); 5560 5561 rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL); 5562 if (rv == 0) 5563 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, FKIOCTL); 5564 5565 if (rv == ENOTSUP) { 5566 /* 5567 * If the device does not support VTOC then we try 5568 * to read an EFI label. 5569 */ 5570 struct dk_gpt *efi; 5571 size_t efi_len; 5572 5573 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 5574 5575 if (rv) { 5576 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 5577 vdc->instance, rv); 5578 mutex_enter(&vdc->lock); 5579 vdc_store_label_unk(vdc); 5580 return (EIO); 5581 } 5582 5583 mutex_enter(&vdc->lock); 5584 vdc_store_label_efi(vdc, efi); 5585 vd_efi_free(efi, efi_len); 5586 return (ENOTSUP); 5587 } 5588 5589 if (rv != 0) { 5590 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 5591 vdc->instance, rv); 5592 mutex_enter(&vdc->lock); 5593 vdc_store_label_unk(vdc); 5594 if (rv != EINVAL) 5595 rv = EIO; 5596 return (rv); 5597 } 5598 5599 /* check that geometry and vtoc are valid */ 5600 if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 5601 vtoc.v_sanity != VTOC_SANE) { 5602 mutex_enter(&vdc->lock); 5603 vdc_store_label_unk(vdc); 5604 return (EINVAL); 5605 } 5606 5607 /* 5608 * We have a disk and a valid VTOC. However this does not mean 5609 * that the disk currently has a VTOC label. The returned VTOC may 5610 * be a default VTOC to be used for configuring the disk (this is 5611 * what is done for disk images). So we read the label from the 5612 * beginning of the disk to ensure we really have a VTOC label. 5613 * 5614 * FUTURE: This could be the default way for reading the VTOC 5615 * from the disk as opposed to sending the VD_OP_GET_VTOC 5616 * to the server. This will be the default if vdc is implemented 5617 * on top of cmlb. 5618 */ 5619 5620 /* 5621 * A single-slice disk does not support reads using an absolute disk 5622 * offset, so we just rely on the DKIOCGVTOC ioctl in that case.
5623 */ 5624 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5625 mutex_enter(&vdc->lock); 5626 if (vtoc.v_nparts != 1) { 5627 vdc_store_label_unk(vdc); 5628 return (EINVAL); 5629 } 5630 vdc_store_label_vtoc(vdc, &geom, &vtoc); 5631 return (0); 5632 } 5633 5634 if (vtoc.v_nparts != V_NUMPAR) { 5635 mutex_enter(&vdc->lock); 5636 vdc_store_label_unk(vdc); 5637 return (EINVAL); 5638 } 5639 5640 /* 5641 * Read disk label from start of disk 5642 */ 5643 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 5644 bioinit(buf); 5645 buf->b_un.b_addr = (caddr_t)&label; 5646 buf->b_bcount = DK_LABEL_SIZE; 5647 buf->b_flags = B_BUSY | B_READ; 5648 buf->b_dev = cmpdev(dev); 5649 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label, 5650 DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); 5651 if (rv) { 5652 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 5653 vdc->instance); 5654 } else { 5655 rv = biowait(buf); 5656 biofini(buf); 5657 } 5658 kmem_free(buf, sizeof (buf_t)); 5659 5660 if (rv != 0 || label.dkl_magic != DKL_MAGIC || 5661 label.dkl_cksum != vdc_lbl2cksum(&label)) { 5662 DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 5663 vdc->instance); 5664 mutex_enter(&vdc->lock); 5665 vdc_store_label_unk(vdc); 5666 return (EINVAL); 5667 } 5668 5669 mutex_enter(&vdc->lock); 5670 vdc_store_label_vtoc(vdc, &geom, &vtoc); 5671 return (0); 5672 } 5673 5674 /* 5675 * Function: 5676 * vdc_validate 5677 * 5678 * Description: 5679 * This routine discovers the label of the disk and creates the 5680 * appropriate device nodes if the label has changed. 5681 * 5682 * Arguments: 5683 * vdc - soft state pointer for this instance of the device driver. 5684 * 5685 * Return Code: 5686 * none. 5687 */ 5688 static void 5689 vdc_validate(vdc_t *vdc) 5690 { 5691 vd_disk_label_t old_label; 5692 struct vtoc old_vtoc; 5693 int rv; 5694 5695 ASSERT(!MUTEX_HELD(&vdc->lock)); 5696 5697 mutex_enter(&vdc->lock); 5698 5699 /* save the current label and vtoc */ 5700 old_label = vdc->vdisk_label; 5701 bcopy(vdc->vtoc, &old_vtoc, sizeof (struct vtoc)); 5702 5703 /* check the geometry */ 5704 (void) vdc_validate_geometry(vdc); 5705 5706 /* if the disk label has changed, update device nodes */ 5707 if (vdc->vdisk_label != old_label) { 5708 5709 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 5710 rv = vdc_create_device_nodes_efi(vdc); 5711 else 5712 rv = vdc_create_device_nodes_vtoc(vdc); 5713 5714 if (rv != 0) { 5715 DMSG(vdc, 0, "![%d] Failed to update device nodes", 5716 vdc->instance); 5717 } 5718 } 5719 5720 /* if the vtoc has changed, update device node properties */ 5721 if (bcmp(vdc->vtoc, &old_vtoc, sizeof (struct vtoc)) != 0) { 5722 5723 if (vdc_create_device_nodes_props(vdc) != 0) { 5724 DMSG(vdc, 0, "![%d] Failed to update device nodes" 5725 " properties", vdc->instance); 5726 } 5727 } 5728 5729 mutex_exit(&vdc->lock); 5730 } 5731 5732 static void 5733 vdc_validate_task(void *arg) 5734 { 5735 vdc_t *vdc = (vdc_t *)arg; 5736 5737 vdc_validate(vdc); 5738 5739 mutex_enter(&vdc->lock); 5740 ASSERT(vdc->validate_pending > 0); 5741 vdc->validate_pending--; 5742 mutex_exit(&vdc->lock); 5743 } 5744 5745 /* 5746 * Function: 5747 * vdc_setup_devid() 5748 * 5749 * Description: 5750 * This routine discovers the devid of a vDisk. It requests the devid of 5751 * the underlying device from the vDisk server, builds an encapsulated 5752 * devid based on the retrieved devid and registers that new devid to 5753 * the vDisk. 5754 * 5755 * Arguments: 5756 * vdc - soft state pointer for this instance of the device driver.
5757 * 5758 * Return Code: 5759 * 0 - A devid was successfully registered for the vDisk 5760 */ 5761 static int 5762 vdc_setup_devid(vdc_t *vdc) 5763 { 5764 int rv; 5765 vd_devid_t *vd_devid; 5766 size_t bufsize, bufid_len; 5767 5768 /* 5769 * At first sight, we don't know the size of the devid that the 5770 * server will return but this size will be encoded into the 5771 * reply. So we do a first request using a default size then we 5772 * check if this size was large enough. If not then we do a second 5773 * request with the correct size returned by the server. Note that 5774 * ldc requires size to be 8-byte aligned. 5775 */ 5776 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 5777 sizeof (uint64_t)); 5778 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5779 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5780 5781 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 5782 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir); 5783 5784 DMSG(vdc, 2, "sync_op returned %d\n", rv); 5785 5786 if (rv) { 5787 kmem_free(vd_devid, bufsize); 5788 return (rv); 5789 } 5790 5791 if (vd_devid->length > bufid_len) { 5792 /* 5793 * The returned devid is larger than the buffer used. Try again 5794 * with a buffer of the right size. Note that vd_devid->length 5795 * must be read before the old buffer is freed. 5796 */ 5797 bufid_len = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 5798 sizeof (uint64_t)); 5799 kmem_free(vd_devid, bufsize); bufsize = bufid_len; 5800 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5801 5802 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 5803 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 5804 VIO_both_dir); 5805 5806 if (rv) { 5807 kmem_free(vd_devid, bufsize); 5808 return (rv); 5809 } 5810 } 5811 5812 /* 5813 * The virtual disk should have the same device id as the one associated 5814 * with the physical disk it is mapped on, otherwise sharing a disk 5815 * between an LDom and a non-LDom may not work (for example for a shared 5816 * SVM disk set). 5817 * 5818 * The DDI framework does not allow creating a device id with any 5819 * type so we first create a device id of type DEVID_ENCAP and then 5820 * we restore the original type of the physical device. 5821 */ 5822 5823 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 5824 5825 /* build an encapsulated devid based on the returned devid */ 5826 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 5827 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 5828 DMSG(vdc, 1, "[%d] Failed to create devid\n", vdc->instance); 5829 kmem_free(vd_devid, bufsize); 5830 return (1); 5831 } 5832 5833 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 5834 5835 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 5836 5837 kmem_free(vd_devid, bufsize); 5838 5839 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 5840 DMSG(vdc, 1, "[%d] Failed to register devid\n", vdc->instance); 5841 return (1); 5842 } 5843 5844 return (0); 5845 } 5846 5847 static void 5848 vdc_store_label_efi(vdc_t *vdc, struct dk_gpt *efi) 5849 { 5850 struct vtoc *vtoc = vdc->vtoc; 5851 5852 ASSERT(MUTEX_HELD(&vdc->lock)); 5853 5854 vdc->vdisk_label = VD_DISK_LABEL_EFI; 5855 bzero(vdc->geom, sizeof (struct dk_geom)); 5856 vd_efi_to_vtoc(efi, vtoc); 5857 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5858 /* 5859 * vd_efi_to_vtoc() will store information about the EFI Sun 5860 * reserved partition (representing the entire disk) into 5861 * partition 7. However a single-slice device will only have 5862 * that single partition and the vdc driver expects to find 5863 * information about that partition in slice 0.
So we need 5864 * to copy information from slice 7 to slice 0. 5865 */ 5866 vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag; 5867 vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag; 5868 vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start; 5869 vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; 5870 } 5871 } 5872 5873 static void 5874 vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct vtoc *vtoc) 5875 { 5876 ASSERT(MUTEX_HELD(&vdc->lock)); 5877 5878 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 5879 bcopy(vtoc, vdc->vtoc, sizeof (struct vtoc)); 5880 bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 5881 } 5882 5883 static void 5884 vdc_store_label_unk(vdc_t *vdc) 5885 { 5886 ASSERT(MUTEX_HELD(&vdc->lock)); 5887 5888 vdc->vdisk_label = VD_DISK_LABEL_UNK; 5889 bzero(vdc->vtoc, sizeof (struct vtoc)); 5890 bzero(vdc->geom, sizeof (struct dk_geom)); 5891 } 5892