1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. 
vdc will copy the data to be written to the descriptor 49 * ring or maps the buffer to store the data read by the vDisk 50 * server into the descriptor ring. It then sends a message to the 51 * vDisk server requesting it to complete the operation. 52 * 53 * 4) Handling responses from vDisk server. 54 * The vDisk server will ACK some or all of the messages vdc sends to it 55 * (this is configured during the handshake). Upon receipt of an ACK 56 * vdc will check the descriptor ring and signal to the upper layer 57 * code waiting on the IO. 58 */ 59 60 #include <sys/atomic.h> 61 #include <sys/conf.h> 62 #include <sys/disp.h> 63 #include <sys/ddi.h> 64 #include <sys/dkio.h> 65 #include <sys/efi_partition.h> 66 #include <sys/fcntl.h> 67 #include <sys/file.h> 68 #include <sys/mach_descrip.h> 69 #include <sys/modctl.h> 70 #include <sys/mdeg.h> 71 #include <sys/note.h> 72 #include <sys/open.h> 73 #include <sys/sdt.h> 74 #include <sys/stat.h> 75 #include <sys/sunddi.h> 76 #include <sys/types.h> 77 #include <sys/promif.h> 78 #include <sys/vtoc.h> 79 #include <sys/archsystm.h> 80 #include <sys/sysmacros.h> 81 82 #include <sys/cdio.h> 83 #include <sys/dktp/fdisk.h> 84 #include <sys/dktp/dadkio.h> 85 #include <sys/scsi/generic/sense.h> 86 #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ 87 88 #include <sys/ldoms.h> 89 #include <sys/ldc.h> 90 #include <sys/vio_common.h> 91 #include <sys/vio_mailbox.h> 92 #include <sys/vdsk_common.h> 93 #include <sys/vdsk_mailbox.h> 94 #include <sys/vdc.h> 95 96 /* 97 * function prototypes 98 */ 99 100 /* standard driver functions */ 101 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 102 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 103 static int vdc_strategy(struct buf *buf); 104 static int vdc_print(dev_t dev, char *str); 105 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 106 static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 107 static int 
vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 108 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 109 cred_t *credp, int *rvalp); 110 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 111 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 112 113 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 114 void *arg, void **resultp); 115 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 116 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 117 118 /* setup */ 119 static void vdc_min(struct buf *bufp); 120 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 121 static int vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node); 122 static int vdc_start_ldc_connection(vdc_t *vdc); 123 static int vdc_create_device_nodes(vdc_t *vdc); 124 static int vdc_create_device_nodes_efi(vdc_t *vdc); 125 static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 126 static int vdc_create_device_nodes_props(vdc_t *vdc); 127 static int vdc_get_md_node(dev_info_t *dip, md_t **mdpp, 128 mde_cookie_t *vd_nodep, mde_cookie_t *vd_portp); 129 static int vdc_get_ldc_id(md_t *, mde_cookie_t, uint64_t *); 130 static int vdc_do_ldc_up(vdc_t *vdc); 131 static void vdc_terminate_ldc(vdc_t *vdc); 132 static int vdc_init_descriptor_ring(vdc_t *vdc); 133 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 134 static int vdc_setup_devid(vdc_t *vdc); 135 static void vdc_store_label_efi(vdc_t *vdc, struct dk_gpt *efi); 136 static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, struct vtoc *); 137 static void vdc_store_label_unk(vdc_t *vdc); 138 static boolean_t vdc_is_opened(vdc_t *vdc); 139 140 /* handshake with vds */ 141 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 142 static int vdc_ver_negotiation(vdc_t *vdcp); 143 static int vdc_init_attr_negotiation(vdc_t *vdc); 144 static int vdc_attr_negotiation(vdc_t *vdcp); 145 static int vdc_init_dring_negotiate(vdc_t *vdc); 146 static 
int vdc_dring_negotiation(vdc_t *vdcp); 147 static int vdc_send_rdx(vdc_t *vdcp); 148 static int vdc_rdx_exchange(vdc_t *vdcp); 149 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 150 151 /* processing incoming messages from vDisk server */ 152 static void vdc_process_msg_thread(vdc_t *vdc); 153 static int vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 154 155 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 156 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 157 static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg); 158 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 159 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 160 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 161 static int vdc_send_request(vdc_t *vdcp, int operation, 162 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 163 int cb_type, void *cb_arg, vio_desc_direction_t dir); 164 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 165 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 166 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 167 int cb_type, void *cb_arg, vio_desc_direction_t dir); 168 static int vdc_do_sync_op(vdc_t *vdcp, int operation, 169 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 170 int cb_type, void *cb_arg, vio_desc_direction_t dir); 171 172 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 173 static int vdc_drain_response(vdc_t *vdcp); 174 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 175 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 176 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 177 178 /* dkio */ 179 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); 180 static void vdc_create_fake_geometry(vdc_t *vdc); 181 static int vdc_validate_geometry(vdc_t *vdc); 182 static void vdc_validate(vdc_t *vdc); 183 static 
void vdc_validate_task(void *arg); 184 static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 185 int mode, int dir); 186 static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 187 int mode, int dir); 188 static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 189 int mode, int dir); 190 static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 191 int mode, int dir); 192 static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 193 int mode, int dir); 194 static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 195 int mode, int dir); 196 static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 197 int mode, int dir); 198 static int vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to, 199 int mode, int dir); 200 static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 201 int mode, int dir); 202 static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 203 int mode, int dir); 204 205 /* 206 * Module variables 207 */ 208 209 /* 210 * Tunable variables to control how long vdc waits before timing out on 211 * various operations 212 */ 213 static int vdc_retries = 10; 214 static int vdc_hshake_retries = 3; 215 216 static int vdc_timeout = 0; /* units: seconds */ 217 218 /* calculated from 'vdc_usec_timeout' during attach */ 219 static uint64_t vdc_hz_timeout; /* units: Hz */ 220 static uint64_t vdc_usec_timeout = 30 * MICROSEC; /* 30s units: ns */ 221 222 static uint64_t vdc_hz_min_ldc_delay; 223 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 224 static uint64_t vdc_hz_max_ldc_delay; 225 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 226 227 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 228 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 229 230 /* values for dumping - need to run in a tighter loop */ 231 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 232 static int vdc_dump_retries = 100; 233 234 /* Count of the number of vdc 
instances attached */ 235 static volatile uint32_t vdc_instance_count = 0; 236 237 /* Soft state pointer */ 238 static void *vdc_state; 239 240 /* 241 * Controlling the verbosity of the error/debug messages 242 * 243 * vdc_msglevel - controls level of messages 244 * vdc_matchinst - 64-bit variable where each bit corresponds 245 * to the vdc instance the vdc_msglevel applies. 246 */ 247 int vdc_msglevel = 0x0; 248 uint64_t vdc_matchinst = 0ull; 249 250 /* 251 * Supported vDisk protocol version pairs. 252 * 253 * The first array entry is the latest and preferred version. 254 */ 255 static const vio_ver_t vdc_version[] = {{1, 0}}; 256 257 static struct cb_ops vdc_cb_ops = { 258 vdc_open, /* cb_open */ 259 vdc_close, /* cb_close */ 260 vdc_strategy, /* cb_strategy */ 261 vdc_print, /* cb_print */ 262 vdc_dump, /* cb_dump */ 263 vdc_read, /* cb_read */ 264 vdc_write, /* cb_write */ 265 vdc_ioctl, /* cb_ioctl */ 266 nodev, /* cb_devmap */ 267 nodev, /* cb_mmap */ 268 nodev, /* cb_segmap */ 269 nochpoll, /* cb_chpoll */ 270 ddi_prop_op, /* cb_prop_op */ 271 NULL, /* cb_str */ 272 D_MP | D_64BIT, /* cb_flag */ 273 CB_REV, /* cb_rev */ 274 vdc_aread, /* cb_aread */ 275 vdc_awrite /* cb_awrite */ 276 }; 277 278 static struct dev_ops vdc_ops = { 279 DEVO_REV, /* devo_rev */ 280 0, /* devo_refcnt */ 281 vdc_getinfo, /* devo_getinfo */ 282 nulldev, /* devo_identify */ 283 nulldev, /* devo_probe */ 284 vdc_attach, /* devo_attach */ 285 vdc_detach, /* devo_detach */ 286 nodev, /* devo_reset */ 287 &vdc_cb_ops, /* devo_cb_ops */ 288 NULL, /* devo_bus_ops */ 289 nulldev /* devo_power */ 290 }; 291 292 static struct modldrv modldrv = { 293 &mod_driverops, 294 "virtual disk client", 295 &vdc_ops, 296 }; 297 298 static struct modlinkage modlinkage = { 299 MODREV_1, 300 &modldrv, 301 NULL 302 }; 303 304 /* -------------------------------------------------------------------------- */ 305 306 /* 307 * Device Driver housekeeping and setup 308 */ 309 310 int 311 _init(void) 312 { 313 int 
status; 314 315 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 316 return (status); 317 if ((status = mod_install(&modlinkage)) != 0) 318 ddi_soft_state_fini(&vdc_state); 319 vdc_efi_init(vd_process_ioctl); 320 return (status); 321 } 322 323 int 324 _info(struct modinfo *modinfop) 325 { 326 return (mod_info(&modlinkage, modinfop)); 327 } 328 329 int 330 _fini(void) 331 { 332 int status; 333 334 if ((status = mod_remove(&modlinkage)) != 0) 335 return (status); 336 vdc_efi_fini(); 337 ddi_soft_state_fini(&vdc_state); 338 return (0); 339 } 340 341 static int 342 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 343 { 344 _NOTE(ARGUNUSED(dip)) 345 346 int instance = VDCUNIT((dev_t)arg); 347 vdc_t *vdc = NULL; 348 349 switch (cmd) { 350 case DDI_INFO_DEVT2DEVINFO: 351 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 352 *resultp = NULL; 353 return (DDI_FAILURE); 354 } 355 *resultp = vdc->dip; 356 return (DDI_SUCCESS); 357 case DDI_INFO_DEVT2INSTANCE: 358 *resultp = (void *)(uintptr_t)instance; 359 return (DDI_SUCCESS); 360 default: 361 *resultp = NULL; 362 return (DDI_FAILURE); 363 } 364 } 365 366 static int 367 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 368 { 369 int instance; 370 int rv; 371 vdc_t *vdc = NULL; 372 373 switch (cmd) { 374 case DDI_DETACH: 375 /* the real work happens below */ 376 break; 377 case DDI_SUSPEND: 378 /* nothing to do for this non-device */ 379 return (DDI_SUCCESS); 380 default: 381 return (DDI_FAILURE); 382 } 383 384 ASSERT(cmd == DDI_DETACH); 385 instance = ddi_get_instance(dip); 386 DMSGX(1, "[%d] Entered\n", instance); 387 388 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 389 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 390 return (DDI_FAILURE); 391 } 392 393 if (vdc_is_opened(vdc)) { 394 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 395 return (DDI_FAILURE); 396 } 397 398 if (vdc->dkio_flush_pending) { 399 
DMSG(vdc, 0, 400 "[%d] Cannot detach: %d outstanding DKIO flushes\n", 401 instance, vdc->dkio_flush_pending); 402 return (DDI_FAILURE); 403 } 404 405 if (vdc->validate_pending) { 406 DMSG(vdc, 0, 407 "[%d] Cannot detach: %d outstanding validate request\n", 408 instance, vdc->validate_pending); 409 return (DDI_FAILURE); 410 } 411 412 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 413 414 /* mark instance as detaching */ 415 vdc->lifecycle = VDC_LC_DETACHING; 416 417 /* 418 * try and disable callbacks to prevent another handshake 419 */ 420 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 421 DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv); 422 423 if (vdc->initialized & VDC_THREAD) { 424 mutex_enter(&vdc->read_lock); 425 if ((vdc->read_state == VDC_READ_WAITING) || 426 (vdc->read_state == VDC_READ_RESET)) { 427 vdc->read_state = VDC_READ_RESET; 428 cv_signal(&vdc->read_cv); 429 } 430 431 mutex_exit(&vdc->read_lock); 432 433 /* wake up any thread waiting for connection to come online */ 434 mutex_enter(&vdc->lock); 435 if (vdc->state == VDC_STATE_INIT_WAITING) { 436 DMSG(vdc, 0, 437 "[%d] write reset - move to resetting state...\n", 438 instance); 439 vdc->state = VDC_STATE_RESETTING; 440 cv_signal(&vdc->initwait_cv); 441 } 442 mutex_exit(&vdc->lock); 443 444 /* now wait until state transitions to VDC_STATE_DETACH */ 445 thread_join(vdc->msg_proc_thr->t_did); 446 ASSERT(vdc->state == VDC_STATE_DETACH); 447 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 448 vdc->instance); 449 } 450 451 mutex_enter(&vdc->lock); 452 453 if (vdc->initialized & VDC_DRING) 454 vdc_destroy_descriptor_ring(vdc); 455 456 if (vdc->initialized & VDC_LDC) 457 vdc_terminate_ldc(vdc); 458 459 mutex_exit(&vdc->lock); 460 461 if (vdc->initialized & VDC_MINOR) { 462 ddi_prop_remove_all(dip); 463 ddi_remove_minor_node(dip, NULL); 464 } 465 466 if (vdc->initialized & VDC_LOCKS) { 467 mutex_destroy(&vdc->lock); 468 mutex_destroy(&vdc->read_lock); 469 cv_destroy(&vdc->initwait_cv); 470 
cv_destroy(&vdc->dring_free_cv); 471 cv_destroy(&vdc->membind_cv); 472 cv_destroy(&vdc->sync_pending_cv); 473 cv_destroy(&vdc->sync_blocked_cv); 474 cv_destroy(&vdc->read_cv); 475 cv_destroy(&vdc->running_cv); 476 } 477 478 if (vdc->minfo) 479 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 480 481 if (vdc->cinfo) 482 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 483 484 if (vdc->vtoc) 485 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 486 487 if (vdc->geom) 488 kmem_free(vdc->geom, sizeof (struct dk_geom)); 489 490 if (vdc->devid) { 491 ddi_devid_unregister(dip); 492 ddi_devid_free(vdc->devid); 493 } 494 495 if (vdc->initialized & VDC_SOFT_STATE) 496 ddi_soft_state_free(vdc_state, instance); 497 498 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 499 500 return (DDI_SUCCESS); 501 } 502 503 504 static int 505 vdc_do_attach(dev_info_t *dip) 506 { 507 int instance; 508 vdc_t *vdc = NULL; 509 int status; 510 md_t *mdp; 511 mde_cookie_t vd_node, vd_port; 512 513 ASSERT(dip != NULL); 514 515 instance = ddi_get_instance(dip); 516 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 517 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 518 instance); 519 return (DDI_FAILURE); 520 } 521 522 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 523 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 524 return (DDI_FAILURE); 525 } 526 527 /* 528 * We assign the value to initialized in this case to zero out the 529 * variable and then set bits in it to indicate what has been done 530 */ 531 vdc->initialized = VDC_SOFT_STATE; 532 533 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 534 535 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 536 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 537 538 vdc->dip = dip; 539 vdc->instance = instance; 540 vdc->vdisk_type = VD_DISK_TYPE_UNK; 541 vdc->vdisk_label = VD_DISK_LABEL_UNK; 542 vdc->state = VDC_STATE_INIT; 543 vdc->lifecycle = VDC_LC_ATTACHING; 544 vdc->ldc_state 
= 0; 545 vdc->session_id = 0; 546 vdc->block_size = DEV_BSIZE; 547 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 548 549 vdc->vtoc = NULL; 550 vdc->geom = NULL; 551 vdc->cinfo = NULL; 552 vdc->minfo = NULL; 553 554 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 555 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 556 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 557 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 558 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 559 560 vdc->threads_pending = 0; 561 vdc->sync_op_pending = B_FALSE; 562 vdc->sync_op_blocked = B_FALSE; 563 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 564 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 565 566 /* init blocking msg read functionality */ 567 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 568 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 569 vdc->read_state = VDC_READ_IDLE; 570 571 vdc->initialized |= VDC_LOCKS; 572 573 /* get device and port MD node for this disk instance */ 574 if (vdc_get_md_node(dip, &mdp, &vd_node, &vd_port) != 0) { 575 cmn_err(CE_NOTE, "[%d] Could not get machine description node", 576 instance); 577 return (DDI_FAILURE); 578 } 579 580 /* set the connection timeout */ 581 if (vd_port == NULL || (md_get_prop_val(mdp, vd_port, 582 VDC_MD_TIMEOUT, &vdc->ctimeout) != 0)) { 583 vdc->ctimeout = 0; 584 } 585 586 /* initialise LDC channel which will be used to communicate with vds */ 587 status = vdc_do_ldc_init(vdc, mdp, vd_node); 588 589 (void) md_fini_handle(mdp); 590 591 if (status != 0) { 592 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 593 goto return_status; 594 } 595 596 /* initialize the thread responsible for managing state with server */ 597 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 598 vdc, 0, &p0, TS_RUN, minclsyspri); 599 if (vdc->msg_proc_thr == NULL) { 600 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 601 instance); 602 return (DDI_FAILURE); 603 } 604 605 
vdc->initialized |= VDC_THREAD; 606 607 atomic_inc_32(&vdc_instance_count); 608 609 /* 610 * Check the disk label. This will send requests and do the handshake. 611 * We don't really care about the disk label now. What we really need is 612 * the handshake do be done so that we know the type of the disk (slice 613 * or full disk) and the appropriate device nodes can be created. 614 */ 615 vdc->vdisk_label = VD_DISK_LABEL_UNK; 616 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 617 vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); 618 619 mutex_enter(&vdc->lock); 620 (void) vdc_validate_geometry(vdc); 621 mutex_exit(&vdc->lock); 622 623 /* 624 * Now that we have the device info we can create the 625 * device nodes and properties 626 */ 627 status = vdc_create_device_nodes(vdc); 628 if (status) { 629 DMSG(vdc, 0, "[%d] Failed to create device nodes", 630 instance); 631 goto return_status; 632 } 633 status = vdc_create_device_nodes_props(vdc); 634 if (status) { 635 DMSG(vdc, 0, "[%d] Failed to create device nodes" 636 " properties (%d)", instance, status); 637 goto return_status; 638 } 639 640 /* 641 * Setup devid 642 */ 643 if (vdc_setup_devid(vdc)) { 644 DMSG(vdc, 0, "[%d] No device id available\n", instance); 645 } 646 647 ddi_report_dev(dip); 648 vdc->lifecycle = VDC_LC_ONLINE; 649 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 650 651 return_status: 652 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 653 return (status); 654 } 655 656 static int 657 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 658 { 659 int status; 660 661 switch (cmd) { 662 case DDI_ATTACH: 663 if ((status = vdc_do_attach(dip)) != 0) 664 (void) vdc_detach(dip, DDI_DETACH); 665 return (status); 666 case DDI_RESUME: 667 /* nothing to do for this non-device */ 668 return (DDI_SUCCESS); 669 default: 670 return (DDI_FAILURE); 671 } 672 } 673 674 static int 675 vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node) 676 { 677 int status = 0; 678 ldc_status_t 
ldc_state; 679 ldc_attr_t ldc_attr; 680 uint64_t ldc_id = 0; 681 682 ASSERT(vdc != NULL); 683 684 vdc->initialized |= VDC_LDC; 685 686 if ((status = vdc_get_ldc_id(mdp, vd_node, &ldc_id)) != 0) { 687 DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property", 688 vdc->instance); 689 return (EIO); 690 } 691 692 DMSGX(0, "[%d] LDC id is 0x%lx\n", vdc->instance, ldc_id); 693 694 vdc->ldc_id = ldc_id; 695 696 ldc_attr.devclass = LDC_DEV_BLK; 697 ldc_attr.instance = vdc->instance; 698 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 699 ldc_attr.mtu = VD_LDC_MTU; 700 701 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 702 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 703 if (status != 0) { 704 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 705 vdc->instance, ldc_id, status); 706 return (status); 707 } 708 vdc->initialized |= VDC_LDC_INIT; 709 } 710 status = ldc_status(vdc->ldc_handle, &ldc_state); 711 if (status != 0) { 712 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 713 vdc->instance, status); 714 return (status); 715 } 716 vdc->ldc_state = ldc_state; 717 718 if ((vdc->initialized & VDC_LDC_CB) == 0) { 719 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 720 (caddr_t)vdc); 721 if (status != 0) { 722 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 723 vdc->instance, status); 724 return (status); 725 } 726 vdc->initialized |= VDC_LDC_CB; 727 } 728 729 vdc->initialized |= VDC_LDC; 730 731 /* 732 * At this stage we have initialised LDC, we will now try and open 733 * the connection. 
734 */ 735 if (vdc->ldc_state == LDC_INIT) { 736 status = ldc_open(vdc->ldc_handle); 737 if (status != 0) { 738 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 739 vdc->instance, vdc->ldc_id, status); 740 return (status); 741 } 742 vdc->initialized |= VDC_LDC_OPEN; 743 } 744 745 return (status); 746 } 747 748 static int 749 vdc_start_ldc_connection(vdc_t *vdc) 750 { 751 int status = 0; 752 753 ASSERT(vdc != NULL); 754 755 ASSERT(MUTEX_HELD(&vdc->lock)); 756 757 status = vdc_do_ldc_up(vdc); 758 759 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 760 761 return (status); 762 } 763 764 static int 765 vdc_stop_ldc_connection(vdc_t *vdcp) 766 { 767 int status; 768 769 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 770 vdcp->state); 771 772 status = ldc_down(vdcp->ldc_handle); 773 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 774 775 vdcp->initialized &= ~VDC_HANDSHAKE; 776 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 777 778 return (status); 779 } 780 781 static int 782 vdc_create_device_nodes_efi(vdc_t *vdc) 783 { 784 ddi_remove_minor_node(vdc->dip, "h"); 785 ddi_remove_minor_node(vdc->dip, "h,raw"); 786 787 if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 788 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 789 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 790 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 791 vdc->instance); 792 return (EIO); 793 } 794 795 /* if any device node is created we set this flag */ 796 vdc->initialized |= VDC_MINOR; 797 798 if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 799 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 800 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 801 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 802 vdc->instance); 803 return (EIO); 804 } 805 806 return (0); 807 } 808 809 static int 810 vdc_create_device_nodes_vtoc(vdc_t *vdc) 811 { 812 ddi_remove_minor_node(vdc->dip, "wd"); 813 ddi_remove_minor_node(vdc->dip, "wd,raw"); 814 815 if (ddi_create_minor_node(vdc->dip, "h", 
S_IFBLK, 816 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 817 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 818 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 819 vdc->instance); 820 return (EIO); 821 } 822 823 /* if any device node is created we set this flag */ 824 vdc->initialized |= VDC_MINOR; 825 826 if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 827 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 828 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 829 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 830 vdc->instance); 831 return (EIO); 832 } 833 834 return (0); 835 } 836 837 /* 838 * Function: 839 * vdc_create_device_nodes 840 * 841 * Description: 842 * This function creates the block and character device nodes under 843 * /devices along with the node properties. It is called as part of 844 * the attach(9E) of the instance during the handshake with vds after 845 * vds has sent the attributes to vdc. 846 * 847 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 848 * of 2 is used in keeping with the Solaris convention that slice 2 849 * refers to a whole disk. 
Slices start at 'a' 850 * 851 * Parameters: 852 * vdc - soft state pointer 853 * 854 * Return Values 855 * 0 - Success 856 * EIO - Failed to create node 857 * EINVAL - Unknown type of disk exported 858 */ 859 static int 860 vdc_create_device_nodes(vdc_t *vdc) 861 { 862 char name[sizeof ("s,raw")]; 863 dev_info_t *dip = NULL; 864 int instance, status; 865 int num_slices = 1; 866 int i; 867 868 ASSERT(vdc != NULL); 869 870 instance = vdc->instance; 871 dip = vdc->dip; 872 873 switch (vdc->vdisk_type) { 874 case VD_DISK_TYPE_DISK: 875 num_slices = V_NUMPAR; 876 break; 877 case VD_DISK_TYPE_SLICE: 878 num_slices = 1; 879 break; 880 case VD_DISK_TYPE_UNK: 881 default: 882 return (EINVAL); 883 } 884 885 /* 886 * Minor nodes are different for EFI disks: EFI disks do not have 887 * a minor node 'g' for the minor number corresponding to slice 888 * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 889 * representing the whole disk. 890 */ 891 for (i = 0; i < num_slices; i++) { 892 893 if (i == VD_EFI_WD_SLICE) { 894 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 895 status = vdc_create_device_nodes_efi(vdc); 896 else 897 status = vdc_create_device_nodes_vtoc(vdc); 898 if (status != 0) 899 return (status); 900 continue; 901 } 902 903 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 904 if (ddi_create_minor_node(dip, name, S_IFBLK, 905 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 906 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 907 instance, name); 908 return (EIO); 909 } 910 911 /* if any device node is created we set this flag */ 912 vdc->initialized |= VDC_MINOR; 913 914 (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); 915 916 if (ddi_create_minor_node(dip, name, S_IFCHR, 917 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 918 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 919 instance, name); 920 return (EIO); 921 } 922 } 923 924 return (0); 925 } 926 927 /* 928 * Function: 929 * 
vdc_create_device_nodes_props 930 * 931 * Description: 932 * This function creates the block and character device nodes under 933 * /devices along with the node properties. It is called as part of 934 * the attach(9E) of the instance during the handshake with vds after 935 * vds has sent the attributes to vdc. 936 * 937 * Parameters: 938 * vdc - soft state pointer 939 * 940 * Return Values 941 * 0 - Success 942 * EIO - Failed to create device node property 943 * EINVAL - Unknown type of disk exported 944 */ 945 static int 946 vdc_create_device_nodes_props(vdc_t *vdc) 947 { 948 dev_info_t *dip = NULL; 949 int instance; 950 int num_slices = 1; 951 int64_t size = 0; 952 dev_t dev; 953 int rv; 954 int i; 955 956 ASSERT(vdc != NULL); 957 ASSERT(vdc->vtoc != NULL); 958 959 instance = vdc->instance; 960 dip = vdc->dip; 961 962 switch (vdc->vdisk_type) { 963 case VD_DISK_TYPE_DISK: 964 num_slices = V_NUMPAR; 965 break; 966 case VD_DISK_TYPE_SLICE: 967 num_slices = 1; 968 break; 969 case VD_DISK_TYPE_UNK: 970 default: 971 return (EINVAL); 972 } 973 974 if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { 975 /* remove all properties */ 976 for (i = 0; i < num_slices; i++) { 977 dev = makedevice(ddi_driver_major(dip), 978 VD_MAKE_DEV(instance, i)); 979 (void) ddi_prop_remove(dev, dip, VDC_SIZE_PROP_NAME); 980 (void) ddi_prop_remove(dev, dip, VDC_NBLOCKS_PROP_NAME); 981 } 982 return (0); 983 } 984 985 for (i = 0; i < num_slices; i++) { 986 dev = makedevice(ddi_driver_major(dip), 987 VD_MAKE_DEV(instance, i)); 988 989 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 990 DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 991 instance, size, size / (1024 * 1024), 992 vdc->vtoc->v_part[i].p_size); 993 994 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 995 if (rv != DDI_PROP_SUCCESS) { 996 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 997 instance, VDC_SIZE_PROP_NAME, size); 998 return (EIO); 999 } 1000 1001 rv = ddi_prop_update_int64(dev, dip, 
VDC_NBLOCKS_PROP_NAME, 1002 lbtodb(size)); 1003 if (rv != DDI_PROP_SUCCESS) { 1004 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 1005 instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 1006 return (EIO); 1007 } 1008 } 1009 1010 return (0); 1011 } 1012 1013 /* 1014 * Function: 1015 * vdc_is_opened 1016 * 1017 * Description: 1018 * This function checks if any slice of a given virtual disk is 1019 * currently opened. 1020 * 1021 * Parameters: 1022 * vdc - soft state pointer 1023 * 1024 * Return Values 1025 * B_TRUE - at least one slice is opened. 1026 * B_FALSE - no slice is opened. 1027 */ 1028 static boolean_t 1029 vdc_is_opened(vdc_t *vdc) 1030 { 1031 int i, nslices; 1032 1033 switch (vdc->vdisk_type) { 1034 case VD_DISK_TYPE_DISK: 1035 nslices = V_NUMPAR; 1036 break; 1037 case VD_DISK_TYPE_SLICE: 1038 nslices = 1; 1039 break; 1040 case VD_DISK_TYPE_UNK: 1041 default: 1042 ASSERT(0); 1043 } 1044 1045 /* check if there's any layered open */ 1046 for (i = 0; i < nslices; i++) { 1047 if (vdc->open_lyr[i] > 0) 1048 return (B_TRUE); 1049 } 1050 1051 /* check if there is any other kind of open */ 1052 for (i = 0; i < OTYPCNT; i++) { 1053 if (vdc->open[i] != 0) 1054 return (B_TRUE); 1055 } 1056 1057 return (B_FALSE); 1058 } 1059 1060 static int 1061 vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) 1062 { 1063 uint8_t slicemask; 1064 int i; 1065 1066 ASSERT(otyp < OTYPCNT); 1067 ASSERT(slice < V_NUMPAR); 1068 ASSERT(MUTEX_HELD(&vdc->lock)); 1069 1070 slicemask = 1 << slice; 1071 1072 /* check if slice is already exclusively opened */ 1073 if (vdc->open_excl & slicemask) 1074 return (EBUSY); 1075 1076 /* if open exclusive, check if slice is already opened */ 1077 if (flag & FEXCL) { 1078 if (vdc->open_lyr[slice] > 0) 1079 return (EBUSY); 1080 for (i = 0; i < OTYPCNT; i++) { 1081 if (vdc->open[i] & slicemask) 1082 return (EBUSY); 1083 } 1084 vdc->open_excl |= slicemask; 1085 } 1086 1087 /* mark slice as opened */ 1088 if (otyp == OTYP_LYR) { 1089 
vdc->open_lyr[slice]++; 1090 } else { 1091 vdc->open[otyp] |= slicemask; 1092 } 1093 1094 return (0); 1095 } 1096 1097 static void 1098 vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) 1099 { 1100 uint8_t slicemask; 1101 1102 ASSERT(otyp < OTYPCNT); 1103 ASSERT(slice < V_NUMPAR); 1104 ASSERT(MUTEX_HELD(&vdc->lock)); 1105 1106 slicemask = 1 << slice; 1107 1108 if (otyp == OTYP_LYR) { 1109 ASSERT(vdc->open_lyr[slice] > 0); 1110 vdc->open_lyr[slice]--; 1111 } else { 1112 vdc->open[otyp] &= ~slicemask; 1113 } 1114 1115 if (flag & FEXCL) 1116 vdc->open_excl &= ~slicemask; 1117 } 1118 1119 static int 1120 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 1121 { 1122 _NOTE(ARGUNUSED(cred)) 1123 1124 int instance; 1125 int slice, status = 0; 1126 vdc_t *vdc; 1127 1128 ASSERT(dev != NULL); 1129 instance = VDCUNIT(*dev); 1130 1131 if (otyp >= OTYPCNT) 1132 return (EINVAL); 1133 1134 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1135 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1136 return (ENXIO); 1137 } 1138 1139 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 1140 getminor(*dev), flag, otyp); 1141 1142 slice = VDCPART(*dev); 1143 1144 mutex_enter(&vdc->lock); 1145 1146 status = vdc_mark_opened(vdc, slice, flag, otyp); 1147 1148 if (status != 0) { 1149 mutex_exit(&vdc->lock); 1150 return (status); 1151 } 1152 1153 if (flag & (FNDELAY | FNONBLOCK)) { 1154 1155 /* don't resubmit a validate request if there's already one */ 1156 if (vdc->validate_pending > 0) { 1157 mutex_exit(&vdc->lock); 1158 return (0); 1159 } 1160 1161 /* call vdc_validate() asynchronously to avoid blocking */ 1162 if (taskq_dispatch(system_taskq, vdc_validate_task, 1163 (void *)vdc, TQ_NOSLEEP) == NULL) { 1164 vdc_mark_closed(vdc, slice, flag, otyp); 1165 mutex_exit(&vdc->lock); 1166 return (ENXIO); 1167 } 1168 1169 vdc->validate_pending++; 1170 mutex_exit(&vdc->lock); 1171 return (0); 1172 } 1173 1174 mutex_exit(&vdc->lock); 1175 1176 
vdc_validate(vdc); 1177 1178 mutex_enter(&vdc->lock); 1179 1180 if (vdc->vdisk_label == VD_DISK_LABEL_UNK || 1181 vdc->vtoc->v_part[slice].p_size == 0) { 1182 vdc_mark_closed(vdc, slice, flag, otyp); 1183 status = EIO; 1184 } 1185 1186 mutex_exit(&vdc->lock); 1187 1188 return (status); 1189 } 1190 1191 static int 1192 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 1193 { 1194 _NOTE(ARGUNUSED(cred)) 1195 1196 int instance; 1197 int slice; 1198 int rv; 1199 vdc_t *vdc; 1200 1201 instance = VDCUNIT(dev); 1202 1203 if (otyp >= OTYPCNT) 1204 return (EINVAL); 1205 1206 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1207 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1208 return (ENXIO); 1209 } 1210 1211 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 1212 1213 slice = VDCPART(dev); 1214 1215 /* 1216 * Attempt to flush the W$ on a close operation. If this is 1217 * not a supported IOCTL command or the backing device is read-only 1218 * do not fail the close operation. 
1219 */ 1220 rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL); 1221 1222 if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) { 1223 DMSG(vdc, 0, "[%d] flush failed with error %d on close\n", 1224 instance, rv); 1225 return (EIO); 1226 } 1227 1228 mutex_enter(&vdc->lock); 1229 vdc_mark_closed(vdc, slice, flag, otyp); 1230 mutex_exit(&vdc->lock); 1231 1232 return (0); 1233 } 1234 1235 static int 1236 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1237 { 1238 _NOTE(ARGUNUSED(credp)) 1239 _NOTE(ARGUNUSED(rvalp)) 1240 1241 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode)); 1242 } 1243 1244 static int 1245 vdc_print(dev_t dev, char *str) 1246 { 1247 cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 1248 return (0); 1249 } 1250 1251 static int 1252 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1253 { 1254 int rv; 1255 size_t nbytes = nblk * DEV_BSIZE; 1256 int instance = VDCUNIT(dev); 1257 vdc_t *vdc = NULL; 1258 1259 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1260 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1261 return (ENXIO); 1262 } 1263 1264 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 1265 instance, nbytes, blkno, (void *)addr); 1266 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 1267 VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir); 1268 if (rv) { 1269 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 1270 return (rv); 1271 } 1272 1273 if (ddi_in_panic()) 1274 (void) vdc_drain_response(vdc); 1275 1276 DMSG(vdc, 0, "[%d] End\n", instance); 1277 1278 return (0); 1279 } 1280 1281 /* -------------------------------------------------------------------------- */ 1282 1283 /* 1284 * Disk access routines 1285 * 1286 */ 1287 1288 /* 1289 * vdc_strategy() 1290 * 1291 * Return Value: 1292 * 0: As per strategy(9E), the strategy() function must return 0 1293 * [ bioerror(9f) sets b_flags to the proper error code ] 1294 */ 
1295 static int 1296 vdc_strategy(struct buf *buf) 1297 { 1298 int rv = -1; 1299 vdc_t *vdc = NULL; 1300 int instance = VDCUNIT(buf->b_edev); 1301 int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; 1302 int slice; 1303 1304 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1305 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1306 bioerror(buf, ENXIO); 1307 biodone(buf); 1308 return (0); 1309 } 1310 1311 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 1312 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 1313 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1314 DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc); 1315 1316 bp_mapin(buf); 1317 1318 if ((long)buf->b_private == VD_SLICE_NONE) { 1319 /* I/O using an absolute disk offset */ 1320 slice = VD_SLICE_NONE; 1321 } else { 1322 slice = VDCPART(buf->b_edev); 1323 } 1324 1325 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 1326 buf->b_bcount, slice, buf->b_lblkno, 1327 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 1328 VIO_write_dir); 1329 1330 /* 1331 * If the request was successfully sent, the strategy call returns and 1332 * the ACK handler calls the bioxxx functions when the vDisk server is 1333 * done. 1334 */ 1335 if (rv) { 1336 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 1337 bioerror(buf, rv); 1338 biodone(buf); 1339 } 1340 1341 return (0); 1342 } 1343 1344 /* 1345 * Function: 1346 * vdc_min 1347 * 1348 * Description: 1349 * Routine to limit the size of a data transfer. Used in 1350 * conjunction with physio(9F). 1351 * 1352 * Arguments: 1353 * bp - pointer to the indicated buf(9S) struct. 
1354 * 1355 */ 1356 static void 1357 vdc_min(struct buf *bufp) 1358 { 1359 vdc_t *vdc = NULL; 1360 int instance = VDCUNIT(bufp->b_edev); 1361 1362 vdc = ddi_get_soft_state(vdc_state, instance); 1363 VERIFY(vdc != NULL); 1364 1365 if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) { 1366 bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size; 1367 } 1368 } 1369 1370 static int 1371 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1372 { 1373 _NOTE(ARGUNUSED(cred)) 1374 1375 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1376 return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 1377 } 1378 1379 static int 1380 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1381 { 1382 _NOTE(ARGUNUSED(cred)) 1383 1384 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1385 return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 1386 } 1387 1388 static int 1389 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1390 { 1391 _NOTE(ARGUNUSED(cred)) 1392 1393 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1394 return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 1395 } 1396 1397 static int 1398 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1399 { 1400 _NOTE(ARGUNUSED(cred)) 1401 1402 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1403 return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 1404 } 1405 1406 1407 /* -------------------------------------------------------------------------- */ 1408 1409 /* 1410 * Handshake support 1411 */ 1412 1413 1414 /* 1415 * Function: 1416 * vdc_init_ver_negotiation() 1417 * 1418 * Description: 1419 * 1420 * Arguments: 1421 * vdc - soft state pointer for this instance of the device driver. 
1422 * 1423 * Return Code: 1424 * 0 - Success 1425 */ 1426 static int 1427 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 1428 { 1429 vio_ver_msg_t pkt; 1430 size_t msglen = sizeof (pkt); 1431 int status = -1; 1432 1433 ASSERT(vdc != NULL); 1434 ASSERT(mutex_owned(&vdc->lock)); 1435 1436 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1437 1438 /* 1439 * set the Session ID to a unique value 1440 * (the lower 32 bits of the clock tick) 1441 */ 1442 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1443 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 1444 1445 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1446 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1447 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1448 pkt.tag.vio_sid = vdc->session_id; 1449 pkt.dev_class = VDEV_DISK; 1450 pkt.ver_major = ver.major; 1451 pkt.ver_minor = ver.minor; 1452 1453 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1454 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 1455 vdc->instance, status); 1456 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1457 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 1458 "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle, 1459 status, msglen); 1460 if (msglen != sizeof (vio_ver_msg_t)) 1461 status = ENOMSG; 1462 } 1463 1464 return (status); 1465 } 1466 1467 /* 1468 * Function: 1469 * vdc_ver_negotiation() 1470 * 1471 * Description: 1472 * 1473 * Arguments: 1474 * vdcp - soft state pointer for this instance of the device driver. 
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_ver_negotiation(vdc_t *vdcp)
{
	vio_msg_t vio_msg;
	int status;

	if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0]))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Ver negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/*
	 * check type and sub_type: anything other than a CTRL message
	 * with a non-INFO subtype (i.e. ACK or NACK) is a protocol error;
	 * both ACK and NACK are processed by vdc_handle_ver_msg().
	 */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg));
}

/*
 * Function:
 *	vdc_init_attr_negotiation()
 *
 * Description:
 *	Build and send an attribute (VIO_ATTR_INFO) message advertising
 *	our transfer parameters to the vDisk server.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
1518 * 1519 * Return Code: 1520 * 0 - Success 1521 */ 1522 static int 1523 vdc_init_attr_negotiation(vdc_t *vdc) 1524 { 1525 vd_attr_msg_t pkt; 1526 size_t msglen = sizeof (pkt); 1527 int status; 1528 1529 ASSERT(vdc != NULL); 1530 ASSERT(mutex_owned(&vdc->lock)); 1531 1532 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 1533 1534 /* fill in tag */ 1535 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1536 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1537 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1538 pkt.tag.vio_sid = vdc->session_id; 1539 /* fill in payload */ 1540 pkt.max_xfer_sz = vdc->max_xfer_sz; 1541 pkt.vdisk_block_size = vdc->block_size; 1542 pkt.xfer_mode = VIO_DRING_MODE; 1543 pkt.operations = 0; /* server will set bits of valid operations */ 1544 pkt.vdisk_type = 0; /* server will set to valid device type */ 1545 pkt.vdisk_size = 0; /* server will set to valid size */ 1546 1547 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1548 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 1549 1550 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1551 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 1552 "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle, 1553 status, msglen); 1554 if (msglen != sizeof (vio_ver_msg_t)) 1555 status = ENOMSG; 1556 } 1557 1558 return (status); 1559 } 1560 1561 /* 1562 * Function: 1563 * vdc_attr_negotiation() 1564 * 1565 * Description: 1566 * 1567 * Arguments: 1568 * vdc - soft state pointer for this instance of the device driver. 
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_attr_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_attr_negotiation(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Attr negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/*
	 * check type and sub_type: only CTRL messages with an ACK or NACK
	 * subtype are valid here; both are processed by
	 * vdc_handle_attr_msg().
	 */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_init_dring_negotiate()
 *
 * Description:
 *	Create and bind the descriptor ring, then send a DRing
 *	registration (VIO_DRING_REG) message to the vDisk server.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_dring_negotiate(vdc_t *vdc)
{
	vio_dring_reg_msg_t pkt;
	size_t	msglen = sizeof (pkt);
	int	status = -1;
	int	retry;
	int	nretries = 10;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	/* DRing setup may transiently fail with EAGAIN; retry a few times */
	for (retry = 0; retry < nretries; retry++) {
		status = vdc_init_descriptor_ring(vdc);
		if (status != EAGAIN)
			break;
		drv_usecwait(vdc_min_timeout_ldc);
	}

	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n",
		    vdc->instance, status);
		return (status);
	}

	DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n",
	    vdc->instance, status);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_DRING_REG;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.dring_ident = 0;
	pkt.num_descriptors = vdc->dring_len;
	pkt.descriptor_size = vdc->dring_entry_size;
	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
	pkt.ncookies = vdc->dring_cookie_count;
	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)",
		    vdc->instance, status);
	}

	return (status);
}


/*
 * Function:
 *	vdc_dring_negotiation()
 *
 * Description:
 *	Drive the descriptor ring registration exchange: register our
 *	DRing and wait for (and process) the vDisk server's response.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_dring_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_dring_negotiate(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Dring negotiation response,"
		    " rv(%d)", vdcp->instance, status);
		return (status);
	}

	/*
	 * check type and sub_type: only CTRL messages with an ACK or NACK
	 * subtype are valid here; both are processed by
	 * vdc_handle_dring_reg_msg().
	 */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_dring_reg_msg(vdcp,
	    (vio_dring_reg_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_send_rdx()
 *
 * Description:
 *	Send an RDX control message to the vDisk server indicating that
 *	we are ready to exchange data; this is the last handshake step.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_send_rdx(vdc_t *vdcp)
{
	vio_msg_t	msg;
	size_t		msglen = sizeof (vio_msg_t);
	int		status;

	/*
	 * Send an RDX message to vds to indicate we are ready
	 * to send data
	 */
	msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	msg.tag.vio_subtype_env = VIO_RDX;
	msg.tag.vio_sid = vdcp->session_id;
	status = vdc_send(vdcp, (caddr_t)&msg, &msglen);
	if (status != 0) {
		DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)",
		    vdcp->instance, status);
	}

	return (status);
}

/*
 * Function:
 *	vdc_handle_rdx()
 *
 * Description:
 *	Process the server's ACK to our RDX message; receipt of this
 *	completes the handshake.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *	msgp - received msg
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
{
	_NOTE(ARGUNUSED(vdcp))
	_NOTE(ARGUNUSED(msgp))

	/* the caller only passes in a CTRL/ACK/RDX message */
	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);

	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);

	return (0);
}

/*
 * Function:
 *	vdc_rdx_exchange()
 *
 * Description:
 *	Drive the RDX exchange: send our RDX message and wait for the
 *	server's ACK. Unlike the earlier handshake steps, only an ACK
 *	is accepted here.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_rdx_exchange(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_send_rdx(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type: RDX must be ACKed, nothing else is valid */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
		DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

/*
 * Read one message from the LDC channel. Blocks (on read_cv) until the
 * callback marks data pending, then polls ldc_read() with exponential
 * backoff until a complete message arrives. Returns ECONNRESET if the
 * connection is reset while waiting.
 */
static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	boolean_t	q_has_pkts = B_FALSE;
	int		delay_time;
	size_t		len;

	mutex_enter(&vdc->read_lock);

	if (vdc->read_state == VDC_READ_IDLE)
		vdc->read_state = VDC_READ_WAITING;

	while (vdc->read_state != VDC_READ_PENDING) {

		/* detect if the connection has been reset */
		if (vdc->read_state == VDC_READ_RESET) {
			status = ECONNRESET;
			goto done;
		}

		cv_wait(&vdc->read_cv, &vdc->read_lock);
	}

	/*
	 * Until we get a blocking ldc read we have to retry
	 * until the entire LDC message has arrived before
	 * ldc_read() will succeed. Note we also bail out if
	 * the channel is reset or goes away.
	 */
	delay_time = vdc_ldc_read_init_delay;
loop:
	len = *nbytesp;
	status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len);
	switch (status) {
	case EAGAIN:
		/* message not fully arrived yet: back off and retry */
		delay_time *= 2;
		if (delay_time >= vdc_ldc_read_max_delay)
			delay_time = vdc_ldc_read_max_delay;
		delay(delay_time);
		goto loop;

	case 0:
		if (len == 0) {
			DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with "
			    "no error!\n", vdc->instance);
			goto loop;
		}

		*nbytesp = len;

		/*
		 * If there are pending messages, leave the
		 * read state as pending. Otherwise, set the state
		 * back to idle.
		 */
		status = ldc_chkq(vdc->ldc_handle, &q_has_pkts);
		if (status == 0 && !q_has_pkts)
			vdc->read_state = VDC_READ_IDLE;

		break;
	default:
		DMSG(vdc, 0, "ldc_read returned %d\n", status);
		break;
	}

done:
	mutex_exit(&vdc->read_lock);

	return (status);
}



#ifdef DEBUG
/*
 * Decode a VIO message tag into human-readable type/subtype/env names
 * and emit them via DMSG. DEBUG builds only.
 */
void
vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg)
{
	char *ms, *ss, *ses;
	switch (msg->tag.vio_msgtype) {
#define	Q(_s)	case _s : ms = #_s; break;
	Q(VIO_TYPE_CTRL)
	Q(VIO_TYPE_DATA)
	Q(VIO_TYPE_ERR)
#undef Q
	default: ms = "unknown"; break;
	}

	switch (msg->tag.vio_subtype) {
#define	Q(_s)	case _s : ss = #_s; break;
	Q(VIO_SUBTYPE_INFO)
	Q(VIO_SUBTYPE_ACK)
	Q(VIO_SUBTYPE_NACK)
#undef Q
	default: ss = "unknown"; break;
	}

	switch (msg->tag.vio_subtype_env) {
#define	Q(_s)	case _s : ses = #_s; break;
	Q(VIO_VER_INFO)
	Q(VIO_ATTR_INFO)
	Q(VIO_DRING_REG)
	Q(VIO_DRING_UNREG)
	Q(VIO_RDX)
	Q(VIO_PKT_DATA)
	Q(VIO_DESC_DATA)
	Q(VIO_DRING_DATA)
#undef Q
	default: ses = "unknown"; break;
	}

	DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, ms, ss, ses);
}
#endif

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise
 *	we return the error returned by LDC.
 *
 * Arguments:
 *	ldc_handle - LDC handle for the channel this instance of vdc uses
 *	pkt        - address of LDC message to be sent
 *	msglen     - the size of the message being sent. When the function
 *		     returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0	- Success.
 *	EINVAL	- pkt or msglen were NULL
 *	ECONNRESET - The connection was not up.
 *	EWOULDBLOCK - LDC queue is full
 *	xxx	- other error codes returned by ldc_write
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	status = 0;
	clock_t delay_ticks;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

#ifdef DEBUG
	vdc_decode_tag(vdc, (vio_msg_t *)pkt);
#endif
	/*
	 * Wait indefinitely to send if channel
	 * is busy, but bail out if we succeed or
	 * if the channel closes or is reset.
	 */
	delay_ticks = vdc_hz_min_ldc_delay;
	do {
		size = *msglen;
		status = ldc_write(vdc->ldc_handle, pkt, &size);
		if (status == EWOULDBLOCK) {
			delay(delay_ticks);
			/* geometric backoff */
			delay_ticks *= 2;
			if (delay_ticks > vdc_hz_max_ldc_delay)
				delay_ticks = vdc_hz_max_ldc_delay;
		}
	} while (status == EWOULDBLOCK);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		/* wake up any thread blocked in vdc_recv() */
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET))
			cv_signal(&vdc->read_cv);
		vdc->read_state = VDC_READ_RESET;
		mutex_exit(&vdc->read_lock);

		/* wake up any waiters in the reset thread */
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0, "[%d] write reset - "
			    "vdc is resetting ..\n", vdc->instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}

		return (ECONNRESET);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}

/*
 * Function:
 *	vdc_get_md_node
 *
 * Description:
 *	Get
the MD, the device node and the port node for the given
 *	disk instance. The caller is responsible for cleaning up the
 *	reference to the returned MD (mdpp) by calling md_fini_handle().
 *
 * Arguments:
 *	dip	- dev info pointer for this instance of the device driver.
 *	mdpp	- the returned MD.
 *	vd_nodep - the returned device node.
 *	vd_portp - the returned port node. The returned port node is NULL
 *		   if no port node is found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 *	ENXIO	- Unexpected error communicating with MD framework
 */
static int
vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep,
    mde_cookie_t *vd_portp)
{
	int		status = ENOENT;
	char		*node_name = NULL;
	md_t		*mdp = NULL;
	int		num_nodes;
	int		num_vdevs;
	int		num_vports;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	boolean_t	found_inst = B_FALSE;
	int		listsz;
	int		idx;
	uint64_t	md_inst;
	int		obp_inst;
	int		instance = ddi_get_instance(dip);

	/*
	 * Get the OBP instance number for comparison with the MD instance
	 *
	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris. Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance.
	 * If the "reg" property cannot be found, the device tree state is
	 * presumably so broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
		return (ENOENT);
	}
	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    OBP_REG, -1);
	DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst);

	/*
	 * We now walk the MD nodes to find the node for this vdisk.
	 */
	if ((mdp = md_get_handle()) == NULL) {
		cmn_err(CE_WARN, "unable to init machine description");
		return (ENXIO);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);

	/* allocate memory for nodes */
	listp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Search for all the virtual devices, we will then check to see which
	 * ones are disk nodes.
	 */
	num_vdevs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, VDC_MD_VDEV_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdevs <= 0) {
		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;
	}

	/* find the disk node whose cfg-handle matches our OBP instance */
	DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs);
	for (idx = 0; idx < num_vdevs; idx++) {
		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
		if ((status != 0) || (node_name == NULL)) {
			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
			    ": err %d", VDC_MD_VDEV_NAME, status);
			continue;
		}

		DMSGX(1, "[%d] Found node '%s'\n", instance, node_name);
		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
			status = md_get_prop_val(mdp, listp[idx],
			    VDC_MD_CFG_HDL, &md_inst);
			DMSGX(1, "[%d] vdc inst in MD=%lx\n",
			    instance, md_inst);
			if ((status == 0) && (md_inst == obp_inst)) {
				found_inst = B_TRUE;
				break;
			}
		}
	}

	if (!found_inst) {
		DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME);
		status = ENOENT;
		goto done;
	}
	DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst);

	*vd_nodep = listp[idx];
	*mdpp = mdp;

	/* now look for the port node underneath the vdisk node */
	num_vports = md_scan_dag(mdp, *vd_nodep,
	    md_find_name(mdp, VDC_MD_PORT_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vports != 1) {
		DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n",
		    VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME, num_vports);
	}

	*vd_portp = (num_vports == 0)? NULL: listp[0];

done:
	kmem_free(listp, listsz);
	return (status);
}

/*
 * Function:
 *	vdc_get_ldc_id()
 *
 * Description:
 *	This function gets the 'ldc-id' for this particular instance of vdc.
 *	The id returned is the guest domain channel endpoint LDC uses for
 *	communication with vds.
 *
 * Arguments:
 *	mdp	- pointer to the machine description.
 *	vd_node	- the vdisk element from the MD.
 *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 */
static int
vdc_get_ldc_id(md_t *mdp, mde_cookie_t vd_node, uint64_t *ldc_id)
{
	mde_cookie_t	*chanp = NULL;
	int		listsz;
	int		num_chans;
	int		num_nodes;
	int		status = 0;

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);

	/* allocate memory for nodes */
	chanp = kmem_zalloc(listsz, KM_SLEEP);

	/* get the channels for this node */
	num_chans = md_scan_dag(mdp, vd_node,
	    md_find_name(mdp, VDC_MD_CHAN_NAME),
	    md_find_name(mdp, "fwd"), chanp);

	/* expecting at least one channel */
	if (num_chans <= 0) {
		cmn_err(CE_NOTE, "No '%s' node for '%s' port",
		    VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;

	} else if (num_chans != 1) {
		DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n",
		    VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, num_chans);
	}

	/*
	 * We use the first channel found (index 0), irrespective of how
	 * many are there in total.
	 */
	if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, ldc_id) != 0) {
		cmn_err(CE_NOTE, "Channel '%s' property not found", VDC_MD_ID);
		status = ENOENT;
	}

done:
	kmem_free(chanp, listsz);
	return (status);
}

/*
 * Function:
 *	vdc_do_ldc_up()
 *
 * Description:
 *	Bring the LDC channel up. ECONNREFUSED from ldc_up() (listener
 *	not yet ready at the other end) is not treated as an error.
 *	Refuses to act if the instance is detaching.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success (or connection refused by the peer).
 *	EINVAL	- vdc is detaching.
 *	xxx	- other error codes returned by ldc_up().
 */
static int
vdc_do_ldc_up(vdc_t *vdc)
{
	int	status;
	ldc_status_t ldc_state;

	DMSG(vdc, 0, "[%d] Bringing up channel %lx\n",
	    vdc->instance, vdc->ldc_id);

	if (vdc->lifecycle == VDC_LC_DETACHING)
		return (EINVAL);

	if ((status = ldc_up(vdc->ldc_handle)) != 0) {
		switch (status) {
		case ECONNREFUSED:	/* listener not ready at other end */
			DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n",
			    vdc->instance, vdc->ldc_id, status);
			status = 0;
			break;
		default:
			DMSG(vdc, 0, "[%d] Failed to bring up LDC: "
			    "channel=%ld, err=%d", vdc->instance, vdc->ldc_id,
			    status);
			break;
		}
	}

	/* if the channel is already up, reset the sequence numbers */
	if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) {
		vdc->ldc_state = ldc_state;
		if (ldc_state == LDC_UP) {
			DMSG(vdc, 0, "[%d] LDC channel already up\n",
			    vdc->instance);
			vdc->seq_num = 1;
			vdc->seq_num_reply = 0;
		}
	}

	return (status);
}

/*
 * Function:
 *	vdc_terminate_ldc()
 *
 * Description:
 *	Tear down the LDC channel, undoing only the initialization steps
 *	recorded in vdc->initialized (close, unregister callback, fini).
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_terminate_ldc(vdc_t *vdc)
{
	int	instance = ddi_get_instance(vdc->dip);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized);

	if (vdc->initialized & VDC_LDC_OPEN) {
		DMSG(vdc, 0, "[%d] ldc_close()\n", instance);
		(void) ldc_close(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC_CB) {
		DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance);
		(void) ldc_unreg_callback(vdc->ldc_handle);
	}
	if (vdc->initialized & VDC_LDC) {
		DMSG(vdc, 0, "[%d] ldc_fini()\n", instance);
		(void) ldc_fini(vdc->ldc_handle);
		vdc->ldc_handle = NULL;
	}

	vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN);
}

/* -------------------------------------------------------------------------- */

/*
 * Descriptor Ring helper routines
 */

/*
 * Function:
 *	vdc_init_descriptor_ring()
 *
 * Description:
 *	Create, bind and initialize the descriptor ring used to pass
 *	I/O requests to the vDisk server.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 *	non-zero - error code returned by the failing ldc_mem_* call
 */
static int
vdc_init_descriptor_ring(vdc_t *vdc)
{
	vd_dring_entry_t	*dep = NULL;	/* DRing Entry pointer */
	int			status = 0;
	int			i;

	DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized);

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(vdc->ldc_handle != NULL);

	/* ensure we have enough room to store max sized block */
	ASSERT(maxphys <= VD_MAX_BLOCK_SIZE);

	if ((vdc->initialized & VDC_DRING_INIT) == 0) {
		DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance);
		/*
		 * Calculate the maximum block size we can transmit using one
		 * Descriptor Ring entry from the attributes returned by the
		 * vDisk server. This is subject to a minimum of 'maxphys'
		 * as we do not have the capability to split requests over
		 * multiple DRing entries.
		 */
		if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) {
			DMSG(vdc, 0, "[%d] using minimum DRing size\n",
			    vdc->instance);
			vdc->dring_max_cookies = maxphys / PAGESIZE;
		} else {
			vdc->dring_max_cookies =
			    (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE;
		}
		/*
		 * Entry size includes space for (dring_max_cookies - 1)
		 * extra cookies; the first cookie is part of
		 * vd_dring_entry_t itself.
		 */
		vdc->dring_entry_size = (sizeof (vd_dring_entry_t) +
		    (sizeof (ldc_mem_cookie_t) *
		    (vdc->dring_max_cookies - 1)));
		vdc->dring_len = VD_DRING_LEN;

		status = ldc_mem_dring_create(vdc->dring_len,
		    vdc->dring_entry_size, &vdc->ldc_dring_hdl);
		if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) {
			DMSG(vdc, 0, "[%d] Descriptor ring creation failed",
			    vdc->instance);
			return (status);
		}
		vdc->initialized |= VDC_DRING_INIT;
	}

	if ((vdc->initialized & VDC_DRING_BOUND) == 0) {
		DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance);
		vdc->dring_cookie =
		    kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP);

		status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl,
		    LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW,
		    &vdc->dring_cookie[0],
		    &vdc->dring_cookie_count);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] Failed to bind descriptor ring "
			    "(%lx) to channel (%lx) status=%d\n",
			    vdc->instance, vdc->ldc_dring_hdl,
			    vdc->ldc_handle, status);
			return (status);
		}
		/* the ring is expected to be covered by a single cookie */
		ASSERT(vdc->dring_cookie_count == 1);
		vdc->initialized |= VDC_DRING_BOUND;
	}

	status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info);
	if (status != 0) {
		DMSG(vdc, 0,
		    "[%d] Failed to get info for descriptor ring (%lx)\n",
		    vdc->instance, vdc->ldc_dring_hdl);
		return (status);
	}

	if ((vdc->initialized & VDC_DRING_LOCAL) == 0) {
		DMSG(vdc, 0, "[%d] local dring\n", vdc->instance);

		/* Allocate the local copy of this dring */
		vdc->local_dring =
		    kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t),
		    KM_SLEEP);
		vdc->initialized |= VDC_DRING_LOCAL;
	}

	/*
	 * Mark all DRing entries as free and initialize the private
	 * descriptor's memory handles. If any entry is initialized,
	 * we need to free it later so we set the bit in 'initialized'
	 * at the start.
	 */
	vdc->initialized |= VDC_DRING_ENTRY;
	for (i = 0; i < vdc->dring_len; i++) {
		dep = VDC_GET_DRING_ENTRY_PTR(vdc, i);
		dep->hdr.dstate = VIO_DESC_FREE;

		status = ldc_mem_alloc_handle(vdc->ldc_handle,
		    &vdc->local_dring[i].desc_mhdl);
		if (status != 0) {
			DMSG(vdc, 0, "![%d] Failed to alloc mem handle for"
			    " descriptor %d", vdc->instance, i);
			return (status);
		}
		vdc->local_dring[i].is_free = B_TRUE;
		vdc->local_dring[i].dep = dep;
	}

	/* Initialize the starting index */
	vdc->dring_curr_idx = 0;

	return (status);
}

/*
 * Function:
 *	vdc_destroy_descriptor_ring()
 *
 * Description:
 *	Undo vdc_init_descriptor_ring(): free the per-entry memory
 *	handles, the local shadow ring, then unbind and destroy the
 *	shared ring, clearing the corresponding 'initialized' bits as
 *	each stage succeeds.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_destroy_descriptor_ring(vdc_t *vdc)
{
	vdc_local_desc_t	*ldep = NULL;	/* Local Dring Entry Pointer */
	ldc_mem_handle_t	mhdl = NULL;
	ldc_mem_info_t		minfo;
	int			status = -1;
	int			i;	/* loop */

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] Entered\n", vdc->instance);

	if (vdc->initialized & VDC_DRING_ENTRY) {
		DMSG(vdc, 0,
		    "[%d] Removing Local DRing entries\n", vdc->instance);
		for (i = 0; i < vdc->dring_len; i++) {
			ldep = &vdc->local_dring[i];
			mhdl = ldep->desc_mhdl;

			if (mhdl == NULL)
				continue;

			if ((status = ldc_mem_info(mhdl, &minfo)) != 0) {
				DMSG(vdc, 0,
				    "ldc_mem_info returned an error: %d\n",
				    status);

				/*
				 * This must mean that the mem handle
				 * is not valid. Clear it out so that
				 * no one tries to use it.
				 */
				ldep->desc_mhdl = NULL;
				continue;
			}

			/* unbind before freeing, if still bound */
			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_unbind_handle(mhdl);
			}

			(void) ldc_mem_free_handle(mhdl);

			ldep->desc_mhdl = NULL;
		}
		vdc->initialized &= ~VDC_DRING_ENTRY;
	}

	if (vdc->initialized & VDC_DRING_LOCAL) {
		DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance);
		kmem_free(vdc->local_dring,
		    vdc->dring_len * sizeof (vdc_local_desc_t));
		vdc->initialized &= ~VDC_DRING_LOCAL;
	}

	if (vdc->initialized & VDC_DRING_BOUND) {
		DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance);
		status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->initialized &= ~VDC_DRING_BOUND;
		} else {
			DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx",
			    vdc->instance, status, vdc->ldc_dring_hdl);
		}
		kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t));
	}

	if (vdc->initialized & VDC_DRING_INIT) {
		DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance);
		status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->ldc_dring_hdl = NULL;
			bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t));
			vdc->initialized &= ~VDC_DRING_INIT;
		} else {
			DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)",
			    vdc->instance, status, vdc->ldc_dring_hdl);
		}
	}
}

/*
 * Function:
 *	vdc_map_to_shared_dring()
 *
 * Description:
 *	Copy contents of the local descriptor to the shared
 *	memory descriptor.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *	idx	- descriptor ring index
 *
 * Return Code:
 *	0	- Success
 *	non-zero - vdc_populate_mem_hdl() failed to bind the buffer
 */
static int
vdc_map_to_shared_dring(vdc_t *vdcp, int idx)
{
	vdc_local_desc_t	*ldep;
	vd_dring_entry_t	*dep;
	int			rv;

	ldep = &(vdcp->local_dring[idx]);

	/* for now leave in the old pop_mem_hdl stuff */
	if (ldep->nbytes > 0) {
		rv = vdc_populate_mem_hdl(vdcp, ldep);
		if (rv) {
			DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n",
			    vdcp->instance);
			return (rv);
		}
	}

	/*
	 * fill in the data details into the DRing
	 */
	dep = ldep->dep;
	ASSERT(dep != NULL);

	dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp);
	dep->payload.operation = ldep->operation;
	dep->payload.addr = ldep->offset;
	dep->payload.nbytes = ldep->nbytes;
	dep->payload.status = (uint32_t)-1;	/* vds will set valid value */
	dep->payload.slice = ldep->slice;
	dep->hdr.dstate = VIO_DESC_READY;
	dep->hdr.ack = 1;	/* request an ACK for every message */

	return (0);
}

/*
 * Function:
 *	vdc_send_request
 *
 * Description:
 *	This routine writes the data to be transmitted to vds into the
 *	descriptor, notifies vds that the ring has been updated and
 *	then waits for the request to be processed.
 *
 * Arguments:
 *	vdcp	- the soft state pointer
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	addr	- address of data buf to be read/written.
 *	nbytes	- number of bytes to read/write
 *	slice	- the disk slice this request is for
 *	offset	- relative disk offset
 *	cb_type	- type of call - STRATEGY or SYNC
 *	cb_arg	- parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			.
 *		  LP64 diskaddr_t (block I/O)
 *	dir	- direction of operation (READ/WRITE/BOTH)
 *
 * Return Codes:
 *	0
 *	ENXIO	- instance is detaching
 *	EIO	- connection timeout was reached before the channel came up
 */
static int
vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr,
    size_t nbytes, int slice, diskaddr_t offset, int cb_type,
    void *cb_arg, vio_desc_direction_t dir)
{
	ASSERT(vdcp != NULL);
	ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR);

	mutex_enter(&vdcp->lock);

	/*
	 * Retry vdc_populate_descriptor() until it succeeds; each retry
	 * first waits (on running_cv) for the connection to be in the
	 * RUNNING state, failing fast if we are detaching or the
	 * connection timeout has expired.
	 */
	do {
		while (vdcp->state != VDC_STATE_RUNNING) {

			/* return error if detaching */
			if (vdcp->state == VDC_STATE_DETACH) {
				mutex_exit(&vdcp->lock);
				return (ENXIO);
			}

			/* fail request if connection timeout is reached */
			if (vdcp->ctimeout_reached) {
				mutex_exit(&vdcp->lock);
				return (EIO);
			}

			cv_wait(&vdcp->running_cv, &vdcp->lock);
		}

	} while (vdc_populate_descriptor(vdcp, operation, addr,
	    nbytes, slice, offset, cb_type, cb_arg, dir));

	mutex_exit(&vdcp->lock);
	return (0);
}


/*
 * Function:
 *	vdc_populate_descriptor
 *
 * Description:
 *	This routine writes the data to be transmitted to vds into the
 *	descriptor, notifies vds that the ring has been updated and
 *	then waits for the request to be processed.
 *
 * Arguments:
 *	vdcp	- the soft state pointer
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	addr	- address of data buf to be read/written.
 *	nbytes	- number of bytes to read/write
 *	slice	- the disk slice this request is for
 *	offset	- relative disk offset
 *	cb_type	- type of call - STRATEGY or SYNC
 *	cb_arg	- parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			.
LP64 diskaddr_t (block I/O) 2671 * dir - direction of operation (READ/WRITE/BOTH) 2672 * 2673 * Return Codes: 2674 * 0 2675 * EAGAIN 2676 * EFAULT 2677 * ENXIO 2678 * EIO 2679 */ 2680 static int 2681 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 2682 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2683 void *cb_arg, vio_desc_direction_t dir) 2684 { 2685 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 2686 int idx; /* Index of DRing entry used */ 2687 int next_idx; 2688 vio_dring_msg_t dmsg; 2689 size_t msglen; 2690 int rv; 2691 2692 ASSERT(MUTEX_HELD(&vdcp->lock)); 2693 vdcp->threads_pending++; 2694 loop: 2695 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 2696 2697 /* Get next available D-Ring entry */ 2698 idx = vdcp->dring_curr_idx; 2699 local_dep = &(vdcp->local_dring[idx]); 2700 2701 if (!local_dep->is_free) { 2702 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 2703 vdcp->instance); 2704 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 2705 if (vdcp->state == VDC_STATE_RUNNING || 2706 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2707 goto loop; 2708 } 2709 vdcp->threads_pending--; 2710 return (ECONNRESET); 2711 } 2712 2713 next_idx = idx + 1; 2714 if (next_idx >= vdcp->dring_len) 2715 next_idx = 0; 2716 vdcp->dring_curr_idx = next_idx; 2717 2718 ASSERT(local_dep->is_free); 2719 2720 local_dep->operation = operation; 2721 local_dep->addr = addr; 2722 local_dep->nbytes = nbytes; 2723 local_dep->slice = slice; 2724 local_dep->offset = offset; 2725 local_dep->cb_type = cb_type; 2726 local_dep->cb_arg = cb_arg; 2727 local_dep->dir = dir; 2728 2729 local_dep->is_free = B_FALSE; 2730 2731 rv = vdc_map_to_shared_dring(vdcp, idx); 2732 if (rv) { 2733 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 2734 vdcp->instance); 2735 /* free the descriptor */ 2736 local_dep->is_free = B_TRUE; 2737 vdcp->dring_curr_idx = idx; 2738 cv_wait(&vdcp->membind_cv, &vdcp->lock); 2739 if (vdcp->state == 
VDC_STATE_RUNNING || 2740 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2741 goto loop; 2742 } 2743 vdcp->threads_pending--; 2744 return (ECONNRESET); 2745 } 2746 2747 /* 2748 * Send a msg with the DRing details to vds 2749 */ 2750 VIO_INIT_DRING_DATA_TAG(dmsg); 2751 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 2752 dmsg.dring_ident = vdcp->dring_ident; 2753 dmsg.start_idx = idx; 2754 dmsg.end_idx = idx; 2755 vdcp->seq_num++; 2756 2757 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp); 2758 2759 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 2760 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 2761 2762 /* 2763 * note we're still holding the lock here to 2764 * make sure the message goes out in order !!!... 2765 */ 2766 msglen = sizeof (dmsg); 2767 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 2768 switch (rv) { 2769 case ECONNRESET: 2770 /* 2771 * vdc_send initiates the reset on failure. 2772 * Since the transaction has already been put 2773 * on the local dring, it will automatically get 2774 * retried when the channel is reset. Given that, 2775 * it is ok to just return success even though the 2776 * send failed. 2777 */ 2778 rv = 0; 2779 break; 2780 2781 case 0: /* EOK */ 2782 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 2783 break; 2784 2785 default: 2786 goto cleanup_and_exit; 2787 } 2788 2789 vdcp->threads_pending--; 2790 return (rv); 2791 2792 cleanup_and_exit: 2793 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 2794 return (ENXIO); 2795 } 2796 2797 /* 2798 * Function: 2799 * vdc_do_sync_op 2800 * 2801 * Description: 2802 * Wrapper around vdc_populate_descriptor that blocks until the 2803 * response to the message is available. 2804 * 2805 * Arguments: 2806 * vdcp - the soft state pointer 2807 * operation - operation we want vds to perform (VD_OP_XXX) 2808 * addr - address of data buf to be read/written. 
2809 * nbytes - number of bytes to read/write 2810 * slice - the disk slice this request is for 2811 * offset - relative disk offset 2812 * cb_type - type of call - STRATEGY or SYNC 2813 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2814 * . mode for ioctl(9e) 2815 * . LP64 diskaddr_t (block I/O) 2816 * dir - direction of operation (READ/WRITE/BOTH) 2817 * 2818 * Return Codes: 2819 * 0 2820 * EAGAIN 2821 * EFAULT 2822 * ENXIO 2823 * EIO 2824 */ 2825 static int 2826 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 2827 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 2828 vio_desc_direction_t dir) 2829 { 2830 int status; 2831 2832 ASSERT(cb_type == CB_SYNC); 2833 2834 /* 2835 * Grab the lock, if blocked wait until the server 2836 * response causes us to wake up again. 2837 */ 2838 mutex_enter(&vdcp->lock); 2839 vdcp->sync_op_cnt++; 2840 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 2841 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 2842 2843 if (vdcp->state == VDC_STATE_DETACH) { 2844 cv_broadcast(&vdcp->sync_blocked_cv); 2845 vdcp->sync_op_cnt--; 2846 mutex_exit(&vdcp->lock); 2847 return (ENXIO); 2848 } 2849 2850 /* now block anyone other thread entering after us */ 2851 vdcp->sync_op_blocked = B_TRUE; 2852 vdcp->sync_op_pending = B_TRUE; 2853 mutex_exit(&vdcp->lock); 2854 2855 status = vdc_send_request(vdcp, operation, addr, 2856 nbytes, slice, offset, cb_type, cb_arg, dir); 2857 2858 mutex_enter(&vdcp->lock); 2859 2860 if (status != 0) { 2861 vdcp->sync_op_pending = B_FALSE; 2862 } else { 2863 /* 2864 * block until our transaction completes. 2865 * Also anyone else waiting also gets to go next. 
2866 */ 2867 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 2868 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 2869 2870 DMSG(vdcp, 2, ": operation returned %d\n", 2871 vdcp->sync_op_status); 2872 if (vdcp->state == VDC_STATE_DETACH) { 2873 vdcp->sync_op_pending = B_FALSE; 2874 status = ENXIO; 2875 } else { 2876 status = vdcp->sync_op_status; 2877 } 2878 } 2879 2880 vdcp->sync_op_status = 0; 2881 vdcp->sync_op_blocked = B_FALSE; 2882 vdcp->sync_op_cnt--; 2883 2884 /* signal the next waiting thread */ 2885 cv_signal(&vdcp->sync_blocked_cv); 2886 mutex_exit(&vdcp->lock); 2887 2888 return (status); 2889 } 2890 2891 2892 /* 2893 * Function: 2894 * vdc_drain_response() 2895 * 2896 * Description: 2897 * When a guest is panicking, the completion of requests needs to be 2898 * handled differently because interrupts are disabled and vdc 2899 * will not get messages. We have to poll for the messages instead. 2900 * 2901 * Arguments: 2902 * vdc - soft state pointer for this instance of the device driver. 2903 * 2904 * Return Code: 2905 * 0 - Success 2906 */ 2907 static int 2908 vdc_drain_response(vdc_t *vdc) 2909 { 2910 int rv, idx, retries; 2911 size_t msglen; 2912 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2913 vio_dring_msg_t dmsg; 2914 2915 mutex_enter(&vdc->lock); 2916 2917 retries = 0; 2918 for (;;) { 2919 msglen = sizeof (dmsg); 2920 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 2921 if (rv) { 2922 rv = EINVAL; 2923 break; 2924 } 2925 2926 /* 2927 * if there are no packets wait and check again 2928 */ 2929 if ((rv == 0) && (msglen == 0)) { 2930 if (retries++ > vdc_dump_retries) { 2931 rv = EAGAIN; 2932 break; 2933 } 2934 2935 drv_usecwait(vdc_usec_timeout_dump); 2936 continue; 2937 } 2938 2939 /* 2940 * Ignore all messages that are not ACKs/NACKs to 2941 * DRing requests. 
2942 */ 2943 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2944 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2945 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 2946 dmsg.tag.vio_msgtype, 2947 dmsg.tag.vio_subtype, 2948 dmsg.tag.vio_subtype_env); 2949 continue; 2950 } 2951 2952 /* 2953 * set the appropriate return value for the current request. 2954 */ 2955 switch (dmsg.tag.vio_subtype) { 2956 case VIO_SUBTYPE_ACK: 2957 rv = 0; 2958 break; 2959 case VIO_SUBTYPE_NACK: 2960 rv = EAGAIN; 2961 break; 2962 default: 2963 continue; 2964 } 2965 2966 idx = dmsg.start_idx; 2967 if (idx >= vdc->dring_len) { 2968 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 2969 vdc->instance, idx); 2970 continue; 2971 } 2972 ldep = &vdc->local_dring[idx]; 2973 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 2974 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 2975 vdc->instance, idx, ldep->dep->hdr.dstate); 2976 continue; 2977 } 2978 2979 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 2980 vdc->instance, idx, ldep->dep->hdr.dstate); 2981 rv = vdc_depopulate_descriptor(vdc, idx); 2982 if (rv) { 2983 DMSG(vdc, 0, 2984 "[%d] Entry @ %d - depopulate failed ..\n", 2985 vdc->instance, idx); 2986 } 2987 2988 /* if this is the last descriptor - break out of loop */ 2989 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 2990 break; 2991 } 2992 2993 mutex_exit(&vdc->lock); 2994 DMSG(vdc, 0, "End idx=%d\n", idx); 2995 2996 return (rv); 2997 } 2998 2999 3000 /* 3001 * Function: 3002 * vdc_depopulate_descriptor() 3003 * 3004 * Description: 3005 * 3006 * Arguments: 3007 * vdc - soft state pointer for this instance of the device driver. 
 *	idx	- Index of the Descriptor Ring entry being modified
 *
 * Return Code:
 *	0	- Success
 *	non-zero - status reported by the vDisk server for the request,
 *		   or EINVAL if the memory handle unbind failed and the
 *		   server reported success
 */
static int
vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx)
{
	vd_dring_entry_t *dep = NULL;	/* Dring Entry Pointer */
	vdc_local_desc_t *ldep = NULL;	/* Local Dring Entry Pointer */
	int	status = ENXIO;
	int	rv = 0;

	ASSERT(vdc != NULL);
	ASSERT(idx < vdc->dring_len);
	ldep = &vdc->local_dring[idx];
	ASSERT(ldep != NULL);
	ASSERT(MUTEX_HELD(&vdc->lock));

	DMSG(vdc, 2, ": idx = %d\n", idx);
	dep = ldep->dep;
	ASSERT(dep != NULL);
	/* entry must be completed by the server or cancelled */
	ASSERT((dep->hdr.dstate == VIO_DESC_DONE) ||
	    (dep->payload.status == ECANCELED));

	VDC_MARK_DRING_ENTRY_FREE(vdc, idx);

	ldep->is_free = B_TRUE;
	/* propagate the server's status for this request to the caller */
	status = dep->payload.status;
	DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status);

	/*
	 * If no buffers were used to transfer information to the server when
	 * populating the descriptor then no memory handles need to be unbound
	 * and we can return now.
	 */
	if (ldep->nbytes == 0) {
		cv_signal(&vdc->dring_free_cv);
		return (status);
	}

	/*
	 * If the upper layer passed in a misaligned address we copied the
	 * data into an aligned buffer before sending it to LDC - we now
	 * copy it back to the original buffer.
	 */
	if (ldep->align_addr) {
		ASSERT(ldep->addr != NULL);

		if (dep->payload.nbytes > 0)
			bcopy(ldep->align_addr, ldep->addr,
			    dep->payload.nbytes);
		kmem_free(ldep->align_addr,
		    sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8));
		ldep->align_addr = NULL;
	}

	rv = ldc_mem_unbind_handle(ldep->desc_mhdl);
	if (rv != 0) {
		DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)",
		    vdc->instance, ldep->desc_mhdl, idx, rv);
		/*
		 * The error returned by the vDisk server is more informative
		 * and thus has a higher priority but if it isn't set we ensure
		 * that this function returns an error.
		 */
		if (status == 0)
			status = EINVAL;
	}

	/* wake threads waiting for a memory handle or a free ring entry */
	cv_signal(&vdc->membind_cv);
	cv_signal(&vdc->dring_free_cv);

	return (status);
}

/*
 * Function:
 *	vdc_populate_mem_hdl()
 *
 * Description:
 *	Bind the buffer described by a local descriptor entry to its
 *	LDC memory handle, bouncing misaligned buffers through an
 *	8-byte-aligned copy first.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
3092 * idx - Index of the Descriptor Ring entry being modified 3093 * addr - virtual address being mapped in 3094 * nybtes - number of bytes in 'addr' 3095 * operation - the vDisk operation being performed (VD_OP_xxx) 3096 * 3097 * Return Code: 3098 * 0 - Success 3099 */ 3100 static int 3101 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 3102 { 3103 vd_dring_entry_t *dep = NULL; 3104 ldc_mem_handle_t mhdl; 3105 caddr_t vaddr; 3106 size_t nbytes; 3107 uint8_t perm = LDC_MEM_RW; 3108 uint8_t maptype; 3109 int rv = 0; 3110 int i; 3111 3112 ASSERT(vdcp != NULL); 3113 3114 dep = ldep->dep; 3115 mhdl = ldep->desc_mhdl; 3116 3117 switch (ldep->dir) { 3118 case VIO_read_dir: 3119 perm = LDC_MEM_W; 3120 break; 3121 3122 case VIO_write_dir: 3123 perm = LDC_MEM_R; 3124 break; 3125 3126 case VIO_both_dir: 3127 perm = LDC_MEM_RW; 3128 break; 3129 3130 default: 3131 ASSERT(0); /* catch bad programming in vdc */ 3132 } 3133 3134 /* 3135 * LDC expects any addresses passed in to be 8-byte aligned. 
We need 3136 * to copy the contents of any misaligned buffers to a newly allocated 3137 * buffer and bind it instead (and copy the the contents back to the 3138 * original buffer passed in when depopulating the descriptor) 3139 */ 3140 vaddr = ldep->addr; 3141 nbytes = ldep->nbytes; 3142 if (((uint64_t)vaddr & 0x7) != 0) { 3143 ASSERT(ldep->align_addr == NULL); 3144 ldep->align_addr = 3145 kmem_alloc(sizeof (caddr_t) * 3146 P2ROUNDUP(nbytes, 8), KM_SLEEP); 3147 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 3148 "(buf=%p nb=%ld op=%d)\n", 3149 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 3150 nbytes, ldep->operation); 3151 if (perm != LDC_MEM_W) 3152 bcopy(vaddr, ldep->align_addr, nbytes); 3153 vaddr = ldep->align_addr; 3154 } 3155 3156 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 3157 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 3158 maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 3159 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 3160 vdcp->instance, dep->payload.ncookies); 3161 if (rv != 0) { 3162 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 3163 "(mhdl=%p, buf=%p, err=%d)\n", 3164 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 3165 if (ldep->align_addr) { 3166 kmem_free(ldep->align_addr, 3167 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 3168 ldep->align_addr = NULL; 3169 } 3170 return (EAGAIN); 3171 } 3172 3173 /* 3174 * Get the other cookies (if any). 
3175 */ 3176 for (i = 1; i < dep->payload.ncookies; i++) { 3177 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 3178 if (rv != 0) { 3179 (void) ldc_mem_unbind_handle(mhdl); 3180 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3181 "(mhdl=%lx cnum=%d), err=%d", 3182 vdcp->instance, mhdl, i, rv); 3183 if (ldep->align_addr) { 3184 kmem_free(ldep->align_addr, 3185 sizeof (caddr_t) * ldep->nbytes); 3186 ldep->align_addr = NULL; 3187 } 3188 return (EAGAIN); 3189 } 3190 } 3191 3192 return (rv); 3193 } 3194 3195 /* 3196 * Interrupt handlers for messages from LDC 3197 */ 3198 3199 /* 3200 * Function: 3201 * vdc_handle_cb() 3202 * 3203 * Description: 3204 * 3205 * Arguments: 3206 * event - Type of event (LDC_EVT_xxx) that triggered the callback 3207 * arg - soft state pointer for this instance of the device driver. 3208 * 3209 * Return Code: 3210 * 0 - Success 3211 */ 3212 static uint_t 3213 vdc_handle_cb(uint64_t event, caddr_t arg) 3214 { 3215 ldc_status_t ldc_state; 3216 int rv = 0; 3217 3218 vdc_t *vdc = (vdc_t *)(void *)arg; 3219 3220 ASSERT(vdc != NULL); 3221 3222 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 3223 3224 /* 3225 * Depending on the type of event that triggered this callback, 3226 * we modify the handshake state or read the data. 3227 * 3228 * NOTE: not done as a switch() as event could be triggered by 3229 * a state change and a read request. Also the ordering of the 3230 * check for the event types is deliberate. 3231 */ 3232 if (event & LDC_EVT_UP) { 3233 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 3234 3235 mutex_enter(&vdc->lock); 3236 3237 /* get LDC state */ 3238 rv = ldc_status(vdc->ldc_handle, &ldc_state); 3239 if (rv != 0) { 3240 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 3241 vdc->instance, rv); 3242 return (LDC_SUCCESS); 3243 } 3244 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 3245 /* 3246 * Reset the transaction sequence numbers when 3247 * LDC comes up. 
We then kick off the handshake 3248 * negotiation with the vDisk server. 3249 */ 3250 vdc->seq_num = 1; 3251 vdc->seq_num_reply = 0; 3252 vdc->ldc_state = ldc_state; 3253 cv_signal(&vdc->initwait_cv); 3254 } 3255 3256 mutex_exit(&vdc->lock); 3257 } 3258 3259 if (event & LDC_EVT_READ) { 3260 DMSG(vdc, 0, "[%d] Received LDC_EVT_READ\n", vdc->instance); 3261 mutex_enter(&vdc->read_lock); 3262 cv_signal(&vdc->read_cv); 3263 vdc->read_state = VDC_READ_PENDING; 3264 mutex_exit(&vdc->read_lock); 3265 3266 /* that's all we have to do - no need to handle DOWN/RESET */ 3267 return (LDC_SUCCESS); 3268 } 3269 3270 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 3271 3272 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 3273 3274 mutex_enter(&vdc->lock); 3275 /* 3276 * Need to wake up any readers so they will 3277 * detect that a reset has occurred. 3278 */ 3279 mutex_enter(&vdc->read_lock); 3280 if ((vdc->read_state == VDC_READ_WAITING) || 3281 (vdc->read_state == VDC_READ_RESET)) 3282 cv_signal(&vdc->read_cv); 3283 vdc->read_state = VDC_READ_RESET; 3284 mutex_exit(&vdc->read_lock); 3285 3286 /* wake up any threads waiting for connection to come up */ 3287 if (vdc->state == VDC_STATE_INIT_WAITING) { 3288 vdc->state = VDC_STATE_RESETTING; 3289 cv_signal(&vdc->initwait_cv); 3290 } 3291 3292 mutex_exit(&vdc->lock); 3293 } 3294 3295 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 3296 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3297 vdc->instance, event); 3298 3299 return (LDC_SUCCESS); 3300 } 3301 3302 /* 3303 * Function: 3304 * vdc_wait_for_response() 3305 * 3306 * Description: 3307 * Block waiting for a response from the server. If there is 3308 * no data the thread block on the read_cv that is signalled 3309 * by the callback when an EVT_READ occurs. 3310 * 3311 * Arguments: 3312 * vdcp - soft state pointer for this instance of the device driver. 
3313 * 3314 * Return Code: 3315 * 0 - Success 3316 */ 3317 static int 3318 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3319 { 3320 size_t nbytes = sizeof (*msgp); 3321 int status; 3322 3323 ASSERT(vdcp != NULL); 3324 3325 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3326 3327 status = vdc_recv(vdcp, msgp, &nbytes); 3328 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 3329 status, (int)nbytes); 3330 if (status) { 3331 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3332 vdcp->instance, status); 3333 return (status); 3334 } 3335 3336 if (nbytes < sizeof (vio_msg_tag_t)) { 3337 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3338 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3339 return (ENOMSG); 3340 } 3341 3342 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3343 msgp->tag.vio_msgtype, 3344 msgp->tag.vio_subtype, 3345 msgp->tag.vio_subtype_env); 3346 3347 /* 3348 * Verify the Session ID of the message 3349 * 3350 * Every message after the Version has been negotiated should 3351 * have the correct session ID set. 3352 */ 3353 if ((msgp->tag.vio_sid != vdcp->session_id) && 3354 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3355 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3356 "expected 0x%lx [seq num %lx @ %d]", 3357 vdcp->instance, msgp->tag.vio_sid, 3358 vdcp->session_id, 3359 ((vio_dring_msg_t *)msgp)->seq_num, 3360 ((vio_dring_msg_t *)msgp)->start_idx); 3361 return (ENOMSG); 3362 } 3363 return (0); 3364 } 3365 3366 3367 /* 3368 * Function: 3369 * vdc_resubmit_backup_dring() 3370 * 3371 * Description: 3372 * Resubmit each descriptor in the backed up dring to 3373 * vDisk server. The Dring was backed up during connection 3374 * reset. 3375 * 3376 * Arguments: 3377 * vdcp - soft state pointer for this instance of the device driver. 
 *
 * Return Code:
 *	0	- Success
 *	non-zero - failure to resubmit an entry or to receive/process
 *		   its response
 */
static int
vdc_resubmit_backup_dring(vdc_t *vdcp)
{
	int		count;
	int		b_idx;
	int		rv;
	int		dring_size;
	int		status;
	vio_msg_t	vio_msg;
	vdc_local_desc_t	*curr_ldep;

	ASSERT(MUTEX_NOT_HELD(&vdcp->lock));
	ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING);

	if (vdcp->local_dring_backup == NULL) {
		/* the pending requests have already been processed */
		return (0);
	}

	DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n",
	    vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail);

	/*
	 * Walk the backup copy of the local descriptor ring and
	 * resubmit all the outstanding transactions.
	 */
	b_idx = vdcp->local_dring_backup_tail;
	for (count = 0; count < vdcp->local_dring_backup_len; count++) {

		curr_ldep = &(vdcp->local_dring_backup[b_idx]);

		/* only resubmit outstanding transactions */
		if (!curr_ldep->is_free) {

			DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx);
			mutex_enter(&vdcp->lock);
			rv = vdc_populate_descriptor(vdcp, curr_ldep->operation,
			    curr_ldep->addr, curr_ldep->nbytes,
			    curr_ldep->slice, curr_ldep->offset,
			    curr_ldep->cb_type, curr_ldep->cb_arg,
			    curr_ldep->dir);
			mutex_exit(&vdcp->lock);
			if (rv) {
				DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n",
				    vdcp->instance, b_idx);
				return (rv);
			}

			/*
			 * Wait for the response message. Entries are
			 * resubmitted and acknowledged one at a time.
			 */
			DMSG(vdcp, 1, "waiting for response to idx=%x\n",
			    b_idx);
			status = vdc_wait_for_response(vdcp, &vio_msg);
			if (status) {
				DMSG(vdcp, 1, "[%d] wait_for_response "
				    "returned err=%d\n", vdcp->instance,
				    status);
				return (status);
			}

			DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx);
			status = vdc_process_data_msg(vdcp, &vio_msg);
			if (status) {
				DMSG(vdcp, 1, "[%d] process_data_msg "
				    "returned err=%d\n", vdcp->instance,
				    status);
				return (status);
			}
		}

		/* get the next element to submit */
		if (++b_idx >= vdcp->local_dring_backup_len)
			b_idx = 0;
	}

	/* all done - now clear up pending dring copy */
	dring_size = vdcp->local_dring_backup_len *
	    sizeof (vdcp->local_dring_backup[0]);

	(void) kmem_free(vdcp->local_dring_backup, dring_size);

	vdcp->local_dring_backup = NULL;

	return (0);
}

/*
 * Function:
 *	vdc_cancel_backup_ring
 *
 * Description:
 *	Cancel each descriptor in the backed up dring to vDisk server.
 *	The Dring was backed up during connection reset.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
3477 * 3478 * Return Code: 3479 * None 3480 */ 3481 void 3482 vdc_cancel_backup_ring(vdc_t *vdcp) 3483 { 3484 vdc_local_desc_t *ldep; 3485 struct buf *bufp; 3486 int count; 3487 int b_idx; 3488 int dring_size; 3489 3490 ASSERT(MUTEX_HELD(&vdcp->lock)); 3491 ASSERT(vdcp->state == VDC_STATE_INIT || 3492 vdcp->state == VDC_STATE_INIT_WAITING || 3493 vdcp->state == VDC_STATE_NEGOTIATE || 3494 vdcp->state == VDC_STATE_RESETTING); 3495 3496 if (vdcp->local_dring_backup == NULL) { 3497 /* the pending requests have already been processed */ 3498 return; 3499 } 3500 3501 DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3502 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3503 3504 /* 3505 * Walk the backup copy of the local descriptor ring and 3506 * cancel all the outstanding transactions. 3507 */ 3508 b_idx = vdcp->local_dring_backup_tail; 3509 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3510 3511 ldep = &(vdcp->local_dring_backup[b_idx]); 3512 3513 /* only cancel outstanding transactions */ 3514 if (!ldep->is_free) { 3515 3516 DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 3517 3518 /* 3519 * All requests have already been cleared from the 3520 * local descriptor ring and the LDC channel has been 3521 * reset so we will never get any reply for these 3522 * requests. Now we just have to notify threads waiting 3523 * for replies that the request has failed. 
3524 */ 3525 switch (ldep->cb_type) { 3526 case CB_SYNC: 3527 ASSERT(vdcp->sync_op_pending); 3528 vdcp->sync_op_status = EIO; 3529 vdcp->sync_op_pending = B_FALSE; 3530 cv_signal(&vdcp->sync_pending_cv); 3531 break; 3532 3533 case CB_STRATEGY: 3534 bufp = ldep->cb_arg; 3535 ASSERT(bufp != NULL); 3536 bufp->b_resid = bufp->b_bcount; 3537 bioerror(bufp, EIO); 3538 biodone(bufp); 3539 break; 3540 3541 default: 3542 ASSERT(0); 3543 } 3544 3545 } 3546 3547 /* get the next element to cancel */ 3548 if (++b_idx >= vdcp->local_dring_backup_len) 3549 b_idx = 0; 3550 } 3551 3552 /* all done - now clear up pending dring copy */ 3553 dring_size = vdcp->local_dring_backup_len * 3554 sizeof (vdcp->local_dring_backup[0]); 3555 3556 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3557 3558 vdcp->local_dring_backup = NULL; 3559 3560 DTRACE_IO2(processed, int, count, vdc_t *, vdcp); 3561 } 3562 3563 /* 3564 * Function: 3565 * vdc_connection_timeout 3566 * 3567 * Description: 3568 * This function is invoked if the timeout set to establish the connection 3569 * with vds expires. This will happen if we spend too much time in the 3570 * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states. Then we will 3571 * cancel any pending request and mark them as failed. 3572 * 3573 * If the timeout does not expire, it will be cancelled when we reach the 3574 * VDC_STATE_HANDLE_PENDING or VDC_STATE_RESETTING state. This function can 3575 * be invoked while we are in the VDC_STATE_HANDLE_PENDING or 3576 * VDC_STATE_RESETTING state in which case we do nothing because the 3577 * timeout is being cancelled. 3578 * 3579 * Arguments: 3580 * arg - argument of the timeout function actually a soft state 3581 * pointer for the instance of the device driver. 
3582 * 3583 * Return Code: 3584 * None 3585 */ 3586 void 3587 vdc_connection_timeout(void *arg) 3588 { 3589 vdc_t *vdcp = (vdc_t *)arg; 3590 3591 mutex_enter(&vdcp->lock); 3592 3593 if (vdcp->state == VDC_STATE_HANDLE_PENDING || 3594 vdcp->state == VDC_STATE_DETACH) { 3595 /* 3596 * The connection has just been re-established or 3597 * we are detaching. 3598 */ 3599 vdcp->ctimeout_reached = B_FALSE; 3600 mutex_exit(&vdcp->lock); 3601 return; 3602 } 3603 3604 vdcp->ctimeout_reached = B_TRUE; 3605 3606 /* notify requests waiting for sending */ 3607 cv_broadcast(&vdcp->running_cv); 3608 3609 /* cancel requests waiting for a result */ 3610 vdc_cancel_backup_ring(vdcp); 3611 3612 mutex_exit(&vdcp->lock); 3613 3614 cmn_err(CE_NOTE, "[%d] connection to service domain timeout", 3615 vdcp->instance); 3616 } 3617 3618 /* 3619 * Function: 3620 * vdc_backup_local_dring() 3621 * 3622 * Description: 3623 * Backup the current dring in the event of a reset. The Dring 3624 * transactions will be resubmitted to the server when the 3625 * connection is restored. 3626 * 3627 * Arguments: 3628 * vdcp - soft state pointer for this instance of the device driver. 3629 * 3630 * Return Code: 3631 * NONE 3632 */ 3633 static void 3634 vdc_backup_local_dring(vdc_t *vdcp) 3635 { 3636 int dring_size; 3637 3638 ASSERT(MUTEX_HELD(&vdcp->lock)); 3639 ASSERT(vdcp->state == VDC_STATE_RESETTING); 3640 3641 /* 3642 * If the backup dring is stil around, it means 3643 * that the last restore did not complete. However, 3644 * since we never got back into the running state, 3645 * the backup copy we have is still valid. 3646 */ 3647 if (vdcp->local_dring_backup != NULL) { 3648 DMSG(vdcp, 1, "reusing local descriptor ring backup " 3649 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 3650 vdcp->local_dring_backup_tail); 3651 return; 3652 } 3653 3654 /* 3655 * The backup dring can be NULL and the local dring may not be 3656 * initialized. 
This can happen if we had a reset while establishing 3657 * a new connection but after the connection has timed out. In that 3658 * case the backup dring is NULL because the requests have been 3659 * cancelled and the request occured before the local dring is 3660 * initialized. 3661 */ 3662 if (!(vdcp->initialized & VDC_DRING_LOCAL)) 3663 return; 3664 3665 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 3666 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 3667 3668 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 3669 3670 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 3671 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 3672 3673 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 3674 vdcp->local_dring_backup_len = vdcp->dring_len; 3675 } 3676 3677 /* -------------------------------------------------------------------------- */ 3678 3679 /* 3680 * The following functions process the incoming messages from vds 3681 */ 3682 3683 /* 3684 * Function: 3685 * vdc_process_msg_thread() 3686 * 3687 * Description: 3688 * 3689 * Main VDC message processing thread. Each vDisk instance 3690 * consists of a copy of this thread. This thread triggers 3691 * all the handshakes and data exchange with the server. It 3692 * also handles all channel resets 3693 * 3694 * Arguments: 3695 * vdc - soft state pointer for this instance of the device driver. 3696 * 3697 * Return Code: 3698 * None 3699 */ 3700 static void 3701 vdc_process_msg_thread(vdc_t *vdcp) 3702 { 3703 int status; 3704 int ctimeout; 3705 timeout_id_t tmid = 0; 3706 3707 mutex_enter(&vdcp->lock); 3708 3709 for (;;) { 3710 3711 #define Q(_s) (vdcp->state == _s) ? 
/*
 * Function:
 *	vdc_process_msg_thread()
 *
 * Description:
 *	Main VDC message processing thread. Each vDisk instance
 *	consists of a copy of this thread. This thread triggers
 *	all the handshakes and data exchange with the server. It
 *	also handles all channel resets.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_process_msg_thread(vdc_t *vdcp)
{
	int		status;
	int		ctimeout;
	timeout_id_t	tmid = 0;	/* connection timeout; 0 = not armed */

	mutex_enter(&vdcp->lock);

	/*
	 * State machine loop; runs with vdcp->lock held except where
	 * explicitly dropped.  Only VDC_STATE_DETACH exits the thread.
	 */
	for (;;) {

#define	Q(_s)	(vdcp->state == _s) ? #_s :
		DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state,
		    Q(VDC_STATE_INIT)
		    Q(VDC_STATE_INIT_WAITING)
		    Q(VDC_STATE_NEGOTIATE)
		    Q(VDC_STATE_HANDLE_PENDING)
		    Q(VDC_STATE_RUNNING)
		    Q(VDC_STATE_RESETTING)
		    Q(VDC_STATE_DETACH)
		    "UNKNOWN");

		switch (vdcp->state) {
		case VDC_STATE_INIT:

			/*
			 * If requested, start a timeout to check if the
			 * connection with vds is established in the
			 * specified delay. If the timeout expires, we
			 * will cancel any pending request.
			 *
			 * If some reset have occurred while establishing
			 * the connection, we already have a timeout armed
			 * and in that case we don't need to arm a new one.
			 */
			ctimeout = (vdc_timeout != 0)?
			    vdc_timeout : vdcp->ctimeout;

			if (ctimeout != 0 && tmid == 0) {
				tmid = timeout(vdc_connection_timeout, vdcp,
				    ctimeout * drv_usectohz(1000000));
			}

			/* Check if have re-initializing repeatedly */
			if (vdcp->hshake_cnt++ > vdc_hshake_retries &&
			    vdcp->lifecycle != VDC_LC_ONLINE) {
				cmn_err(CE_NOTE, "[%d] disk access failed.\n",
				    vdcp->instance);
				vdcp->state = VDC_STATE_DETACH;
				break;
			}

			/* Bring up connection with vds via LDC */
			status = vdc_start_ldc_connection(vdcp);
			if (status == EINVAL) {
				DMSG(vdcp, 0, "[%d] Could not start LDC",
				    vdcp->instance);
				vdcp->state = VDC_STATE_DETACH;
			} else {
				vdcp->state = VDC_STATE_INIT_WAITING;
			}
			break;

		case VDC_STATE_INIT_WAITING:

			/*
			 * Let the callback event move us on
			 * when channel is open to server
			 */
			while (vdcp->ldc_state != LDC_UP) {
				cv_wait(&vdcp->initwait_cv, &vdcp->lock);
				if (vdcp->state != VDC_STATE_INIT_WAITING) {
					DMSG(vdcp, 0,
				"state moved to %d out from under us...\n",
					    vdcp->state);

					break;
				}
			}
			/* only advance if nothing changed the state on us */
			if (vdcp->state == VDC_STATE_INIT_WAITING &&
			    vdcp->ldc_state == LDC_UP) {
				vdcp->state = VDC_STATE_NEGOTIATE;
			}
			break;

		case VDC_STATE_NEGOTIATE:
			/* handshake: version, attributes, dring, then RDX */
			switch (status = vdc_ver_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "ver negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_attr_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "attr negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_dring_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "dring negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_rdx_exchange(vdcp)) {
			case 0:
				vdcp->state = VDC_STATE_HANDLE_PENDING;
				goto done;
			default:
				DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n",
				    status);
				goto reset;
			}
reset:
			DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n",
			    status);
			vdcp->state = VDC_STATE_RESETTING;
			vdcp->self_reset = B_TRUE;
done:
			DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n",
			    vdcp->state);
			break;

		case VDC_STATE_HANDLE_PENDING:

			if (vdcp->ctimeout_reached) {
				/*
				 * The connection timeout had been reached so
				 * pending requests have been cancelled. Now
				 * that the connection is back we can reset
				 * the timeout.
				 */
				ASSERT(vdcp->local_dring_backup == NULL);
				ASSERT(tmid != 0);
				tmid = 0;
				vdcp->ctimeout_reached = B_FALSE;
				vdcp->state = VDC_STATE_RUNNING;
				DMSG(vdcp, 0, "[%d] connection to service "
				    "domain is up", vdcp->instance);
				break;
			}

			/*
			 * Connection came up in time: disarm the timeout
			 * (lock dropped; untimeout may block on the
			 * callback) and resubmit any backed-up requests.
			 */
			mutex_exit(&vdcp->lock);
			if (tmid != 0) {
				(void) untimeout(tmid);
				tmid = 0;
			}
			status = vdc_resubmit_backup_dring(vdcp);
			mutex_enter(&vdcp->lock);

			if (status)
				vdcp->state = VDC_STATE_RESETTING;
			else
				vdcp->state = VDC_STATE_RUNNING;

			break;

		/* enter running state */
		case VDC_STATE_RUNNING:
			/*
			 * Signal anyone waiting for the connection
			 * to come on line.
			 */
			vdcp->hshake_cnt = 0;
			cv_broadcast(&vdcp->running_cv);
			mutex_exit(&vdcp->lock);

			/* receive loop: process server replies until error */
			for (;;) {
				vio_msg_t msg;
				status = vdc_wait_for_response(vdcp, &msg);
				if (status) break;

				DMSG(vdcp, 1, "[%d] new pkt(s) available\n",
				    vdcp->instance);
				status = vdc_process_data_msg(vdcp, &msg);
				if (status) {
					DMSG(vdcp, 1, "[%d] process_data_msg "
					    "returned err=%d\n", vdcp->instance,
					    status);
					break;
				}

			}

			mutex_enter(&vdcp->lock);

			/* receive loop broke out: reset the channel */
			vdcp->state = VDC_STATE_RESETTING;
			vdcp->self_reset = B_TRUE;
			break;

		case VDC_STATE_RESETTING:
			/*
			 * When we reach this state, we either come from the
			 * VDC_STATE_RUNNING state and we can have pending
			 * request but no timeout is armed; or we come from
			 * the VDC_STATE_INIT_WAITING, VDC_NEGOTIATE or
			 * VDC_HANDLE_PENDING state and there is no pending
			 * request or pending requests have already been copied
			 * into the backup dring. So we can safely keep the
			 * connection timeout armed while we are in this state.
			 */

			DMSG(vdcp, 0, "Initiating channel reset "
			    "(pending = %d)\n", (int)vdcp->threads_pending);

			if (vdcp->self_reset) {
				DMSG(vdcp, 0,
				    "[%d] calling stop_ldc_connection.\n",
				    vdcp->instance);
				status = vdc_stop_ldc_connection(vdcp);
				vdcp->self_reset = B_FALSE;
			}

			/*
			 * Wait for all threads currently waiting
			 * for a free dring entry to use.
			 */
			while (vdcp->threads_pending) {
				cv_broadcast(&vdcp->membind_cv);
				cv_broadcast(&vdcp->dring_free_cv);
				mutex_exit(&vdcp->lock);
				/* give the waiters enough time to wake up */
				delay(vdc_hz_min_ldc_delay);
				mutex_enter(&vdcp->lock);
			}

			ASSERT(vdcp->threads_pending == 0);

			/* Sanity check that no thread is receiving */
			ASSERT(vdcp->read_state != VDC_READ_WAITING);

			vdcp->read_state = VDC_READ_IDLE;

			/* preserve outstanding requests for resubmission */
			vdc_backup_local_dring(vdcp);

			/* cleanup the old d-ring */
			vdc_destroy_descriptor_ring(vdcp);

			/* go and start again */
			vdcp->state = VDC_STATE_INIT;

			break;

		case VDC_STATE_DETACH:
			DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n",
			    vdcp->instance);

			/* cancel any pending timeout */
			mutex_exit(&vdcp->lock);
			if (tmid != 0) {
				(void) untimeout(tmid);
				tmid = 0;
			}
			mutex_enter(&vdcp->lock);

			/*
			 * Signal anyone waiting for connection
			 * to come online
			 */
			cv_broadcast(&vdcp->running_cv);

			while (vdcp->sync_op_pending) {
				cv_signal(&vdcp->sync_pending_cv);
				cv_signal(&vdcp->sync_blocked_cv);
				mutex_exit(&vdcp->lock);
				/* give the waiters enough time to wake up */
				delay(vdc_hz_min_ldc_delay);
				mutex_enter(&vdcp->lock);
			}

			mutex_exit(&vdcp->lock);

			DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n",
			    vdcp->instance);
			thread_exit();
			break;
		}
	}
}
3992 * vdc_process_data_msg() 3993 * 3994 * Description: 3995 * This function is called by the message processing thread each time 3996 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 3997 * be an ACK or NACK from vds[1] which vdc handles as follows. 3998 * ACK - wake up the waiting thread 3999 * NACK - resend any messages necessary 4000 * 4001 * [1] Although the message format allows it, vds should not send a 4002 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 4003 * some bizarre reason it does, vdc will reset the connection. 4004 * 4005 * Arguments: 4006 * vdc - soft state pointer for this instance of the device driver. 4007 * msg - the LDC message sent by vds 4008 * 4009 * Return Code: 4010 * 0 - Success. 4011 * > 0 - error value returned by LDC 4012 */ 4013 static int 4014 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 4015 { 4016 int status = 0; 4017 vio_dring_msg_t *dring_msg; 4018 vdc_local_desc_t *ldep = NULL; 4019 int start, end; 4020 int idx; 4021 4022 dring_msg = (vio_dring_msg_t *)msg; 4023 4024 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 4025 ASSERT(vdcp != NULL); 4026 4027 mutex_enter(&vdcp->lock); 4028 4029 /* 4030 * Check to see if the message has bogus data 4031 */ 4032 idx = start = dring_msg->start_idx; 4033 end = dring_msg->end_idx; 4034 if ((start >= vdcp->dring_len) || 4035 (end >= vdcp->dring_len) || (end < -1)) { 4036 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 4037 vdcp->instance, start, end); 4038 mutex_exit(&vdcp->lock); 4039 return (EINVAL); 4040 } 4041 4042 /* 4043 * Verify that the sequence number is what vdc expects. 
4044 */ 4045 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 4046 case VDC_SEQ_NUM_TODO: 4047 break; /* keep processing this message */ 4048 case VDC_SEQ_NUM_SKIP: 4049 mutex_exit(&vdcp->lock); 4050 return (0); 4051 case VDC_SEQ_NUM_INVALID: 4052 mutex_exit(&vdcp->lock); 4053 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 4054 return (ENXIO); 4055 } 4056 4057 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 4058 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 4059 VDC_DUMP_DRING_MSG(dring_msg); 4060 mutex_exit(&vdcp->lock); 4061 return (EIO); 4062 4063 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 4064 mutex_exit(&vdcp->lock); 4065 return (EPROTO); 4066 } 4067 4068 DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp); 4069 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 4070 ASSERT(start == end); 4071 4072 ldep = &vdcp->local_dring[idx]; 4073 4074 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 4075 ldep->dep->hdr.dstate, ldep->cb_type); 4076 4077 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 4078 struct buf *bufp; 4079 4080 switch (ldep->cb_type) { 4081 case CB_SYNC: 4082 ASSERT(vdcp->sync_op_pending); 4083 4084 status = vdc_depopulate_descriptor(vdcp, idx); 4085 vdcp->sync_op_status = status; 4086 vdcp->sync_op_pending = B_FALSE; 4087 cv_signal(&vdcp->sync_pending_cv); 4088 break; 4089 4090 case CB_STRATEGY: 4091 bufp = ldep->cb_arg; 4092 ASSERT(bufp != NULL); 4093 bufp->b_resid = 4094 bufp->b_bcount - ldep->dep->payload.nbytes; 4095 status = ldep->dep->payload.status; /* Future:ntoh */ 4096 if (status != 0) { 4097 DMSG(vdcp, 1, "strategy status=%d\n", status); 4098 bioerror(bufp, status); 4099 } 4100 status = vdc_depopulate_descriptor(vdcp, idx); 4101 biodone(bufp); 4102 4103 DMSG(vdcp, 1, 4104 "strategy complete req=%ld bytes resp=%ld bytes\n", 4105 bufp->b_bcount, ldep->dep->payload.nbytes); 4106 break; 4107 4108 default: 4109 ASSERT(0); 4110 } 4111 } 4112 4113 /* let the arrival signal propogate */ 4114 mutex_exit(&vdcp->lock); 4115 
4116 /* probe gives the count of how many entries were processed */ 4117 DTRACE_IO2(processed, int, 1, vdc_t *, vdcp); 4118 4119 return (0); 4120 } 4121 4122 /* 4123 * Function: 4124 * vdc_process_err_msg() 4125 * 4126 * NOTE: No error messages are used as part of the vDisk protocol 4127 */ 4128 static int 4129 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 4130 { 4131 _NOTE(ARGUNUSED(vdc)) 4132 _NOTE(ARGUNUSED(msg)) 4133 4134 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 4135 DMSG(vdc, 1, "[%d] Got an ERR msg", vdc->instance); 4136 4137 return (ENOTSUP); 4138 } 4139 4140 /* 4141 * Function: 4142 * vdc_handle_ver_msg() 4143 * 4144 * Description: 4145 * 4146 * Arguments: 4147 * vdc - soft state pointer for this instance of the device driver. 4148 * ver_msg - LDC message sent by vDisk server 4149 * 4150 * Return Code: 4151 * 0 - Success 4152 */ 4153 static int 4154 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 4155 { 4156 int status = 0; 4157 4158 ASSERT(vdc != NULL); 4159 ASSERT(mutex_owned(&vdc->lock)); 4160 4161 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 4162 return (EPROTO); 4163 } 4164 4165 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 4166 return (EINVAL); 4167 } 4168 4169 switch (ver_msg->tag.vio_subtype) { 4170 case VIO_SUBTYPE_ACK: 4171 /* 4172 * We check to see if the version returned is indeed supported 4173 * (The server may have also adjusted the minor number downwards 4174 * and if so 'ver_msg' will contain the actual version agreed) 4175 */ 4176 if (vdc_is_supported_version(ver_msg)) { 4177 vdc->ver.major = ver_msg->ver_major; 4178 vdc->ver.minor = ver_msg->ver_minor; 4179 ASSERT(vdc->ver.major > 0); 4180 } else { 4181 status = EPROTO; 4182 } 4183 break; 4184 4185 case VIO_SUBTYPE_NACK: 4186 /* 4187 * call vdc_is_supported_version() which will return the next 4188 * supported version (if any) in 'ver_msg' 4189 */ 4190 (void) vdc_is_supported_version(ver_msg); 4191 if (ver_msg->ver_major > 0) { 4192 size_t len = sizeof (*ver_msg); 4193 
4194 ASSERT(vdc->ver.major > 0); 4195 4196 /* reset the necessary fields and resend */ 4197 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 4198 ver_msg->dev_class = VDEV_DISK; 4199 4200 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 4201 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 4202 vdc->instance, status); 4203 if (len != sizeof (*ver_msg)) 4204 status = EBADMSG; 4205 } else { 4206 DMSG(vdc, 0, "[%d] No common version with vDisk server", 4207 vdc->instance); 4208 status = ENOTSUP; 4209 } 4210 4211 break; 4212 case VIO_SUBTYPE_INFO: 4213 /* 4214 * Handle the case where vds starts handshake 4215 * (for now only vdc is the instigator) 4216 */ 4217 status = ENOTSUP; 4218 break; 4219 4220 default: 4221 status = EINVAL; 4222 break; 4223 } 4224 4225 return (status); 4226 } 4227 4228 /* 4229 * Function: 4230 * vdc_handle_attr_msg() 4231 * 4232 * Description: 4233 * 4234 * Arguments: 4235 * vdc - soft state pointer for this instance of the device driver. 4236 * attr_msg - LDC message sent by vDisk server 4237 * 4238 * Return Code: 4239 * 0 - Success 4240 */ 4241 static int 4242 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 4243 { 4244 int status = 0; 4245 4246 ASSERT(vdc != NULL); 4247 ASSERT(mutex_owned(&vdc->lock)); 4248 4249 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 4250 return (EPROTO); 4251 } 4252 4253 switch (attr_msg->tag.vio_subtype) { 4254 case VIO_SUBTYPE_ACK: 4255 /* 4256 * We now verify the attributes sent by vds. 4257 */ 4258 if (attr_msg->vdisk_size == 0) { 4259 DMSG(vdc, 0, "[%d] Invalid disk size from vds", 4260 vdc->instance); 4261 status = EINVAL; 4262 break; 4263 } 4264 4265 if (attr_msg->max_xfer_sz == 0) { 4266 DMSG(vdc, 0, "[%d] Invalid transfer size from vds", 4267 vdc->instance); 4268 status = EINVAL; 4269 break; 4270 } 4271 4272 /* 4273 * If the disk size is already set check that it hasn't changed. 
4274 */ 4275 if ((vdc->vdisk_size != 0) && 4276 (vdc->vdisk_size != attr_msg->vdisk_size)) { 4277 DMSG(vdc, 0, "[%d] Different disk size from vds " 4278 "(old=0x%lx - new=0x%lx", vdc->instance, 4279 vdc->vdisk_size, attr_msg->vdisk_size) 4280 status = EINVAL; 4281 break; 4282 } 4283 4284 vdc->vdisk_size = attr_msg->vdisk_size; 4285 vdc->vdisk_type = attr_msg->vdisk_type; 4286 4287 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 4288 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 4289 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 4290 vdc->instance, vdc->block_size, 4291 attr_msg->vdisk_block_size); 4292 4293 /* 4294 * We don't know at compile time what the vDisk server will 4295 * think are good values but we apply an large (arbitrary) 4296 * upper bound to prevent memory exhaustion in vdc if it was 4297 * allocating a DRing based of huge values sent by the server. 4298 * We probably will never exceed this except if the message 4299 * was garbage. 4300 */ 4301 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 4302 (PAGESIZE * DEV_BSIZE)) { 4303 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 4304 vdc->block_size = attr_msg->vdisk_block_size; 4305 } else { 4306 DMSG(vdc, 0, "[%d] vds block transfer size too big;" 4307 " using max supported by vdc", vdc->instance); 4308 } 4309 4310 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 4311 (attr_msg->vdisk_size > INT64_MAX) || 4312 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 4313 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 4314 vdc->instance); 4315 status = EINVAL; 4316 break; 4317 } 4318 4319 /* 4320 * Now that we have received all attributes we can create a 4321 * fake geometry for the disk. 4322 */ 4323 vdc_create_fake_geometry(vdc); 4324 break; 4325 4326 case VIO_SUBTYPE_NACK: 4327 /* 4328 * vds could not handle the attributes we sent so we 4329 * stop negotiating. 
4330 */ 4331 status = EPROTO; 4332 break; 4333 4334 case VIO_SUBTYPE_INFO: 4335 /* 4336 * Handle the case where vds starts the handshake 4337 * (for now; vdc is the only supported instigatior) 4338 */ 4339 status = ENOTSUP; 4340 break; 4341 4342 default: 4343 status = ENOTSUP; 4344 break; 4345 } 4346 4347 return (status); 4348 } 4349 4350 /* 4351 * Function: 4352 * vdc_handle_dring_reg_msg() 4353 * 4354 * Description: 4355 * 4356 * Arguments: 4357 * vdc - soft state pointer for this instance of the driver. 4358 * dring_msg - LDC message sent by vDisk server 4359 * 4360 * Return Code: 4361 * 0 - Success 4362 */ 4363 static int 4364 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 4365 { 4366 int status = 0; 4367 4368 ASSERT(vdc != NULL); 4369 ASSERT(mutex_owned(&vdc->lock)); 4370 4371 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 4372 return (EPROTO); 4373 } 4374 4375 switch (dring_msg->tag.vio_subtype) { 4376 case VIO_SUBTYPE_ACK: 4377 /* save the received dring_ident */ 4378 vdc->dring_ident = dring_msg->dring_ident; 4379 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 4380 vdc->instance, vdc->dring_ident); 4381 break; 4382 4383 case VIO_SUBTYPE_NACK: 4384 /* 4385 * vds could not handle the DRing info we sent so we 4386 * stop negotiating. 4387 */ 4388 DMSG(vdc, 0, "[%d] server could not register DRing\n", 4389 vdc->instance); 4390 status = EPROTO; 4391 break; 4392 4393 case VIO_SUBTYPE_INFO: 4394 /* 4395 * Handle the case where vds starts handshake 4396 * (for now only vdc is the instigatior) 4397 */ 4398 status = ENOTSUP; 4399 break; 4400 default: 4401 status = ENOTSUP; 4402 } 4403 4404 return (status); 4405 } 4406 4407 /* 4408 * Function: 4409 * vdc_verify_seq_num() 4410 * 4411 * Description: 4412 * This functions verifies that the sequence number sent back by the vDisk 4413 * server with the latest message is what is expected (i.e. 
it is greater 4414 * than the last seq num sent by the vDisk server and less than or equal 4415 * to the last seq num generated by vdc). 4416 * 4417 * It then checks the request ID to see if any requests need processing 4418 * in the DRing. 4419 * 4420 * Arguments: 4421 * vdc - soft state pointer for this instance of the driver. 4422 * dring_msg - pointer to the LDC message sent by vds 4423 * 4424 * Return Code: 4425 * VDC_SEQ_NUM_TODO - Message needs to be processed 4426 * VDC_SEQ_NUM_SKIP - Message has already been processed 4427 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 4428 * vdc cannot deal with them 4429 */ 4430 static int 4431 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 4432 { 4433 ASSERT(vdc != NULL); 4434 ASSERT(dring_msg != NULL); 4435 ASSERT(mutex_owned(&vdc->lock)); 4436 4437 /* 4438 * Check to see if the messages were responded to in the correct 4439 * order by vds. 4440 */ 4441 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 4442 (dring_msg->seq_num > vdc->seq_num)) { 4443 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 4444 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 4445 vdc->instance, dring_msg->seq_num, 4446 vdc->seq_num_reply, vdc->seq_num, 4447 vdc->req_id_proc, vdc->req_id); 4448 return (VDC_SEQ_NUM_INVALID); 4449 } 4450 vdc->seq_num_reply = dring_msg->seq_num; 4451 4452 if (vdc->req_id_proc < vdc->req_id) 4453 return (VDC_SEQ_NUM_TODO); 4454 else 4455 return (VDC_SEQ_NUM_SKIP); 4456 } 4457 4458 4459 /* 4460 * Function: 4461 * vdc_is_supported_version() 4462 * 4463 * Description: 4464 * This routine checks if the major/minor version numbers specified in 4465 * 'ver_msg' are supported. 
If not it finds the next version that is 4466 * in the supported version list 'vdc_version[]' and sets the fields in 4467 * 'ver_msg' to those values 4468 * 4469 * Arguments: 4470 * ver_msg - LDC message sent by vDisk server 4471 * 4472 * Return Code: 4473 * B_TRUE - Success 4474 * B_FALSE - Version not supported 4475 */ 4476 static boolean_t 4477 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 4478 { 4479 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 4480 4481 for (int i = 0; i < vdc_num_versions; i++) { 4482 ASSERT(vdc_version[i].major > 0); 4483 ASSERT((i == 0) || 4484 (vdc_version[i].major < vdc_version[i-1].major)); 4485 4486 /* 4487 * If the major versions match, adjust the minor version, if 4488 * necessary, down to the highest value supported by this 4489 * client. The server should support all minor versions lower 4490 * than the value it sent 4491 */ 4492 if (ver_msg->ver_major == vdc_version[i].major) { 4493 if (ver_msg->ver_minor > vdc_version[i].minor) { 4494 DMSGX(0, 4495 "Adjusting minor version from %u to %u", 4496 ver_msg->ver_minor, vdc_version[i].minor); 4497 ver_msg->ver_minor = vdc_version[i].minor; 4498 } 4499 return (B_TRUE); 4500 } 4501 4502 /* 4503 * If the message contains a higher major version number, set 4504 * the message's major/minor versions to the current values 4505 * and return false, so this message will get resent with 4506 * these values, and the server will potentially try again 4507 * with the same or a lower version 4508 */ 4509 if (ver_msg->ver_major > vdc_version[i].major) { 4510 ver_msg->ver_major = vdc_version[i].major; 4511 ver_msg->ver_minor = vdc_version[i].minor; 4512 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 4513 ver_msg->ver_major, ver_msg->ver_minor); 4514 4515 return (B_FALSE); 4516 } 4517 4518 /* 4519 * Otherwise, the message's major version is less than the 4520 * current major version, so continue the loop to the next 4521 * (lower) supported version 4522 */ 4523 } 4524 4525 
/* 4526 * No common version was found; "ground" the version pair in the 4527 * message to terminate negotiation 4528 */ 4529 ver_msg->ver_major = 0; 4530 ver_msg->ver_minor = 0; 4531 4532 return (B_FALSE); 4533 } 4534 /* -------------------------------------------------------------------------- */ 4535 4536 /* 4537 * DKIO(7) support 4538 */ 4539 4540 typedef struct vdc_dk_arg { 4541 struct dk_callback dkc; 4542 int mode; 4543 dev_t dev; 4544 vdc_t *vdc; 4545 } vdc_dk_arg_t; 4546 4547 /* 4548 * Function: 4549 * vdc_dkio_flush_cb() 4550 * 4551 * Description: 4552 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 4553 * by kernel code. 4554 * 4555 * Arguments: 4556 * arg - a pointer to a vdc_dk_arg_t structure. 4557 */ 4558 void 4559 vdc_dkio_flush_cb(void *arg) 4560 { 4561 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 4562 struct dk_callback *dkc = NULL; 4563 vdc_t *vdc = NULL; 4564 int rv; 4565 4566 if (dk_arg == NULL) { 4567 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 4568 return; 4569 } 4570 dkc = &dk_arg->dkc; 4571 vdc = dk_arg->vdc; 4572 ASSERT(vdc != NULL); 4573 4574 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 4575 VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir); 4576 if (rv != 0) { 4577 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 4578 vdc->instance, rv, 4579 ddi_model_convert_from(dk_arg->mode & FMODELS)); 4580 } 4581 4582 /* 4583 * Trigger the call back to notify the caller the the ioctl call has 4584 * been completed. 
4585 */ 4586 if ((dk_arg->mode & FKIOCTL) && 4587 (dkc != NULL) && 4588 (dkc->dkc_callback != NULL)) { 4589 ASSERT(dkc->dkc_cookie != NULL); 4590 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 4591 } 4592 4593 /* Indicate that one less DKIO write flush is outstanding */ 4594 mutex_enter(&vdc->lock); 4595 vdc->dkio_flush_pending--; 4596 ASSERT(vdc->dkio_flush_pending >= 0); 4597 mutex_exit(&vdc->lock); 4598 4599 /* free the mem that was allocated when the callback was dispatched */ 4600 kmem_free(arg, sizeof (vdc_dk_arg_t)); 4601 } 4602 4603 /* 4604 * Function: 4605 * vdc_dkio_get_partition() 4606 * 4607 * Description: 4608 * This function implements the DKIOCGAPART ioctl. 4609 * 4610 * Arguments: 4611 * vdc - soft state pointer 4612 * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 4613 * flag - ioctl flags 4614 */ 4615 static int 4616 vdc_dkio_get_partition(vdc_t *vdc, caddr_t arg, int flag) 4617 { 4618 struct dk_geom *geom; 4619 struct vtoc *vtoc; 4620 union { 4621 struct dk_map map[NDKMAP]; 4622 struct dk_map32 map32[NDKMAP]; 4623 } data; 4624 int i, rv, size; 4625 4626 mutex_enter(&vdc->lock); 4627 4628 if ((rv = vdc_validate_geometry(vdc)) != 0) { 4629 mutex_exit(&vdc->lock); 4630 return (rv); 4631 } 4632 4633 vtoc = vdc->vtoc; 4634 geom = vdc->geom; 4635 4636 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 4637 4638 for (i = 0; i < vtoc->v_nparts; i++) { 4639 data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 4640 (geom->dkg_nhead * geom->dkg_nsect); 4641 data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 4642 } 4643 size = NDKMAP * sizeof (struct dk_map32); 4644 4645 } else { 4646 4647 for (i = 0; i < vtoc->v_nparts; i++) { 4648 data.map[i].dkl_cylno = vtoc->v_part[i].p_start / 4649 (geom->dkg_nhead * geom->dkg_nsect); 4650 data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 4651 } 4652 size = NDKMAP * sizeof (struct dk_map); 4653 4654 } 4655 4656 mutex_exit(&vdc->lock); 4657 4658 if (ddi_copyout(&data, arg, size, flag) != 0) 4659 
return (EFAULT); 4660 4661 return (0); 4662 } 4663 4664 /* 4665 * Function: 4666 * vdc_dioctl_rwcmd() 4667 * 4668 * Description: 4669 * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 4670 * for DKC_DIRECT disks to read or write at an absolute disk offset. 4671 * 4672 * Arguments: 4673 * dev - device 4674 * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 4675 * flag - ioctl flags 4676 */ 4677 static int 4678 vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) 4679 { 4680 struct dadkio_rwcmd32 rwcmd32; 4681 struct dadkio_rwcmd rwcmd; 4682 struct iovec aiov; 4683 struct uio auio; 4684 int rw, status; 4685 struct buf *buf; 4686 4687 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 4688 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 4689 sizeof (struct dadkio_rwcmd32), flag)) { 4690 return (EFAULT); 4691 } 4692 rwcmd.cmd = rwcmd32.cmd; 4693 rwcmd.flags = rwcmd32.flags; 4694 rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 4695 rwcmd.buflen = rwcmd32.buflen; 4696 rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 4697 } else { 4698 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 4699 sizeof (struct dadkio_rwcmd), flag)) { 4700 return (EFAULT); 4701 } 4702 } 4703 4704 switch (rwcmd.cmd) { 4705 case DADKIO_RWCMD_READ: 4706 rw = B_READ; 4707 break; 4708 case DADKIO_RWCMD_WRITE: 4709 rw = B_WRITE; 4710 break; 4711 default: 4712 return (EINVAL); 4713 } 4714 4715 bzero((caddr_t)&aiov, sizeof (struct iovec)); 4716 aiov.iov_base = rwcmd.bufaddr; 4717 aiov.iov_len = rwcmd.buflen; 4718 4719 bzero((caddr_t)&auio, sizeof (struct uio)); 4720 auio.uio_iov = &aiov; 4721 auio.uio_iovcnt = 1; 4722 auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; 4723 auio.uio_resid = rwcmd.buflen; 4724 auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 4725 4726 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4727 bioinit(buf); 4728 /* 4729 * We use the private field of buf to specify that this is an 4730 * I/O using an absolute offset. 
4731 */ 4732 buf->b_private = (void *)VD_SLICE_NONE; 4733 4734 status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); 4735 4736 biofini(buf); 4737 kmem_free(buf, sizeof (buf_t)); 4738 4739 return (status); 4740 } 4741 4742 /* 4743 * This structure is used in the DKIO(7I) array below. 4744 */ 4745 typedef struct vdc_dk_ioctl { 4746 uint8_t op; /* VD_OP_XXX value */ 4747 int cmd; /* Solaris ioctl operation number */ 4748 size_t nbytes; /* size of structure to be copied */ 4749 4750 /* function to convert between vDisk and Solaris structure formats */ 4751 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 4752 int mode, int dir); 4753 } vdc_dk_ioctl_t; 4754 4755 /* 4756 * Subset of DKIO(7I) operations currently supported 4757 */ 4758 static vdc_dk_ioctl_t dk_ioctl[] = { 4759 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 4760 vdc_null_copy_func}, 4761 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 4762 vdc_get_wce_convert}, 4763 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 4764 vdc_set_wce_convert}, 4765 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 4766 vdc_get_vtoc_convert}, 4767 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 4768 vdc_set_vtoc_convert}, 4769 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 4770 vdc_get_geom_convert}, 4771 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 4772 vdc_get_geom_convert}, 4773 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 4774 vdc_get_geom_convert}, 4775 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 4776 vdc_set_geom_convert}, 4777 {VD_OP_GET_EFI, DKIOCGETEFI, 0, 4778 vdc_get_efi_convert}, 4779 {VD_OP_SET_EFI, DKIOCSETEFI, 0, 4780 vdc_set_efi_convert}, 4781 4782 /* DIOCTL_RWCMD is converted to a read or a write */ 4783 {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 4784 4785 /* 4786 * These particular ioctls are not sent to the server - vdc fakes up 4787 * the necessary info. 
4788 */ 4789 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 4790 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 4791 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 4792 {0, DKIOCGAPART, 0, vdc_null_copy_func }, 4793 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 4794 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 4795 }; 4796 4797 /* 4798 * Function: 4799 * vd_process_ioctl() 4800 * 4801 * Description: 4802 * This routine processes disk specific ioctl calls 4803 * 4804 * Arguments: 4805 * dev - the device number 4806 * cmd - the operation [dkio(7I)] to be processed 4807 * arg - pointer to user provided structure 4808 * (contains data to be set or reference parameter for get) 4809 * mode - bit flag, indicating open settings, 32/64 bit type, etc 4810 * 4811 * Return Code: 4812 * 0 4813 * EFAULT 4814 * ENXIO 4815 * EIO 4816 * ENOTSUP 4817 */ 4818 static int 4819 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 4820 { 4821 int instance = VDCUNIT(dev); 4822 vdc_t *vdc = NULL; 4823 int rv = -1; 4824 int idx = 0; /* index into dk_ioctl[] */ 4825 size_t len = 0; /* #bytes to send to vds */ 4826 size_t alloc_len = 0; /* #bytes to allocate mem for */ 4827 caddr_t mem_p = NULL; 4828 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 4829 vdc_dk_ioctl_t *iop; 4830 4831 vdc = ddi_get_soft_state(vdc_state, instance); 4832 if (vdc == NULL) { 4833 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 4834 instance); 4835 return (ENXIO); 4836 } 4837 4838 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 4839 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 4840 4841 /* 4842 * Validate the ioctl operation to be performed. 4843 * 4844 * If we have looped through the array without finding a match then we 4845 * don't support this ioctl. 
4846 */ 4847 for (idx = 0; idx < nioctls; idx++) { 4848 if (cmd == dk_ioctl[idx].cmd) 4849 break; 4850 } 4851 4852 if (idx >= nioctls) { 4853 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 4854 vdc->instance, cmd); 4855 return (ENOTSUP); 4856 } 4857 4858 iop = &(dk_ioctl[idx]); 4859 4860 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 4861 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 4862 dk_efi_t dk_efi; 4863 4864 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 4865 if (rv != 0) 4866 return (EFAULT); 4867 4868 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 4869 } else { 4870 len = iop->nbytes; 4871 } 4872 4873 /* 4874 * Deal with the ioctls which the server does not provide. vdc can 4875 * fake these up and return immediately 4876 */ 4877 switch (cmd) { 4878 case CDROMREADOFFSET: 4879 case DKIOCREMOVABLE: 4880 case USCSICMD: 4881 return (ENOTTY); 4882 4883 case DIOCTL_RWCMD: 4884 { 4885 if (vdc->cinfo == NULL) 4886 return (ENXIO); 4887 4888 if (vdc->cinfo->dki_ctype != DKC_DIRECT) 4889 return (ENOTTY); 4890 4891 return (vdc_dioctl_rwcmd(dev, arg, mode)); 4892 } 4893 4894 case DKIOCGAPART: 4895 { 4896 return (vdc_dkio_get_partition(vdc, arg, mode)); 4897 } 4898 4899 case DKIOCINFO: 4900 { 4901 struct dk_cinfo cinfo; 4902 if (vdc->cinfo == NULL) 4903 return (ENXIO); 4904 4905 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 4906 cinfo.dki_partition = VDCPART(dev); 4907 4908 rv = ddi_copyout(&cinfo, (void *)arg, 4909 sizeof (struct dk_cinfo), mode); 4910 if (rv != 0) 4911 return (EFAULT); 4912 4913 return (0); 4914 } 4915 4916 case DKIOCGMEDIAINFO: 4917 { 4918 if (vdc->minfo == NULL) 4919 return (ENXIO); 4920 4921 rv = ddi_copyout(vdc->minfo, (void *)arg, 4922 sizeof (struct dk_minfo), mode); 4923 if (rv != 0) 4924 return (EFAULT); 4925 4926 return (0); 4927 } 4928 4929 case DKIOCFLUSHWRITECACHE: 4930 { 4931 struct dk_callback *dkc = (struct dk_callback *)arg; 4932 vdc_dk_arg_t *dkarg = NULL; 4933 4934 DMSG(vdc, 1, "[%d] Flush W$: 
mode %x\n", 4935 instance, mode); 4936 4937 /* 4938 * If arg is NULL, then there is no callback function 4939 * registered and the call operates synchronously; we 4940 * break and continue with the rest of the function and 4941 * wait for vds to return (i.e. after the request to 4942 * vds returns successfully, all writes completed prior 4943 * to the ioctl will have been flushed from the disk 4944 * write cache to persistent media. 4945 * 4946 * If a callback function is registered, we dispatch 4947 * the request on a task queue and return immediately. 4948 * The callback will deal with informing the calling 4949 * thread that the flush request is completed. 4950 */ 4951 if (dkc == NULL) 4952 break; 4953 4954 /* 4955 * the asynchronous callback is only supported if 4956 * invoked from within the kernel 4957 */ 4958 if ((mode & FKIOCTL) == 0) 4959 return (ENOTSUP); 4960 4961 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 4962 4963 dkarg->mode = mode; 4964 dkarg->dev = dev; 4965 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 4966 4967 mutex_enter(&vdc->lock); 4968 vdc->dkio_flush_pending++; 4969 dkarg->vdc = vdc; 4970 mutex_exit(&vdc->lock); 4971 4972 /* put the request on a task queue */ 4973 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 4974 (void *)dkarg, DDI_SLEEP); 4975 if (rv == NULL) { 4976 /* clean up if dispatch fails */ 4977 mutex_enter(&vdc->lock); 4978 vdc->dkio_flush_pending--; 4979 mutex_exit(&vdc->lock); 4980 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 4981 } 4982 4983 return (rv == NULL ? 
ENOMEM : 0); 4984 } 4985 } 4986 4987 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 4988 ASSERT(iop->op != 0); 4989 4990 /* LDC requires that the memory being mapped is 8-byte aligned */ 4991 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 4992 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 4993 instance, len, alloc_len); 4994 4995 ASSERT(alloc_len >= 0); /* sanity check */ 4996 if (alloc_len > 0) 4997 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 4998 4999 /* 5000 * Call the conversion function for this ioctl which, if necessary, 5001 * converts from the Solaris format to the format ARC'ed 5002 * as part of the vDisk protocol (FWARC 2006/195) 5003 */ 5004 ASSERT(iop->convert != NULL); 5005 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 5006 if (rv != 0) { 5007 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 5008 instance, rv, cmd); 5009 if (mem_p != NULL) 5010 kmem_free(mem_p, alloc_len); 5011 return (rv); 5012 } 5013 5014 /* 5015 * send request to vds to service the ioctl. 5016 */ 5017 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 5018 VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 5019 VIO_both_dir); 5020 5021 if (cmd == DKIOCSVTOC || cmd == DKIOCSETEFI) { 5022 /* 5023 * The disk label may have changed. Revalidate the disk 5024 * geometry. This will also update the device nodes and 5025 * properties. 5026 */ 5027 vdc_validate(vdc); 5028 } 5029 5030 if (rv != 0) { 5031 /* 5032 * This is not necessarily an error. The ioctl could 5033 * be returning a value such as ENOTTY to indicate 5034 * that the ioctl is not applicable. 
5035 */ 5036 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 5037 instance, rv, cmd); 5038 if (mem_p != NULL) 5039 kmem_free(mem_p, alloc_len); 5040 5041 return (rv); 5042 } 5043 5044 /* 5045 * Call the conversion function (if it exists) for this ioctl 5046 * which converts from the format ARC'ed as part of the vDisk 5047 * protocol (FWARC 2006/195) back to a format understood by 5048 * the rest of Solaris. 5049 */ 5050 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 5051 if (rv != 0) { 5052 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 5053 instance, rv, cmd); 5054 if (mem_p != NULL) 5055 kmem_free(mem_p, alloc_len); 5056 return (rv); 5057 } 5058 5059 if (mem_p != NULL) 5060 kmem_free(mem_p, alloc_len); 5061 5062 return (rv); 5063 } 5064 5065 /* 5066 * Function: 5067 * 5068 * Description: 5069 * This is an empty conversion function used by ioctl calls which 5070 * do not need to convert the data being passed in/out to userland 5071 */ 5072 static int 5073 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 5074 { 5075 _NOTE(ARGUNUSED(vdc)) 5076 _NOTE(ARGUNUSED(from)) 5077 _NOTE(ARGUNUSED(to)) 5078 _NOTE(ARGUNUSED(mode)) 5079 _NOTE(ARGUNUSED(dir)) 5080 5081 return (0); 5082 } 5083 5084 static int 5085 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 5086 int mode, int dir) 5087 { 5088 _NOTE(ARGUNUSED(vdc)) 5089 5090 if (dir == VD_COPYIN) 5091 return (0); /* nothing to do */ 5092 5093 if (ddi_copyout(from, to, sizeof (int), mode) != 0) 5094 return (EFAULT); 5095 5096 return (0); 5097 } 5098 5099 static int 5100 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 5101 int mode, int dir) 5102 { 5103 _NOTE(ARGUNUSED(vdc)) 5104 5105 if (dir == VD_COPYOUT) 5106 return (0); /* nothing to do */ 5107 5108 if (ddi_copyin(from, to, sizeof (int), mode) != 0) 5109 return (EFAULT); 5110 5111 return (0); 5112 } 5113 5114 /* 5115 * Function: 5116 * vdc_get_vtoc_convert() 5117 * 5118 * Description: 5119 * This routine 
performs the necessary convertions from the DKIOCGVTOC 5120 * Solaris structure to the format defined in FWARC 2006/195. 5121 * 5122 * In the struct vtoc definition, the timestamp field is marked as not 5123 * supported so it is not part of vDisk protocol (FWARC 2006/195). 5124 * However SVM uses that field to check it can write into the VTOC, 5125 * so we fake up the info of that field. 5126 * 5127 * Arguments: 5128 * vdc - the vDisk client 5129 * from - the buffer containing the data to be copied from 5130 * to - the buffer to be copied to 5131 * mode - flags passed to ioctl() call 5132 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 5133 * 5134 * Return Code: 5135 * 0 - Success 5136 * ENXIO - incorrect buffer passed in. 5137 * EFAULT - ddi_copyout routine encountered an error. 5138 */ 5139 static int 5140 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5141 { 5142 int i; 5143 void *tmp_mem = NULL; 5144 void *tmp_memp; 5145 struct vtoc vt; 5146 struct vtoc32 vt32; 5147 int copy_len = 0; 5148 int rv = 0; 5149 5150 if (dir != VD_COPYOUT) 5151 return (0); /* nothing to do */ 5152 5153 if ((from == NULL) || (to == NULL)) 5154 return (ENXIO); 5155 5156 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 5157 copy_len = sizeof (struct vtoc32); 5158 else 5159 copy_len = sizeof (struct vtoc); 5160 5161 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5162 5163 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 5164 5165 /* fake the VTOC timestamp field */ 5166 for (i = 0; i < V_NUMPAR; i++) { 5167 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 5168 } 5169 5170 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5171 vtoctovtoc32(vt, vt32); 5172 tmp_memp = &vt32; 5173 } else { 5174 tmp_memp = &vt; 5175 } 5176 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 5177 if (rv != 0) 5178 rv = EFAULT; 5179 5180 kmem_free(tmp_mem, copy_len); 5181 return (rv); 5182 } 5183 5184 /* 5185 * Function: 5186 * vdc_set_vtoc_convert() 5187 * 5188 * 
Description: 5189 * This routine performs the necessary convertions from the DKIOCSVTOC 5190 * Solaris structure to the format defined in FWARC 2006/195. 5191 * 5192 * Arguments: 5193 * vdc - the vDisk client 5194 * from - Buffer with data 5195 * to - Buffer where data is to be copied to 5196 * mode - flags passed to ioctl 5197 * dir - direction of copy (in or out) 5198 * 5199 * Return Code: 5200 * 0 - Success 5201 * ENXIO - Invalid buffer passed in 5202 * EFAULT - ddi_copyin of data failed 5203 */ 5204 static int 5205 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5206 { 5207 _NOTE(ARGUNUSED(vdc)) 5208 5209 void *tmp_mem = NULL; 5210 struct vtoc vt; 5211 struct vtoc *vtp = &vt; 5212 vd_vtoc_t vtvd; 5213 int copy_len = 0; 5214 int rv = 0; 5215 5216 if (dir != VD_COPYIN) 5217 return (0); /* nothing to do */ 5218 5219 if ((from == NULL) || (to == NULL)) 5220 return (ENXIO); 5221 5222 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 5223 copy_len = sizeof (struct vtoc32); 5224 else 5225 copy_len = sizeof (struct vtoc); 5226 5227 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5228 5229 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 5230 if (rv != 0) { 5231 kmem_free(tmp_mem, copy_len); 5232 return (EFAULT); 5233 } 5234 5235 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5236 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 5237 } else { 5238 vtp = tmp_mem; 5239 } 5240 5241 VTOC2VD_VTOC(vtp, &vtvd); 5242 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 5243 kmem_free(tmp_mem, copy_len); 5244 5245 return (0); 5246 } 5247 5248 /* 5249 * Function: 5250 * vdc_get_geom_convert() 5251 * 5252 * Description: 5253 * This routine performs the necessary convertions from the DKIOCGGEOM, 5254 * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 5255 * defined in FWARC 2006/195 5256 * 5257 * Arguments: 5258 * vdc - the vDisk client 5259 * from - Buffer with data 5260 * to - Buffer where data is to be copied to 5261 * mode - 
flags passed to ioctl 5262 * dir - direction of copy (in or out) 5263 * 5264 * Return Code: 5265 * 0 - Success 5266 * ENXIO - Invalid buffer passed in 5267 * EFAULT - ddi_copyout of data failed 5268 */ 5269 static int 5270 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5271 { 5272 _NOTE(ARGUNUSED(vdc)) 5273 5274 struct dk_geom geom; 5275 int copy_len = sizeof (struct dk_geom); 5276 int rv = 0; 5277 5278 if (dir != VD_COPYOUT) 5279 return (0); /* nothing to do */ 5280 5281 if ((from == NULL) || (to == NULL)) 5282 return (ENXIO); 5283 5284 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 5285 rv = ddi_copyout(&geom, to, copy_len, mode); 5286 if (rv != 0) 5287 rv = EFAULT; 5288 5289 return (rv); 5290 } 5291 5292 /* 5293 * Function: 5294 * vdc_set_geom_convert() 5295 * 5296 * Description: 5297 * This routine performs the necessary convertions from the DKIOCSGEOM 5298 * Solaris structure to the format defined in FWARC 2006/195. 5299 * 5300 * Arguments: 5301 * vdc - the vDisk client 5302 * from - Buffer with data 5303 * to - Buffer where data is to be copied to 5304 * mode - flags passed to ioctl 5305 * dir - direction of copy (in or out) 5306 * 5307 * Return Code: 5308 * 0 - Success 5309 * ENXIO - Invalid buffer passed in 5310 * EFAULT - ddi_copyin of data failed 5311 */ 5312 static int 5313 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5314 { 5315 _NOTE(ARGUNUSED(vdc)) 5316 5317 vd_geom_t vdgeom; 5318 void *tmp_mem = NULL; 5319 int copy_len = sizeof (struct dk_geom); 5320 int rv = 0; 5321 5322 if (dir != VD_COPYIN) 5323 return (0); /* nothing to do */ 5324 5325 if ((from == NULL) || (to == NULL)) 5326 return (ENXIO); 5327 5328 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5329 5330 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 5331 if (rv != 0) { 5332 kmem_free(tmp_mem, copy_len); 5333 return (EFAULT); 5334 } 5335 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 5336 bcopy(&vdgeom, to, sizeof (vdgeom)); 5337 
kmem_free(tmp_mem, copy_len); 5338 5339 return (0); 5340 } 5341 5342 static int 5343 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5344 { 5345 _NOTE(ARGUNUSED(vdc)) 5346 5347 vd_efi_t *vd_efi; 5348 dk_efi_t dk_efi; 5349 int rv = 0; 5350 void *uaddr; 5351 5352 if ((from == NULL) || (to == NULL)) 5353 return (ENXIO); 5354 5355 if (dir == VD_COPYIN) { 5356 5357 vd_efi = (vd_efi_t *)to; 5358 5359 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 5360 if (rv != 0) 5361 return (EFAULT); 5362 5363 vd_efi->lba = dk_efi.dki_lba; 5364 vd_efi->length = dk_efi.dki_length; 5365 bzero(vd_efi->data, vd_efi->length); 5366 5367 } else { 5368 5369 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 5370 if (rv != 0) 5371 return (EFAULT); 5372 5373 uaddr = dk_efi.dki_data; 5374 5375 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 5376 5377 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 5378 5379 rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length, 5380 mode); 5381 if (rv != 0) 5382 return (EFAULT); 5383 5384 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5385 } 5386 5387 return (0); 5388 } 5389 5390 static int 5391 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5392 { 5393 _NOTE(ARGUNUSED(vdc)) 5394 5395 dk_efi_t dk_efi; 5396 void *uaddr; 5397 5398 if (dir == VD_COPYOUT) 5399 return (0); /* nothing to do */ 5400 5401 if ((from == NULL) || (to == NULL)) 5402 return (ENXIO); 5403 5404 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 5405 return (EFAULT); 5406 5407 uaddr = dk_efi.dki_data; 5408 5409 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 5410 5411 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 5412 return (EFAULT); 5413 5414 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 5415 5416 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5417 5418 return (0); 5419 } 5420 5421 /* 5422 * Function: 5423 * vdc_create_fake_geometry() 5424 * 5425 * Description: 5426 * This routine 
fakes up the disk info needed for some DKIO ioctls. 5427 * - DKIOCINFO 5428 * - DKIOCGMEDIAINFO 5429 * 5430 * [ just like lofi(7D) and ramdisk(7D) ] 5431 * 5432 * Arguments: 5433 * vdc - soft state pointer for this instance of the device driver. 5434 * 5435 * Return Code: 5436 * none. 5437 */ 5438 static void 5439 vdc_create_fake_geometry(vdc_t *vdc) 5440 { 5441 ASSERT(vdc != NULL); 5442 ASSERT(vdc->vdisk_size != 0); 5443 ASSERT(vdc->max_xfer_sz != 0); 5444 5445 /* 5446 * DKIOCINFO support 5447 */ 5448 if (vdc->cinfo == NULL) 5449 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 5450 5451 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 5452 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 5453 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 5454 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 5455 /* 5456 * We currently set the controller type to DKC_DIRECT for any disk. 5457 * When SCSI support is implemented, we will eventually change this 5458 * type to DKC_SCSI_CCS for disks supporting the SCSI protocol. 5459 */ 5460 vdc->cinfo->dki_ctype = DKC_DIRECT; 5461 vdc->cinfo->dki_flags = DKI_FMTVOL; 5462 vdc->cinfo->dki_cnum = 0; 5463 vdc->cinfo->dki_addr = 0; 5464 vdc->cinfo->dki_space = 0; 5465 vdc->cinfo->dki_prio = 0; 5466 vdc->cinfo->dki_vec = 0; 5467 vdc->cinfo->dki_unit = vdc->instance; 5468 vdc->cinfo->dki_slave = 0; 5469 /* 5470 * The partition number will be created on the fly depending on the 5471 * actual slice (i.e. minor node) that is used to request the data. 
5472 */ 5473 vdc->cinfo->dki_partition = 0; 5474 5475 /* 5476 * DKIOCGMEDIAINFO support 5477 */ 5478 if (vdc->minfo == NULL) 5479 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 5480 vdc->minfo->dki_media_type = DK_FIXED_DISK; 5481 vdc->minfo->dki_capacity = vdc->vdisk_size; 5482 vdc->minfo->dki_lbsize = DEV_BSIZE; 5483 } 5484 5485 static ushort_t 5486 vdc_lbl2cksum(struct dk_label *label) 5487 { 5488 int count; 5489 ushort_t sum, *sp; 5490 5491 count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 5492 sp = (ushort_t *)label; 5493 sum = 0; 5494 while (count--) { 5495 sum ^= *sp++; 5496 } 5497 5498 return (sum); 5499 } 5500 5501 /* 5502 * Function: 5503 * vdc_validate_geometry 5504 * 5505 * Description: 5506 * This routine discovers the label and geometry of the disk. It stores 5507 * the disk label and related information in the vdc structure. If it 5508 * fails to validate the geometry or to discover the disk label then 5509 * the label is marked as unknown (VD_DISK_LABEL_UNK). 5510 * 5511 * Arguments: 5512 * vdc - soft state pointer for this instance of the device driver. 5513 * 5514 * Return Code: 5515 * 0 - success. 5516 * EINVAL - unknown disk label. 5517 * ENOTSUP - geometry not applicable (EFI label). 5518 * EIO - error accessing the disk. 
5519 */ 5520 static int 5521 vdc_validate_geometry(vdc_t *vdc) 5522 { 5523 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 5524 dev_t dev; 5525 int rv; 5526 struct dk_label label; 5527 struct dk_geom geom; 5528 struct vtoc vtoc; 5529 5530 ASSERT(vdc != NULL); 5531 ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 5532 ASSERT(MUTEX_HELD(&vdc->lock)); 5533 5534 mutex_exit(&vdc->lock); 5535 5536 dev = makedevice(ddi_driver_major(vdc->dip), 5537 VD_MAKE_DEV(vdc->instance, 0)); 5538 5539 rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL); 5540 if (rv == 0) 5541 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, FKIOCTL); 5542 5543 if (rv == ENOTSUP) { 5544 /* 5545 * If the device does not support VTOC then we try 5546 * to read an EFI label. 5547 */ 5548 struct dk_gpt *efi; 5549 size_t efi_len; 5550 5551 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 5552 5553 if (rv) { 5554 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 5555 vdc->instance, rv); 5556 mutex_enter(&vdc->lock); 5557 vdc_store_label_unk(vdc); 5558 return (EIO); 5559 } 5560 5561 mutex_enter(&vdc->lock); 5562 vdc_store_label_efi(vdc, efi); 5563 vd_efi_free(efi, efi_len); 5564 return (ENOTSUP); 5565 } 5566 5567 if (rv != 0) { 5568 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 5569 vdc->instance, rv); 5570 mutex_enter(&vdc->lock); 5571 vdc_store_label_unk(vdc); 5572 if (rv != EINVAL) 5573 rv = EIO; 5574 return (rv); 5575 } 5576 5577 /* check that geometry and vtoc are valid */ 5578 if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 5579 vtoc.v_sanity != VTOC_SANE) { 5580 mutex_enter(&vdc->lock); 5581 vdc_store_label_unk(vdc); 5582 return (EINVAL); 5583 } 5584 5585 /* 5586 * We have a disk and a valid VTOC. However this does not mean 5587 * that the disk currently have a VTOC label. The returned VTOC may 5588 * be a default VTOC to be used for configuring the disk (this is 5589 * what is done for disk image). 
So we read the label from the 5590 * beginning of the disk to ensure we really have a VTOC label. 5591 * 5592 * FUTURE: This could be the default way for reading the VTOC 5593 * from the disk as opposed to sending the VD_OP_GET_VTOC 5594 * to the server. This will be the default if vdc is implemented 5595 * ontop of cmlb. 5596 */ 5597 5598 /* 5599 * Single slice disk does not support read using an absolute disk 5600 * offset so we just rely on the DKIOCGVTOC ioctl in that case. 5601 */ 5602 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5603 mutex_enter(&vdc->lock); 5604 if (vtoc.v_nparts != 1) { 5605 vdc_store_label_unk(vdc); 5606 return (EINVAL); 5607 } 5608 vdc_store_label_vtoc(vdc, &geom, &vtoc); 5609 return (0); 5610 } 5611 5612 if (vtoc.v_nparts != V_NUMPAR) { 5613 mutex_enter(&vdc->lock); 5614 vdc_store_label_unk(vdc); 5615 return (EINVAL); 5616 } 5617 5618 /* 5619 * Read disk label from start of disk 5620 */ 5621 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 5622 bioinit(buf); 5623 buf->b_un.b_addr = (caddr_t)&label; 5624 buf->b_bcount = DK_LABEL_SIZE; 5625 buf->b_flags = B_BUSY | B_READ; 5626 buf->b_dev = dev; 5627 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label, 5628 DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); 5629 if (rv) { 5630 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 5631 vdc->instance); 5632 } else { 5633 rv = biowait(buf); 5634 biofini(buf); 5635 } 5636 kmem_free(buf, sizeof (buf_t)); 5637 5638 if (rv != 0 || label.dkl_magic != DKL_MAGIC || 5639 label.dkl_cksum != vdc_lbl2cksum(&label)) { 5640 DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 5641 vdc->instance); 5642 mutex_enter(&vdc->lock); 5643 vdc_store_label_unk(vdc); 5644 return (EINVAL); 5645 } 5646 5647 mutex_enter(&vdc->lock); 5648 vdc_store_label_vtoc(vdc, &geom, &vtoc); 5649 return (0); 5650 } 5651 5652 /* 5653 * Function: 5654 * vdc_validate 5655 * 5656 * Description: 5657 * This routine discovers the label of the disk and create the 5658 * appropriate 
device nodes if the label has changed. 5659 * 5660 * Arguments: 5661 * vdc - soft state pointer for this instance of the device driver. 5662 * 5663 * Return Code: 5664 * none. 5665 */ 5666 static void 5667 vdc_validate(vdc_t *vdc) 5668 { 5669 vd_disk_label_t old_label; 5670 struct vtoc old_vtoc; 5671 int rv; 5672 5673 ASSERT(!MUTEX_HELD(&vdc->lock)); 5674 5675 mutex_enter(&vdc->lock); 5676 5677 /* save the current label and vtoc */ 5678 old_label = vdc->vdisk_label; 5679 bcopy(vdc->vtoc, &old_vtoc, sizeof (struct vtoc)); 5680 5681 /* check the geometry */ 5682 (void) vdc_validate_geometry(vdc); 5683 5684 /* if the disk label has changed, update device nodes */ 5685 if (vdc->vdisk_label != old_label) { 5686 5687 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 5688 rv = vdc_create_device_nodes_efi(vdc); 5689 else 5690 rv = vdc_create_device_nodes_vtoc(vdc); 5691 5692 if (rv != 0) { 5693 DMSG(vdc, 0, "![%d] Failed to update device nodes", 5694 vdc->instance); 5695 } 5696 } 5697 5698 /* if the vtoc has changed, update device nodes properties */ 5699 if (bcmp(vdc->vtoc, &old_vtoc, sizeof (struct vtoc)) != 0) { 5700 5701 if (vdc_create_device_nodes_props(vdc) != 0) { 5702 DMSG(vdc, 0, "![%d] Failed to update device nodes" 5703 " properties", vdc->instance); 5704 } 5705 } 5706 5707 mutex_exit(&vdc->lock); 5708 } 5709 5710 static void 5711 vdc_validate_task(void *arg) 5712 { 5713 vdc_t *vdc = (vdc_t *)arg; 5714 5715 vdc_validate(vdc); 5716 5717 mutex_enter(&vdc->lock); 5718 ASSERT(vdc->validate_pending > 0); 5719 vdc->validate_pending--; 5720 mutex_exit(&vdc->lock); 5721 } 5722 5723 /* 5724 * Function: 5725 * vdc_setup_devid() 5726 * 5727 * Description: 5728 * This routine discovers the devid of a vDisk. It requests the devid of 5729 * the underlying device from the vDisk server, builds an encapsulated 5730 * devid based on the retrieved devid and registers that new devid to 5731 * the vDisk. 
5732 * 5733 * Arguments: 5734 * vdc - soft state pointer for this instance of the device driver. 5735 * 5736 * Return Code: 5737 * 0 - A devid was succesfully registered for the vDisk 5738 */ 5739 static int 5740 vdc_setup_devid(vdc_t *vdc) 5741 { 5742 int rv; 5743 vd_devid_t *vd_devid; 5744 size_t bufsize, bufid_len; 5745 5746 /* 5747 * At first sight, we don't know the size of the devid that the 5748 * server will return but this size will be encoded into the 5749 * reply. So we do a first request using a default size then we 5750 * check if this size was large enough. If not then we do a second 5751 * request with the correct size returned by the server. Note that 5752 * ldc requires size to be 8-byte aligned. 5753 */ 5754 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 5755 sizeof (uint64_t)); 5756 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5757 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5758 5759 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 5760 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir); 5761 5762 DMSG(vdc, 2, "sync_op returned %d\n", rv); 5763 5764 if (rv) { 5765 kmem_free(vd_devid, bufsize); 5766 return (rv); 5767 } 5768 5769 if (vd_devid->length > bufid_len) { 5770 /* 5771 * The returned devid is larger than the buffer used. Try again 5772 * with a buffer with the right size. 
5773 */ 5774 kmem_free(vd_devid, bufsize); 5775 bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 5776 sizeof (uint64_t)); 5777 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5778 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5779 5780 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 5781 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 5782 VIO_both_dir); 5783 5784 if (rv) { 5785 kmem_free(vd_devid, bufsize); 5786 return (rv); 5787 } 5788 } 5789 5790 /* 5791 * The virtual disk should have the same device id as the one associated 5792 * with the physical disk it is mapped on, otherwise sharing a disk 5793 * between a LDom and a non-LDom may not work (for example for a shared 5794 * SVM disk set). 5795 * 5796 * The DDI framework does not allow creating a device id with any 5797 * type so we first create a device id of type DEVID_ENCAP and then 5798 * we restore the orignal type of the physical device. 5799 */ 5800 5801 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 5802 5803 /* build an encapsulated devid based on the returned devid */ 5804 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 5805 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 5806 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 5807 kmem_free(vd_devid, bufsize); 5808 return (1); 5809 } 5810 5811 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 5812 5813 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 5814 5815 kmem_free(vd_devid, bufsize); 5816 5817 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 5818 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 5819 return (1); 5820 } 5821 5822 return (0); 5823 } 5824 5825 static void 5826 vdc_store_label_efi(vdc_t *vdc, struct dk_gpt *efi) 5827 { 5828 struct vtoc *vtoc = vdc->vtoc; 5829 5830 ASSERT(MUTEX_HELD(&vdc->lock)); 5831 5832 vdc->vdisk_label = VD_DISK_LABEL_EFI; 5833 bzero(vdc->geom, sizeof (struct dk_geom)); 5834 vd_efi_to_vtoc(efi, vtoc); 5835 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 
5836 /* 5837 * vd_efi_to_vtoc() will store information about the EFI Sun 5838 * reserved partition (representing the entire disk) into 5839 * partition 7. However single-slice device will only have 5840 * that single partition and the vdc driver expects to find 5841 * information about that partition in slice 0. So we need 5842 * to copy information from slice 7 to slice 0. 5843 */ 5844 vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag; 5845 vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag; 5846 vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start; 5847 vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; 5848 } 5849 } 5850 5851 static void 5852 vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct vtoc *vtoc) 5853 { 5854 ASSERT(MUTEX_HELD(&vdc->lock)); 5855 5856 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 5857 bcopy(vtoc, vdc->vtoc, sizeof (struct vtoc)); 5858 bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 5859 } 5860 5861 static void 5862 vdc_store_label_unk(vdc_t *vdc) 5863 { 5864 ASSERT(MUTEX_HELD(&vdc->lock)); 5865 5866 vdc->vdisk_label = VD_DISK_LABEL_UNK; 5867 bzero(vdc->vtoc, sizeof (struct vtoc)); 5868 bzero(vdc->geom, sizeof (struct dk_geom)); 5869 } 5870