1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. vdc will copy the data to be written to the descriptor 49 * ring or maps the buffer to store the data read by the vDisk 50 * server into the descriptor ring. It then sends a message to the 51 * vDisk server requesting it to complete the operation. 52 * 53 * 4) Handling responses from vDisk server. 54 * The vDisk server will ACK some or all of the messages vdc sends to it 55 * (this is configured during the handshake). Upon receipt of an ACK 56 * vdc will check the descriptor ring and signal to the upper layer 57 * code waiting on the IO. 58 */ 59 60 #include <sys/atomic.h> 61 #include <sys/conf.h> 62 #include <sys/disp.h> 63 #include <sys/ddi.h> 64 #include <sys/dkio.h> 65 #include <sys/efi_partition.h> 66 #include <sys/fcntl.h> 67 #include <sys/file.h> 68 #include <sys/mach_descrip.h> 69 #include <sys/modctl.h> 70 #include <sys/mdeg.h> 71 #include <sys/note.h> 72 #include <sys/open.h> 73 #include <sys/sdt.h> 74 #include <sys/stat.h> 75 #include <sys/sunddi.h> 76 #include <sys/types.h> 77 #include <sys/promif.h> 78 #include <sys/vtoc.h> 79 #include <sys/archsystm.h> 80 #include <sys/sysmacros.h> 81 82 #include <sys/cdio.h> 83 #include <sys/dktp/fdisk.h> 84 #include <sys/dktp/dadkio.h> 85 #include <sys/scsi/generic/sense.h> 86 #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ 87 88 #include <sys/ldoms.h> 89 #include <sys/ldc.h> 90 #include <sys/vio_common.h> 91 #include <sys/vio_mailbox.h> 92 #include <sys/vdsk_common.h> 93 #include <sys/vdsk_mailbox.h> 94 #include <sys/vdc.h> 95 96 /* 97 * function prototypes 98 */ 99 100 /* standard driver functions */ 101 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 102 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 103 static int vdc_strategy(struct buf *buf); 104 static int vdc_print(dev_t dev, char *str); 105 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 106 static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 107 static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 108 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 109 cred_t *credp, int *rvalp); 110 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 111 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 112 113 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 114 void *arg, void **resultp); 115 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 116 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 117 118 /* setup */ 119 static void vdc_min(struct buf *bufp); 120 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 121 static int vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node); 122 static int vdc_start_ldc_connection(vdc_t *vdc); 123 static int vdc_create_device_nodes(vdc_t *vdc); 124 static int vdc_create_device_nodes_efi(vdc_t *vdc); 125 static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 126 static int vdc_create_device_nodes_props(vdc_t *vdc); 127 static int vdc_get_md_node(dev_info_t *dip, md_t **mdpp, 128 mde_cookie_t *vd_nodep, mde_cookie_t *vd_portp); 129 static int vdc_get_ldc_id(md_t *, mde_cookie_t, uint64_t *); 130 static int vdc_do_ldc_up(vdc_t *vdc); 131 static void vdc_terminate_ldc(vdc_t *vdc); 132 static int vdc_init_descriptor_ring(vdc_t *vdc); 133 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 134 static int vdc_setup_devid(vdc_t *vdc); 135 static void vdc_store_label_efi(vdc_t *vdc, struct dk_gpt *efi); 136 static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, struct vtoc *); 137 static void vdc_store_label_unk(vdc_t *vdc); 138 static boolean_t vdc_is_opened(vdc_t *vdc); 139 140 /* handshake with vds */ 141 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 142 static int vdc_ver_negotiation(vdc_t *vdcp); 143 static int vdc_init_attr_negotiation(vdc_t *vdc); 144 static int vdc_attr_negotiation(vdc_t *vdcp); 145 static int vdc_init_dring_negotiate(vdc_t *vdc); 146 static int vdc_dring_negotiation(vdc_t *vdcp); 147 static int vdc_send_rdx(vdc_t *vdcp); 148 static int vdc_rdx_exchange(vdc_t *vdcp); 149 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 150 151 /* processing incoming messages from vDisk server */ 152 static void vdc_process_msg_thread(vdc_t *vdc); 153 static int vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 154 155 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 156 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 157 static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg); 158 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 159 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 160 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 161 static int vdc_send_request(vdc_t *vdcp, int operation, 162 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 163 int cb_type, void *cb_arg, vio_desc_direction_t dir); 164 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 165 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 166 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 167 int cb_type, void *cb_arg, vio_desc_direction_t dir); 168 static int vdc_do_sync_op(vdc_t *vdcp, int operation, 169 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 170 int cb_type, void *cb_arg, vio_desc_direction_t dir); 171 172 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 173 static int vdc_drain_response(vdc_t *vdcp); 174 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 175 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 176 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 177 178 /* dkio */ 179 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); 180 static void vdc_create_fake_geometry(vdc_t *vdc); 181 static int vdc_validate_geometry(vdc_t *vdc); 182 static void vdc_validate(vdc_t *vdc); 183 static void vdc_validate_task(void *arg); 184 static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 185 int mode, int dir); 186 static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 187 int mode, int dir); 188 static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 189 int mode, int dir); 190 static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 191 int mode, int dir); 192 static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 193 int mode, int dir); 194 static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 195 int mode, int dir); 196 static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 197 int mode, int dir); 198 static int vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to, 199 int mode, int dir); 200 static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 201 int mode, int dir); 202 static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 203 int mode, int dir); 204 205 /* 206 * Module variables 207 */ 208 209 /* 210 * Tunable variables to control how long vdc waits before timing out on 211 * various operations 212 */ 213 static int vdc_retries = 10; 214 static int vdc_hshake_retries = 3; 215 216 static int vdc_timeout = 0; /* units: seconds */ 217 218 /* calculated from 'vdc_usec_timeout' during attach */ 219 static uint64_t vdc_hz_timeout; /* units: Hz */ 220 static uint64_t vdc_usec_timeout = 30 * MICROSEC; /* 30s units: ns */ 221 222 static uint64_t vdc_hz_min_ldc_delay; 223 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 224 static uint64_t vdc_hz_max_ldc_delay; 225 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 226 227 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 228 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 229 230 /* values for dumping - need to run in a tighter loop */ 231 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 232 static int vdc_dump_retries = 100; 233 234 /* Count of the number of vdc instances attached */ 235 static volatile uint32_t vdc_instance_count = 0; 236 237 /* Soft state pointer */ 238 static void *vdc_state; 239 240 /* 241 * Controlling the verbosity of the error/debug messages 242 * 243 * vdc_msglevel - controls level of messages 244 * vdc_matchinst - 64-bit variable where each bit corresponds 245 * to the vdc instance the vdc_msglevel applies. 246 */ 247 int vdc_msglevel = 0x0; 248 uint64_t vdc_matchinst = 0ull; 249 250 /* 251 * Supported vDisk protocol version pairs. 252 * 253 * The first array entry is the latest and preferred version. 254 */ 255 static const vio_ver_t vdc_version[] = {{1, 0}}; 256 257 static struct cb_ops vdc_cb_ops = { 258 vdc_open, /* cb_open */ 259 vdc_close, /* cb_close */ 260 vdc_strategy, /* cb_strategy */ 261 vdc_print, /* cb_print */ 262 vdc_dump, /* cb_dump */ 263 vdc_read, /* cb_read */ 264 vdc_write, /* cb_write */ 265 vdc_ioctl, /* cb_ioctl */ 266 nodev, /* cb_devmap */ 267 nodev, /* cb_mmap */ 268 nodev, /* cb_segmap */ 269 nochpoll, /* cb_chpoll */ 270 ddi_prop_op, /* cb_prop_op */ 271 NULL, /* cb_str */ 272 D_MP | D_64BIT, /* cb_flag */ 273 CB_REV, /* cb_rev */ 274 vdc_aread, /* cb_aread */ 275 vdc_awrite /* cb_awrite */ 276 }; 277 278 static struct dev_ops vdc_ops = { 279 DEVO_REV, /* devo_rev */ 280 0, /* devo_refcnt */ 281 vdc_getinfo, /* devo_getinfo */ 282 nulldev, /* devo_identify */ 283 nulldev, /* devo_probe */ 284 vdc_attach, /* devo_attach */ 285 vdc_detach, /* devo_detach */ 286 nodev, /* devo_reset */ 287 &vdc_cb_ops, /* devo_cb_ops */ 288 NULL, /* devo_bus_ops */ 289 nulldev /* devo_power */ 290 }; 291 292 static struct modldrv modldrv = { 293 &mod_driverops, 294 "virtual disk client", 295 &vdc_ops, 296 }; 297 298 static struct modlinkage modlinkage = { 299 MODREV_1, 300 &modldrv, 301 NULL 302 }; 303 304 /* -------------------------------------------------------------------------- */ 305 306 /* 307 * Device Driver housekeeping and setup 308 */ 309 310 int 311 _init(void) 312 { 313 int status; 314 315 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 316 return (status); 317 if ((status = mod_install(&modlinkage)) != 0) 318 ddi_soft_state_fini(&vdc_state); 319 vdc_efi_init(vd_process_ioctl); 320 return (status); 321 } 322 323 int 324 _info(struct modinfo *modinfop) 325 { 326 return (mod_info(&modlinkage, modinfop)); 327 } 328 329 int 330 _fini(void) 331 { 332 int status; 333 334 if ((status = mod_remove(&modlinkage)) != 0) 335 return (status); 336 vdc_efi_fini(); 337 ddi_soft_state_fini(&vdc_state); 338 return (0); 339 } 340 341 static int 342 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 343 { 344 _NOTE(ARGUNUSED(dip)) 345 346 int instance = VDCUNIT((dev_t)arg); 347 vdc_t *vdc = NULL; 348 349 switch (cmd) { 350 case DDI_INFO_DEVT2DEVINFO: 351 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 352 *resultp = NULL; 353 return (DDI_FAILURE); 354 } 355 *resultp = vdc->dip; 356 return (DDI_SUCCESS); 357 case DDI_INFO_DEVT2INSTANCE: 358 *resultp = (void *)(uintptr_t)instance; 359 return (DDI_SUCCESS); 360 default: 361 *resultp = NULL; 362 return (DDI_FAILURE); 363 } 364 } 365 366 static int 367 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 368 { 369 int instance; 370 int rv; 371 vdc_t *vdc = NULL; 372 373 switch (cmd) { 374 case DDI_DETACH: 375 /* the real work happens below */ 376 break; 377 case DDI_SUSPEND: 378 /* nothing to do for this non-device */ 379 return (DDI_SUCCESS); 380 default: 381 return (DDI_FAILURE); 382 } 383 384 ASSERT(cmd == DDI_DETACH); 385 instance = ddi_get_instance(dip); 386 DMSGX(1, "[%d] Entered\n", instance); 387 388 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 389 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 390 return (DDI_FAILURE); 391 } 392 393 if (vdc_is_opened(vdc)) { 394 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 395 return (DDI_FAILURE); 396 } 397 398 if (vdc->dkio_flush_pending) { 399 DMSG(vdc, 0, 400 "[%d] Cannot detach: %d outstanding DKIO flushes\n", 401 instance, vdc->dkio_flush_pending); 402 return (DDI_FAILURE); 403 } 404 405 if (vdc->validate_pending) { 406 DMSG(vdc, 0, 407 "[%d] Cannot detach: %d outstanding validate request\n", 408 instance, vdc->validate_pending); 409 return (DDI_FAILURE); 410 } 411 412 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 413 414 /* mark instance as detaching */ 415 vdc->lifecycle = VDC_LC_DETACHING; 416 417 /* 418 * try and disable callbacks to prevent another handshake 419 */ 420 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 421 DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv); 422 423 if (vdc->initialized & VDC_THREAD) { 424 mutex_enter(&vdc->read_lock); 425 if ((vdc->read_state == VDC_READ_WAITING) || 426 (vdc->read_state == VDC_READ_RESET)) { 427 vdc->read_state = VDC_READ_RESET; 428 cv_signal(&vdc->read_cv); 429 } 430 431 mutex_exit(&vdc->read_lock); 432 433 /* wake up any thread waiting for connection to come online */ 434 mutex_enter(&vdc->lock); 435 if (vdc->state == VDC_STATE_INIT_WAITING) { 436 DMSG(vdc, 0, 437 "[%d] write reset - move to resetting state...\n", 438 instance); 439 vdc->state = VDC_STATE_RESETTING; 440 cv_signal(&vdc->initwait_cv); 441 } 442 mutex_exit(&vdc->lock); 443 444 /* now wait until state transitions to VDC_STATE_DETACH */ 445 thread_join(vdc->msg_proc_thr->t_did); 446 ASSERT(vdc->state == VDC_STATE_DETACH); 447 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 448 vdc->instance); 449 } 450 451 mutex_enter(&vdc->lock); 452 453 if (vdc->initialized & VDC_DRING) 454 vdc_destroy_descriptor_ring(vdc); 455 456 if (vdc->initialized & VDC_LDC) 457 vdc_terminate_ldc(vdc); 458 459 mutex_exit(&vdc->lock); 460 461 if (vdc->initialized & VDC_MINOR) { 462 ddi_prop_remove_all(dip); 463 ddi_remove_minor_node(dip, NULL); 464 } 465 466 if (vdc->initialized & VDC_LOCKS) { 467 mutex_destroy(&vdc->lock); 468 mutex_destroy(&vdc->read_lock); 469 cv_destroy(&vdc->initwait_cv); 470 cv_destroy(&vdc->dring_free_cv); 471 cv_destroy(&vdc->membind_cv); 472 cv_destroy(&vdc->sync_pending_cv); 473 cv_destroy(&vdc->sync_blocked_cv); 474 cv_destroy(&vdc->read_cv); 475 cv_destroy(&vdc->running_cv); 476 } 477 478 if (vdc->minfo) 479 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 480 481 if (vdc->cinfo) 482 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 483 484 if (vdc->vtoc) 485 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 486 487 if (vdc->geom) 488 kmem_free(vdc->geom, sizeof (struct dk_geom)); 489 490 if (vdc->devid) { 491 ddi_devid_unregister(dip); 492 ddi_devid_free(vdc->devid); 493 } 494 495 if (vdc->initialized & VDC_SOFT_STATE) 496 ddi_soft_state_free(vdc_state, instance); 497 498 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 499 500 return (DDI_SUCCESS); 501 } 502 503 504 static int 505 vdc_do_attach(dev_info_t *dip) 506 { 507 int instance; 508 vdc_t *vdc = NULL; 509 int status; 510 md_t *mdp; 511 mde_cookie_t vd_node, vd_port; 512 513 ASSERT(dip != NULL); 514 515 instance = ddi_get_instance(dip); 516 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 517 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 518 instance); 519 return (DDI_FAILURE); 520 } 521 522 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 523 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 524 return (DDI_FAILURE); 525 } 526 527 /* 528 * We assign the value to initialized in this case to zero out the 529 * variable and then set bits in it to indicate what has been done 530 */ 531 vdc->initialized = VDC_SOFT_STATE; 532 533 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 534 535 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 536 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 537 538 vdc->dip = dip; 539 vdc->instance = instance; 540 vdc->vdisk_type = VD_DISK_TYPE_UNK; 541 vdc->vdisk_label = VD_DISK_LABEL_UNK; 542 vdc->state = VDC_STATE_INIT; 543 vdc->lifecycle = VDC_LC_ATTACHING; 544 vdc->ldc_state = 0; 545 vdc->session_id = 0; 546 vdc->block_size = DEV_BSIZE; 547 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 548 549 vdc->vtoc = NULL; 550 vdc->geom = NULL; 551 vdc->cinfo = NULL; 552 vdc->minfo = NULL; 553 554 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 555 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 556 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 557 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 558 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 559 560 vdc->threads_pending = 0; 561 vdc->sync_op_pending = B_FALSE; 562 vdc->sync_op_blocked = B_FALSE; 563 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 564 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 565 566 /* init blocking msg read functionality */ 567 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 568 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 569 vdc->read_state = VDC_READ_IDLE; 570 571 vdc->initialized |= VDC_LOCKS; 572 573 /* get device and port MD node for this disk instance */ 574 if (vdc_get_md_node(dip, &mdp, &vd_node, &vd_port) != 0) { 575 cmn_err(CE_NOTE, "[%d] Could not get machine description node", 576 instance); 577 return (DDI_FAILURE); 578 } 579 580 /* set the connection timeout */ 581 if (vd_port == NULL || (md_get_prop_val(mdp, vd_port, 582 VDC_MD_TIMEOUT, &vdc->ctimeout) != 0)) { 583 vdc->ctimeout = 0; 584 } 585 586 /* initialise LDC channel which will be used to communicate with vds */ 587 status = vdc_do_ldc_init(vdc, mdp, vd_node); 588 589 (void) md_fini_handle(mdp); 590 591 if (status != 0) { 592 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 593 goto return_status; 594 } 595 596 /* initialize the thread responsible for managing state with server */ 597 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 598 vdc, 0, &p0, TS_RUN, minclsyspri); 599 if (vdc->msg_proc_thr == NULL) { 600 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 601 instance); 602 return (DDI_FAILURE); 603 } 604 605 vdc->initialized |= VDC_THREAD; 606 607 atomic_inc_32(&vdc_instance_count); 608 609 /* 610 * Check the disk label. This will send requests and do the handshake. 611 * We don't really care about the disk label now. What we really need is 612 * the handshake do be done so that we know the type of the disk (slice 613 * or full disk) and the appropriate device nodes can be created. 614 */ 615 vdc->vdisk_label = VD_DISK_LABEL_UNK; 616 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 617 vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); 618 619 mutex_enter(&vdc->lock); 620 (void) vdc_validate_geometry(vdc); 621 mutex_exit(&vdc->lock); 622 623 /* 624 * Now that we have the device info we can create the 625 * device nodes and properties 626 */ 627 status = vdc_create_device_nodes(vdc); 628 if (status) { 629 DMSG(vdc, 0, "[%d] Failed to create device nodes", 630 instance); 631 goto return_status; 632 } 633 status = vdc_create_device_nodes_props(vdc); 634 if (status) { 635 DMSG(vdc, 0, "[%d] Failed to create device nodes" 636 " properties (%d)", instance, status); 637 goto return_status; 638 } 639 640 /* 641 * Setup devid 642 */ 643 if (vdc_setup_devid(vdc)) { 644 DMSG(vdc, 0, "[%d] No device id available\n", instance); 645 } 646 647 ddi_report_dev(dip); 648 vdc->lifecycle = VDC_LC_ONLINE; 649 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 650 651 return_status: 652 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 653 return (status); 654 } 655 656 static int 657 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 658 { 659 int status; 660 661 switch (cmd) { 662 case DDI_ATTACH: 663 if ((status = vdc_do_attach(dip)) != 0) 664 (void) vdc_detach(dip, DDI_DETACH); 665 return (status); 666 case DDI_RESUME: 667 /* nothing to do for this non-device */ 668 return (DDI_SUCCESS); 669 default: 670 return (DDI_FAILURE); 671 } 672 } 673 674 static int 675 vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node) 676 { 677 int status = 0; 678 ldc_status_t ldc_state; 679 ldc_attr_t ldc_attr; 680 uint64_t ldc_id = 0; 681 682 ASSERT(vdc != NULL); 683 684 vdc->initialized |= VDC_LDC; 685 686 if ((status = vdc_get_ldc_id(mdp, vd_node, &ldc_id)) != 0) { 687 DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property", 688 vdc->instance); 689 return (EIO); 690 } 691 692 DMSGX(0, "[%d] LDC id is 0x%lx\n", vdc->instance, ldc_id); 693 694 vdc->ldc_id = ldc_id; 695 696 ldc_attr.devclass = LDC_DEV_BLK; 697 ldc_attr.instance = vdc->instance; 698 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 699 ldc_attr.mtu = VD_LDC_MTU; 700 701 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 702 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 703 if (status != 0) { 704 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 705 vdc->instance, ldc_id, status); 706 return (status); 707 } 708 vdc->initialized |= VDC_LDC_INIT; 709 } 710 status = ldc_status(vdc->ldc_handle, &ldc_state); 711 if (status != 0) { 712 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 713 vdc->instance, status); 714 return (status); 715 } 716 vdc->ldc_state = ldc_state; 717 718 if ((vdc->initialized & VDC_LDC_CB) == 0) { 719 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 720 (caddr_t)vdc); 721 if (status != 0) { 722 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 723 vdc->instance, status); 724 return (status); 725 } 726 vdc->initialized |= VDC_LDC_CB; 727 } 728 729 vdc->initialized |= VDC_LDC; 730 731 /* 732 * At this stage we have initialised LDC, we will now try and open 733 * the connection. 734 */ 735 if (vdc->ldc_state == LDC_INIT) { 736 status = ldc_open(vdc->ldc_handle); 737 if (status != 0) { 738 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 739 vdc->instance, vdc->ldc_id, status); 740 return (status); 741 } 742 vdc->initialized |= VDC_LDC_OPEN; 743 } 744 745 return (status); 746 } 747 748 static int 749 vdc_start_ldc_connection(vdc_t *vdc) 750 { 751 int status = 0; 752 753 ASSERT(vdc != NULL); 754 755 ASSERT(MUTEX_HELD(&vdc->lock)); 756 757 status = vdc_do_ldc_up(vdc); 758 759 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 760 761 return (status); 762 } 763 764 static int 765 vdc_stop_ldc_connection(vdc_t *vdcp) 766 { 767 int status; 768 769 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 770 vdcp->state); 771 772 status = ldc_down(vdcp->ldc_handle); 773 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 774 775 vdcp->initialized &= ~VDC_HANDSHAKE; 776 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 777 778 return (status); 779 } 780 781 static int 782 vdc_create_device_nodes_efi(vdc_t *vdc) 783 { 784 ddi_remove_minor_node(vdc->dip, "h"); 785 ddi_remove_minor_node(vdc->dip, "h,raw"); 786 787 if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 788 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 789 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 790 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 791 vdc->instance); 792 return (EIO); 793 } 794 795 /* if any device node is created we set this flag */ 796 vdc->initialized |= VDC_MINOR; 797 798 if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 799 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 800 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 801 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 802 vdc->instance); 803 return (EIO); 804 } 805 806 return (0); 807 } 808 809 static int 810 vdc_create_device_nodes_vtoc(vdc_t *vdc) 811 { 812 ddi_remove_minor_node(vdc->dip, "wd"); 813 ddi_remove_minor_node(vdc->dip, "wd,raw"); 814 815 if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK, 816 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 817 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 818 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 819 vdc->instance); 820 return (EIO); 821 } 822 823 /* if any device node is created we set this flag */ 824 vdc->initialized |= VDC_MINOR; 825 826 if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 827 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 828 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 829 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 830 vdc->instance); 831 return (EIO); 832 } 833 834 return (0); 835 } 836 837 /* 838 * Function: 839 * vdc_create_device_nodes 840 * 841 * Description: 842 * This function creates the block and character device nodes under 843 * /devices along with the node properties. It is called as part of 844 * the attach(9E) of the instance during the handshake with vds after 845 * vds has sent the attributes to vdc. 846 * 847 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 848 * of 2 is used in keeping with the Solaris convention that slice 2 849 * refers to a whole disk. Slices start at 'a' 850 * 851 * Parameters: 852 * vdc - soft state pointer 853 * 854 * Return Values 855 * 0 - Success 856 * EIO - Failed to create node 857 * EINVAL - Unknown type of disk exported 858 */ 859 static int 860 vdc_create_device_nodes(vdc_t *vdc) 861 { 862 char name[sizeof ("s,raw")]; 863 dev_info_t *dip = NULL; 864 int instance, status; 865 int num_slices = 1; 866 int i; 867 868 ASSERT(vdc != NULL); 869 870 instance = vdc->instance; 871 dip = vdc->dip; 872 873 switch (vdc->vdisk_type) { 874 case VD_DISK_TYPE_DISK: 875 num_slices = V_NUMPAR; 876 break; 877 case VD_DISK_TYPE_SLICE: 878 num_slices = 1; 879 break; 880 case VD_DISK_TYPE_UNK: 881 default: 882 return (EINVAL); 883 } 884 885 /* 886 * Minor nodes are different for EFI disks: EFI disks do not have 887 * a minor node 'g' for the minor number corresponding to slice 888 * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 889 * representing the whole disk. 890 */ 891 for (i = 0; i < num_slices; i++) { 892 893 if (i == VD_EFI_WD_SLICE) { 894 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 895 status = vdc_create_device_nodes_efi(vdc); 896 else 897 status = vdc_create_device_nodes_vtoc(vdc); 898 if (status != 0) 899 return (status); 900 continue; 901 } 902 903 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 904 if (ddi_create_minor_node(dip, name, S_IFBLK, 905 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 906 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 907 instance, name); 908 return (EIO); 909 } 910 911 /* if any device node is created we set this flag */ 912 vdc->initialized |= VDC_MINOR; 913 914 (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); 915 916 if (ddi_create_minor_node(dip, name, S_IFCHR, 917 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 918 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 919 instance, name); 920 return (EIO); 921 } 922 } 923 924 return (0); 925 } 926 927 /* 928 * Function: 929 * vdc_create_device_nodes_props 930 * 931 * Description: 932 * This function creates the block and character device nodes under 933 * /devices along with the node properties. It is called as part of 934 * the attach(9E) of the instance during the handshake with vds after 935 * vds has sent the attributes to vdc. 936 * 937 * Parameters: 938 * vdc - soft state pointer 939 * 940 * Return Values 941 * 0 - Success 942 * EIO - Failed to create device node property 943 * EINVAL - Unknown type of disk exported 944 */ 945 static int 946 vdc_create_device_nodes_props(vdc_t *vdc) 947 { 948 dev_info_t *dip = NULL; 949 int instance; 950 int num_slices = 1; 951 int64_t size = 0; 952 dev_t dev; 953 int rv; 954 int i; 955 956 ASSERT(vdc != NULL); 957 ASSERT(vdc->vtoc != NULL); 958 959 instance = vdc->instance; 960 dip = vdc->dip; 961 962 switch (vdc->vdisk_type) { 963 case VD_DISK_TYPE_DISK: 964 num_slices = V_NUMPAR; 965 break; 966 case VD_DISK_TYPE_SLICE: 967 num_slices = 1; 968 break; 969 case VD_DISK_TYPE_UNK: 970 default: 971 return (EINVAL); 972 } 973 974 if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { 975 /* remove all properties */ 976 for (i = 0; i < num_slices; i++) { 977 dev = makedevice(ddi_driver_major(dip), 978 VD_MAKE_DEV(instance, i)); 979 (void) ddi_prop_remove(dev, dip, VDC_SIZE_PROP_NAME); 980 (void) ddi_prop_remove(dev, dip, VDC_NBLOCKS_PROP_NAME); 981 } 982 return (0); 983 } 984 985 for (i = 0; i < num_slices; i++) { 986 dev = makedevice(ddi_driver_major(dip), 987 VD_MAKE_DEV(instance, i)); 988 989 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 990 DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 991 instance, size, size / (1024 * 1024), 992 vdc->vtoc->v_part[i].p_size); 993 994 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 995 if (rv != DDI_PROP_SUCCESS) { 996 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 997 instance, VDC_SIZE_PROP_NAME, size); 998 return (EIO); 999 } 1000 1001 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 1002 lbtodb(size)); 1003 if (rv != DDI_PROP_SUCCESS) { 1004 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 1005 instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 1006 return (EIO); 1007 } 1008 } 1009 1010 return (0); 1011 } 1012 1013 /* 1014 * Function: 1015 * vdc_is_opened 1016 * 1017 * Description: 1018 * This function checks if any slice of a given virtual disk is 1019 * currently opened. 1020 * 1021 * Parameters: 1022 * vdc - soft state pointer 1023 * 1024 * Return Values 1025 * B_TRUE - at least one slice is opened. 1026 * B_FALSE - no slice is opened. 1027 */ 1028 static boolean_t 1029 vdc_is_opened(vdc_t *vdc) 1030 { 1031 int i, nslices; 1032 1033 switch (vdc->vdisk_type) { 1034 case VD_DISK_TYPE_DISK: 1035 nslices = V_NUMPAR; 1036 break; 1037 case VD_DISK_TYPE_SLICE: 1038 nslices = 1; 1039 break; 1040 case VD_DISK_TYPE_UNK: 1041 default: 1042 ASSERT(0); 1043 } 1044 1045 /* check if there's any layered open */ 1046 for (i = 0; i < nslices; i++) { 1047 if (vdc->open_lyr[i] > 0) 1048 return (B_TRUE); 1049 } 1050 1051 /* check if there is any other kind of open */ 1052 for (i = 0; i < OTYPCNT; i++) { 1053 if (vdc->open[i] != 0) 1054 return (B_TRUE); 1055 } 1056 1057 return (B_FALSE); 1058 } 1059 1060 static int 1061 vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) 1062 { 1063 uint8_t slicemask; 1064 int i; 1065 1066 ASSERT(otyp < OTYPCNT); 1067 ASSERT(slice < V_NUMPAR); 1068 ASSERT(MUTEX_HELD(&vdc->lock)); 1069 1070 slicemask = 1 << slice; 1071 1072 /* check if slice is already exclusively opened */ 1073 if (vdc->open_excl & slicemask) 1074 return (EBUSY); 1075 1076 /* if open exclusive, check if slice is already opened */ 1077 if (flag & FEXCL) { 1078 if (vdc->open_lyr[slice] > 0) 1079 return (EBUSY); 1080 for (i = 0; i < OTYPCNT; i++) { 1081 if (vdc->open[i] & slicemask) 1082 return (EBUSY); 1083 } 1084 vdc->open_excl |= slicemask; 1085 } 1086 1087 /* mark slice as opened */ 1088 if (otyp == OTYP_LYR) { 1089 vdc->open_lyr[slice]++; 1090 } else { 1091 vdc->open[otyp] |= slicemask; 1092 } 1093 1094 return (0); 1095 } 1096 1097 static void 1098 vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) 1099 { 1100 uint8_t slicemask; 1101 1102 ASSERT(otyp < OTYPCNT); 1103 ASSERT(slice < V_NUMPAR); 1104 ASSERT(MUTEX_HELD(&vdc->lock)); 1105 1106 slicemask = 1 << slice; 1107 1108 if (otyp == OTYP_LYR) { 1109 ASSERT(vdc->open_lyr[slice] > 0); 1110 vdc->open_lyr[slice]--; 1111 } else { 1112 vdc->open[otyp] &= ~slicemask; 1113 } 1114 1115 if (flag & FEXCL) 1116 vdc->open_excl &= ~slicemask; 1117 } 1118 1119 static int 1120 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 1121 { 1122 _NOTE(ARGUNUSED(cred)) 1123 1124 int instance; 1125 int slice, status = 0; 1126 vdc_t *vdc; 1127 1128 ASSERT(dev != NULL); 1129 instance = VDCUNIT(*dev); 1130 1131 if (otyp >= OTYPCNT) 1132 return (EINVAL); 1133 1134 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1135 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1136 return (ENXIO); 1137 } 1138 1139 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 1140 getminor(*dev), flag, otyp); 1141 1142 slice = VDCPART(*dev); 1143 1144 mutex_enter(&vdc->lock); 1145 1146 status = vdc_mark_opened(vdc, slice, flag, otyp); 1147 1148 if (status != 0) { 1149 mutex_exit(&vdc->lock); 1150 return (status); 1151 } 1152 1153 if (flag & (FNDELAY | FNONBLOCK)) { 1154 1155 /* don't resubmit a validate request if there's already one */ 1156 if (vdc->validate_pending > 0) { 1157 mutex_exit(&vdc->lock); 1158 return (0); 1159 } 1160 1161 /* call vdc_validate() asynchronously to avoid blocking */ 1162 if (taskq_dispatch(system_taskq, vdc_validate_task, 1163 (void *)vdc, TQ_NOSLEEP) == NULL) { 1164 vdc_mark_closed(vdc, slice, flag, otyp); 1165 mutex_exit(&vdc->lock); 1166 return (ENXIO); 1167 } 1168 1169 vdc->validate_pending++; 1170 mutex_exit(&vdc->lock); 1171 return (0); 1172 } 1173 1174 mutex_exit(&vdc->lock); 1175 1176 vdc_validate(vdc); 1177 1178 mutex_enter(&vdc->lock); 1179 1180 if (vdc->vdisk_label == VD_DISK_LABEL_UNK || 1181 vdc->vtoc->v_part[slice].p_size == 0) { 1182 vdc_mark_closed(vdc, slice, flag, otyp); 1183 status = EIO; 1184 } 1185 1186 mutex_exit(&vdc->lock); 1187 1188 return (status); 1189 } 1190 1191 static int 1192 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 1193 { 1194 _NOTE(ARGUNUSED(cred)) 1195 1196 int instance; 1197 int slice; 1198 vdc_t *vdc; 1199 1200 instance = VDCUNIT(dev); 1201 1202 if (otyp >= OTYPCNT) 1203 return (EINVAL); 1204 1205 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1206 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1207 return (ENXIO); 1208 } 1209 1210 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 1211 1212 slice = VDCPART(dev); 1213 1214 mutex_enter(&vdc->lock); 1215 vdc_mark_closed(vdc, slice, flag, otyp); 1216 mutex_exit(&vdc->lock); 1217 1218 return (0); 1219 } 1220 1221 static int 1222 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1223 { 1224 _NOTE(ARGUNUSED(credp)) 1225 _NOTE(ARGUNUSED(rvalp)) 1226 1227 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode)); 1228 } 1229 1230 static int 1231 vdc_print(dev_t dev, char *str) 1232 { 1233 cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 1234 return (0); 1235 } 1236 1237 static int 1238 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1239 { 1240 int rv; 1241 size_t nbytes = nblk * DEV_BSIZE; 1242 int instance = VDCUNIT(dev); 1243 vdc_t *vdc = NULL; 1244 1245 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1246 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1247 return (ENXIO); 1248 } 1249 1250 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 1251 instance, nbytes, blkno, (void *)addr); 1252 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 1253 VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir); 1254 if (rv) { 1255 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 1256 return (rv); 1257 } 1258 1259 if (ddi_in_panic()) 1260 (void) vdc_drain_response(vdc); 1261 1262 DMSG(vdc, 0, "[%d] End\n", instance); 1263 1264 return (0); 1265 } 1266 1267 /* -------------------------------------------------------------------------- */ 1268 1269 /* 1270 * Disk access routines 1271 * 1272 */ 1273 1274 /* 1275 * vdc_strategy() 1276 * 1277 * Return Value: 1278 * 0: As per strategy(9E), the strategy() function must return 0 1279 * [ bioerror(9f) sets b_flags to the proper error code ] 1280 */ 1281 static int 1282 vdc_strategy(struct buf *buf) 1283 { 1284 int rv = -1; 1285 vdc_t *vdc = NULL; 1286 int instance = VDCUNIT(buf->b_edev); 1287 int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; 1288 int slice; 1289 1290 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1291 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1292 bioerror(buf, ENXIO); 1293 biodone(buf); 1294 return (0); 1295 } 1296 1297 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 1298 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 1299 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1300 DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc); 1301 1302 bp_mapin(buf); 1303 1304 if ((long)buf->b_private == VD_SLICE_NONE) { 1305 /* I/O using an absolute disk offset */ 1306 slice = VD_SLICE_NONE; 1307 } else { 1308 slice = VDCPART(buf->b_edev); 1309 } 1310 1311 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 1312 buf->b_bcount, slice, buf->b_lblkno, 1313 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 1314 VIO_write_dir); 1315 1316 /* 1317 * If the request was successfully sent, the strategy call returns and 1318 * the ACK handler calls the bioxxx functions when the vDisk server is 1319 * done. 1320 */ 1321 if (rv) { 1322 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 1323 bioerror(buf, rv); 1324 biodone(buf); 1325 } 1326 1327 return (0); 1328 } 1329 1330 /* 1331 * Function: 1332 * vdc_min 1333 * 1334 * Description: 1335 * Routine to limit the size of a data transfer. Used in 1336 * conjunction with physio(9F). 1337 * 1338 * Arguments: 1339 * bp - pointer to the indicated buf(9S) struct. 1340 * 1341 */ 1342 static void 1343 vdc_min(struct buf *bufp) 1344 { 1345 vdc_t *vdc = NULL; 1346 int instance = VDCUNIT(bufp->b_edev); 1347 1348 vdc = ddi_get_soft_state(vdc_state, instance); 1349 VERIFY(vdc != NULL); 1350 1351 if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) { 1352 bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size; 1353 } 1354 } 1355 1356 static int 1357 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1358 { 1359 _NOTE(ARGUNUSED(cred)) 1360 1361 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1362 return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 1363 } 1364 1365 static int 1366 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1367 { 1368 _NOTE(ARGUNUSED(cred)) 1369 1370 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1371 return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 1372 } 1373 1374 static int 1375 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1376 { 1377 _NOTE(ARGUNUSED(cred)) 1378 1379 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1380 return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 1381 } 1382 1383 static int 1384 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1385 { 1386 _NOTE(ARGUNUSED(cred)) 1387 1388 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1389 return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 1390 } 1391 1392 1393 /* -------------------------------------------------------------------------- */ 1394 1395 /* 1396 * Handshake support 1397 */ 1398 1399 1400 /* 1401 * Function: 1402 * vdc_init_ver_negotiation() 1403 * 1404 * Description: 1405 * 1406 * Arguments: 1407 * vdc - soft state pointer for this instance of the device driver. 1408 * 1409 * Return Code: 1410 * 0 - Success 1411 */ 1412 static int 1413 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 1414 { 1415 vio_ver_msg_t pkt; 1416 size_t msglen = sizeof (pkt); 1417 int status = -1; 1418 1419 ASSERT(vdc != NULL); 1420 ASSERT(mutex_owned(&vdc->lock)); 1421 1422 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1423 1424 /* 1425 * set the Session ID to a unique value 1426 * (the lower 32 bits of the clock tick) 1427 */ 1428 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1429 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 1430 1431 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1432 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1433 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1434 pkt.tag.vio_sid = vdc->session_id; 1435 pkt.dev_class = VDEV_DISK; 1436 pkt.ver_major = ver.major; 1437 pkt.ver_minor = ver.minor; 1438 1439 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1440 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 1441 vdc->instance, status); 1442 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1443 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 1444 "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle, 1445 status, msglen); 1446 if (msglen != sizeof (vio_ver_msg_t)) 1447 status = ENOMSG; 1448 } 1449 1450 return (status); 1451 } 1452 1453 /* 1454 * Function: 1455 * vdc_ver_negotiation() 1456 * 1457 * Description: 1458 * 1459 * Arguments: 1460 * vdcp - soft state pointer for this instance of the device driver. 1461 * 1462 * Return Code: 1463 * 0 - Success 1464 */ 1465 static int 1466 vdc_ver_negotiation(vdc_t *vdcp) 1467 { 1468 vio_msg_t vio_msg; 1469 int status; 1470 1471 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 1472 return (status); 1473 1474 /* release lock and wait for response */ 1475 mutex_exit(&vdcp->lock); 1476 status = vdc_wait_for_response(vdcp, &vio_msg); 1477 mutex_enter(&vdcp->lock); 1478 if (status) { 1479 DMSG(vdcp, 0, 1480 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 1481 vdcp->instance, status); 1482 return (status); 1483 } 1484 1485 /* check type and sub_type ... */ 1486 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1487 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1488 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 1489 vdcp->instance); 1490 return (EPROTO); 1491 } 1492 1493 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 1494 } 1495 1496 /* 1497 * Function: 1498 * vdc_init_attr_negotiation() 1499 * 1500 * Description: 1501 * 1502 * Arguments: 1503 * vdc - soft state pointer for this instance of the device driver. 1504 * 1505 * Return Code: 1506 * 0 - Success 1507 */ 1508 static int 1509 vdc_init_attr_negotiation(vdc_t *vdc) 1510 { 1511 vd_attr_msg_t pkt; 1512 size_t msglen = sizeof (pkt); 1513 int status; 1514 1515 ASSERT(vdc != NULL); 1516 ASSERT(mutex_owned(&vdc->lock)); 1517 1518 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 1519 1520 /* fill in tag */ 1521 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1522 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1523 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1524 pkt.tag.vio_sid = vdc->session_id; 1525 /* fill in payload */ 1526 pkt.max_xfer_sz = vdc->max_xfer_sz; 1527 pkt.vdisk_block_size = vdc->block_size; 1528 pkt.xfer_mode = VIO_DRING_MODE; 1529 pkt.operations = 0; /* server will set bits of valid operations */ 1530 pkt.vdisk_type = 0; /* server will set to valid device type */ 1531 pkt.vdisk_size = 0; /* server will set to valid size */ 1532 1533 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1534 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 1535 1536 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1537 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 1538 "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle, 1539 status, msglen); 1540 if (msglen != sizeof (vio_ver_msg_t)) 1541 status = ENOMSG; 1542 } 1543 1544 return (status); 1545 } 1546 1547 /* 1548 * Function: 1549 * vdc_attr_negotiation() 1550 * 1551 * Description: 1552 * 1553 * Arguments: 1554 * vdc - soft state pointer for this instance of the device driver. 1555 * 1556 * Return Code: 1557 * 0 - Success 1558 */ 1559 static int 1560 vdc_attr_negotiation(vdc_t *vdcp) 1561 { 1562 int status; 1563 vio_msg_t vio_msg; 1564 1565 if (status = vdc_init_attr_negotiation(vdcp)) 1566 return (status); 1567 1568 /* release lock and wait for response */ 1569 mutex_exit(&vdcp->lock); 1570 status = vdc_wait_for_response(vdcp, &vio_msg); 1571 mutex_enter(&vdcp->lock); 1572 if (status) { 1573 DMSG(vdcp, 0, 1574 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 1575 vdcp->instance, status); 1576 return (status); 1577 } 1578 1579 /* check type and sub_type ... */ 1580 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1581 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1582 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 1583 vdcp->instance); 1584 return (EPROTO); 1585 } 1586 1587 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 1588 } 1589 1590 1591 /* 1592 * Function: 1593 * vdc_init_dring_negotiate() 1594 * 1595 * Description: 1596 * 1597 * Arguments: 1598 * vdc - soft state pointer for this instance of the device driver. 1599 * 1600 * Return Code: 1601 * 0 - Success 1602 */ 1603 static int 1604 vdc_init_dring_negotiate(vdc_t *vdc) 1605 { 1606 vio_dring_reg_msg_t pkt; 1607 size_t msglen = sizeof (pkt); 1608 int status = -1; 1609 int retry; 1610 int nretries = 10; 1611 1612 ASSERT(vdc != NULL); 1613 ASSERT(mutex_owned(&vdc->lock)); 1614 1615 for (retry = 0; retry < nretries; retry++) { 1616 status = vdc_init_descriptor_ring(vdc); 1617 if (status != EAGAIN) 1618 break; 1619 drv_usecwait(vdc_min_timeout_ldc); 1620 } 1621 1622 if (status != 0) { 1623 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 1624 vdc->instance, status); 1625 return (status); 1626 } 1627 1628 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 1629 vdc->instance, status); 1630 1631 /* fill in tag */ 1632 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1633 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1634 pkt.tag.vio_subtype_env = VIO_DRING_REG; 1635 pkt.tag.vio_sid = vdc->session_id; 1636 /* fill in payload */ 1637 pkt.dring_ident = 0; 1638 pkt.num_descriptors = vdc->dring_len; 1639 pkt.descriptor_size = vdc->dring_entry_size; 1640 pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 1641 pkt.ncookies = vdc->dring_cookie_count; 1642 pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 1643 1644 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1645 if (status != 0) { 1646 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1647 vdc->instance, status); 1648 } 1649 1650 return (status); 1651 } 1652 1653 1654 /* 1655 * Function: 1656 * vdc_dring_negotiation() 1657 * 1658 * Description: 1659 * 1660 * Arguments: 1661 * vdc - soft state pointer for this instance of the device driver. 1662 * 1663 * Return Code: 1664 * 0 - Success 1665 */ 1666 static int 1667 vdc_dring_negotiation(vdc_t *vdcp) 1668 { 1669 int status; 1670 vio_msg_t vio_msg; 1671 1672 if (status = vdc_init_dring_negotiate(vdcp)) 1673 return (status); 1674 1675 /* release lock and wait for response */ 1676 mutex_exit(&vdcp->lock); 1677 status = vdc_wait_for_response(vdcp, &vio_msg); 1678 mutex_enter(&vdcp->lock); 1679 if (status) { 1680 DMSG(vdcp, 0, 1681 "[%d] Failed waiting for Dring negotiation response," 1682 " rv(%d)", vdcp->instance, status); 1683 return (status); 1684 } 1685 1686 /* check type and sub_type ... */ 1687 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1688 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1689 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 1690 vdcp->instance); 1691 return (EPROTO); 1692 } 1693 1694 return (vdc_handle_dring_reg_msg(vdcp, 1695 (vio_dring_reg_msg_t *)&vio_msg)); 1696 } 1697 1698 1699 /* 1700 * Function: 1701 * vdc_send_rdx() 1702 * 1703 * Description: 1704 * 1705 * Arguments: 1706 * vdc - soft state pointer for this instance of the device driver. 1707 * 1708 * Return Code: 1709 * 0 - Success 1710 */ 1711 static int 1712 vdc_send_rdx(vdc_t *vdcp) 1713 { 1714 vio_msg_t msg; 1715 size_t msglen = sizeof (vio_msg_t); 1716 int status; 1717 1718 /* 1719 * Send an RDX message to vds to indicate we are ready 1720 * to send data 1721 */ 1722 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 1723 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 1724 msg.tag.vio_subtype_env = VIO_RDX; 1725 msg.tag.vio_sid = vdcp->session_id; 1726 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 1727 if (status != 0) { 1728 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 1729 vdcp->instance, status); 1730 } 1731 1732 return (status); 1733 } 1734 1735 /* 1736 * Function: 1737 * vdc_handle_rdx() 1738 * 1739 * Description: 1740 * 1741 * Arguments: 1742 * vdc - soft state pointer for this instance of the device driver. 1743 * msgp - received msg 1744 * 1745 * Return Code: 1746 * 0 - Success 1747 */ 1748 static int 1749 vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp) 1750 { 1751 _NOTE(ARGUNUSED(vdcp)) 1752 _NOTE(ARGUNUSED(msgp)) 1753 1754 ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL); 1755 ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK); 1756 ASSERT(msgp->tag.vio_subtype_env == VIO_RDX); 1757 1758 DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance); 1759 1760 return (0); 1761 } 1762 1763 /* 1764 * Function: 1765 * vdc_rdx_exchange() 1766 * 1767 * Description: 1768 * 1769 * Arguments: 1770 * vdc - soft state pointer for this instance of the device driver. 1771 * 1772 * Return Code: 1773 * 0 - Success 1774 */ 1775 static int 1776 vdc_rdx_exchange(vdc_t *vdcp) 1777 { 1778 int status; 1779 vio_msg_t vio_msg; 1780 1781 if (status = vdc_send_rdx(vdcp)) 1782 return (status); 1783 1784 /* release lock and wait for response */ 1785 mutex_exit(&vdcp->lock); 1786 status = vdc_wait_for_response(vdcp, &vio_msg); 1787 mutex_enter(&vdcp->lock); 1788 if (status) { 1789 DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)", 1790 vdcp->instance, status); 1791 return (status); 1792 } 1793 1794 /* check type and sub_type ... */ 1795 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1796 vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) { 1797 DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance); 1798 return (EPROTO); 1799 } 1800 1801 return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg)); 1802 } 1803 1804 1805 /* -------------------------------------------------------------------------- */ 1806 1807 /* 1808 * LDC helper routines 1809 */ 1810 1811 static int 1812 vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp) 1813 { 1814 int status; 1815 boolean_t q_has_pkts = B_FALSE; 1816 int delay_time; 1817 size_t len; 1818 1819 mutex_enter(&vdc->read_lock); 1820 1821 if (vdc->read_state == VDC_READ_IDLE) 1822 vdc->read_state = VDC_READ_WAITING; 1823 1824 while (vdc->read_state != VDC_READ_PENDING) { 1825 1826 /* detect if the connection has been reset */ 1827 if (vdc->read_state == VDC_READ_RESET) { 1828 status = ECONNRESET; 1829 goto done; 1830 } 1831 1832 cv_wait(&vdc->read_cv, &vdc->read_lock); 1833 } 1834 1835 /* 1836 * Until we get a blocking ldc read we have to retry 1837 * until the entire LDC message has arrived before 1838 * ldc_read() will succeed. Note we also bail out if 1839 * the channel is reset or goes away. 1840 */ 1841 delay_time = vdc_ldc_read_init_delay; 1842 loop: 1843 len = *nbytesp; 1844 status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len); 1845 switch (status) { 1846 case EAGAIN: 1847 delay_time *= 2; 1848 if (delay_time >= vdc_ldc_read_max_delay) 1849 delay_time = vdc_ldc_read_max_delay; 1850 delay(delay_time); 1851 goto loop; 1852 1853 case 0: 1854 if (len == 0) { 1855 DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with " 1856 "no error!\n", vdc->instance); 1857 goto loop; 1858 } 1859 1860 *nbytesp = len; 1861 1862 /* 1863 * If there are pending messages, leave the 1864 * read state as pending. Otherwise, set the state 1865 * back to idle. 1866 */ 1867 status = ldc_chkq(vdc->ldc_handle, &q_has_pkts); 1868 if (status == 0 && !q_has_pkts) 1869 vdc->read_state = VDC_READ_IDLE; 1870 1871 break; 1872 default: 1873 DMSG(vdc, 0, "ldc_read returned %d\n", status); 1874 break; 1875 } 1876 1877 done: 1878 mutex_exit(&vdc->read_lock); 1879 1880 return (status); 1881 } 1882 1883 1884 1885 #ifdef DEBUG 1886 void 1887 vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg) 1888 { 1889 char *ms, *ss, *ses; 1890 switch (msg->tag.vio_msgtype) { 1891 #define Q(_s) case _s : ms = #_s; break; 1892 Q(VIO_TYPE_CTRL) 1893 Q(VIO_TYPE_DATA) 1894 Q(VIO_TYPE_ERR) 1895 #undef Q 1896 default: ms = "unknown"; break; 1897 } 1898 1899 switch (msg->tag.vio_subtype) { 1900 #define Q(_s) case _s : ss = #_s; break; 1901 Q(VIO_SUBTYPE_INFO) 1902 Q(VIO_SUBTYPE_ACK) 1903 Q(VIO_SUBTYPE_NACK) 1904 #undef Q 1905 default: ss = "unknown"; break; 1906 } 1907 1908 switch (msg->tag.vio_subtype_env) { 1909 #define Q(_s) case _s : ses = #_s; break; 1910 Q(VIO_VER_INFO) 1911 Q(VIO_ATTR_INFO) 1912 Q(VIO_DRING_REG) 1913 Q(VIO_DRING_UNREG) 1914 Q(VIO_RDX) 1915 Q(VIO_PKT_DATA) 1916 Q(VIO_DESC_DATA) 1917 Q(VIO_DRING_DATA) 1918 #undef Q 1919 default: ses = "unknown"; break; 1920 } 1921 1922 DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n", 1923 msg->tag.vio_msgtype, msg->tag.vio_subtype, 1924 msg->tag.vio_subtype_env, ms, ss, ses); 1925 } 1926 #endif 1927 1928 /* 1929 * Function: 1930 * vdc_send() 1931 * 1932 * Description: 1933 * The function encapsulates the call to write a message using LDC. 1934 * If LDC indicates that the call failed due to the queue being full, 1935 * we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise 1936 * we return the error returned by LDC. 1937 * 1938 * Arguments: 1939 * ldc_handle - LDC handle for the channel this instance of vdc uses 1940 * pkt - address of LDC message to be sent 1941 * msglen - the size of the message being sent. When the function 1942 * returns, this contains the number of bytes written. 1943 * 1944 * Return Code: 1945 * 0 - Success. 1946 * EINVAL - pkt or msglen were NULL 1947 * ECONNRESET - The connection was not up. 1948 * EWOULDBLOCK - LDC queue is full 1949 * xxx - other error codes returned by ldc_write 1950 */ 1951 static int 1952 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 1953 { 1954 size_t size = 0; 1955 int status = 0; 1956 clock_t delay_ticks; 1957 1958 ASSERT(vdc != NULL); 1959 ASSERT(mutex_owned(&vdc->lock)); 1960 ASSERT(msglen != NULL); 1961 ASSERT(*msglen != 0); 1962 1963 #ifdef DEBUG 1964 vdc_decode_tag(vdc, (vio_msg_t *)pkt); 1965 #endif 1966 /* 1967 * Wait indefinitely to send if channel 1968 * is busy, but bail out if we succeed or 1969 * if the channel closes or is reset. 1970 */ 1971 delay_ticks = vdc_hz_min_ldc_delay; 1972 do { 1973 size = *msglen; 1974 status = ldc_write(vdc->ldc_handle, pkt, &size); 1975 if (status == EWOULDBLOCK) { 1976 delay(delay_ticks); 1977 /* geometric backoff */ 1978 delay_ticks *= 2; 1979 if (delay_ticks > vdc_hz_max_ldc_delay) 1980 delay_ticks = vdc_hz_max_ldc_delay; 1981 } 1982 } while (status == EWOULDBLOCK); 1983 1984 /* if LDC had serious issues --- reset vdc state */ 1985 if (status == EIO || status == ECONNRESET) { 1986 /* LDC had serious issues --- reset vdc state */ 1987 mutex_enter(&vdc->read_lock); 1988 if ((vdc->read_state == VDC_READ_WAITING) || 1989 (vdc->read_state == VDC_READ_RESET)) 1990 cv_signal(&vdc->read_cv); 1991 vdc->read_state = VDC_READ_RESET; 1992 mutex_exit(&vdc->read_lock); 1993 1994 /* wake up any waiters in the reset thread */ 1995 if (vdc->state == VDC_STATE_INIT_WAITING) { 1996 DMSG(vdc, 0, "[%d] write reset - " 1997 "vdc is resetting ..\n", vdc->instance); 1998 vdc->state = VDC_STATE_RESETTING; 1999 cv_signal(&vdc->initwait_cv); 2000 } 2001 2002 return (ECONNRESET); 2003 } 2004 2005 /* return the last size written */ 2006 *msglen = size; 2007 2008 return (status); 2009 } 2010 2011 /* 2012 * Function: 2013 * vdc_get_md_node 2014 * 2015 * Description: 2016 * Get the MD, the device node and the port node for the given 2017 * disk instance. The caller is responsible for cleaning up the 2018 * reference to the returned MD (mdpp) by calling md_fini_handle(). 2019 * 2020 * Arguments: 2021 * dip - dev info pointer for this instance of the device driver. 2022 * mdpp - the returned MD. 2023 * vd_nodep - the returned device node. 2024 * vd_portp - the returned port node. The returned port node is NULL 2025 * if no port node is found. 2026 * 2027 * Return Code: 2028 * 0 - Success. 2029 * ENOENT - Expected node or property did not exist. 2030 * ENXIO - Unexpected error communicating with MD framework 2031 */ 2032 static int 2033 vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep, 2034 mde_cookie_t *vd_portp) 2035 { 2036 int status = ENOENT; 2037 char *node_name = NULL; 2038 md_t *mdp = NULL; 2039 int num_nodes; 2040 int num_vdevs; 2041 int num_vports; 2042 mde_cookie_t rootnode; 2043 mde_cookie_t *listp = NULL; 2044 boolean_t found_inst = B_FALSE; 2045 int listsz; 2046 int idx; 2047 uint64_t md_inst; 2048 int obp_inst; 2049 int instance = ddi_get_instance(dip); 2050 2051 /* 2052 * Get the OBP instance number for comparison with the MD instance 2053 * 2054 * The "cfg-handle" property of a vdc node in an MD contains the MD's 2055 * notion of "instance", or unique identifier, for that node; OBP 2056 * stores the value of the "cfg-handle" MD property as the value of 2057 * the "reg" property on the node in the device tree it builds from 2058 * the MD and passes to Solaris. Thus, we look up the devinfo node's 2059 * "reg" property value to uniquely identify this device instance. 2060 * If the "reg" property cannot be found, the device tree state is 2061 * presumably so broken that there is no point in continuing. 2062 */ 2063 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 2064 cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 2065 return (ENOENT); 2066 } 2067 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 2068 OBP_REG, -1); 2069 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 2070 2071 /* 2072 * We now walk the MD nodes to find the node for this vdisk. 2073 */ 2074 if ((mdp = md_get_handle()) == NULL) { 2075 cmn_err(CE_WARN, "unable to init machine description"); 2076 return (ENXIO); 2077 } 2078 2079 num_nodes = md_node_count(mdp); 2080 ASSERT(num_nodes > 0); 2081 2082 listsz = num_nodes * sizeof (mde_cookie_t); 2083 2084 /* allocate memory for nodes */ 2085 listp = kmem_zalloc(listsz, KM_SLEEP); 2086 2087 rootnode = md_root_node(mdp); 2088 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 2089 2090 /* 2091 * Search for all the virtual devices, we will then check to see which 2092 * ones are disk nodes. 2093 */ 2094 num_vdevs = md_scan_dag(mdp, rootnode, 2095 md_find_name(mdp, VDC_MD_VDEV_NAME), 2096 md_find_name(mdp, "fwd"), listp); 2097 2098 if (num_vdevs <= 0) { 2099 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 2100 status = ENOENT; 2101 goto done; 2102 } 2103 2104 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 2105 for (idx = 0; idx < num_vdevs; idx++) { 2106 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 2107 if ((status != 0) || (node_name == NULL)) { 2108 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 2109 ": err %d", VDC_MD_VDEV_NAME, status); 2110 continue; 2111 } 2112 2113 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 2114 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 2115 status = md_get_prop_val(mdp, listp[idx], 2116 VDC_MD_CFG_HDL, &md_inst); 2117 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 2118 instance, md_inst); 2119 if ((status == 0) && (md_inst == obp_inst)) { 2120 found_inst = B_TRUE; 2121 break; 2122 } 2123 } 2124 } 2125 2126 if (!found_inst) { 2127 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 2128 status = ENOENT; 2129 goto done; 2130 } 2131 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 2132 2133 *vd_nodep = listp[idx]; 2134 *mdpp = mdp; 2135 2136 num_vports = md_scan_dag(mdp, *vd_nodep, 2137 md_find_name(mdp, VDC_MD_PORT_NAME), 2138 md_find_name(mdp, "fwd"), listp); 2139 2140 if (num_vports != 1) { 2141 DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2142 VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME, num_vports); 2143 } 2144 2145 *vd_portp = (num_vports == 0)? NULL: listp[0]; 2146 2147 done: 2148 kmem_free(listp, listsz); 2149 return (status); 2150 } 2151 2152 /* 2153 * Function: 2154 * vdc_get_ldc_id() 2155 * 2156 * Description: 2157 * This function gets the 'ldc-id' for this particular instance of vdc. 2158 * The id returned is the guest domain channel endpoint LDC uses for 2159 * communication with vds. 2160 * 2161 * Arguments: 2162 * mdp - pointer to the machine description. 2163 * vd_node - the vdisk element from the MD. 2164 * ldc_id - pointer to variable used to return the 'ldc-id' found. 2165 * 2166 * Return Code: 2167 * 0 - Success. 2168 * ENOENT - Expected node or property did not exist. 2169 */ 2170 static int 2171 vdc_get_ldc_id(md_t *mdp, mde_cookie_t vd_node, uint64_t *ldc_id) 2172 { 2173 mde_cookie_t *chanp = NULL; 2174 int listsz; 2175 int num_chans; 2176 int num_nodes; 2177 int status = 0; 2178 2179 num_nodes = md_node_count(mdp); 2180 ASSERT(num_nodes > 0); 2181 2182 listsz = num_nodes * sizeof (mde_cookie_t); 2183 2184 /* allocate memory for nodes */ 2185 chanp = kmem_zalloc(listsz, KM_SLEEP); 2186 2187 /* get the channels for this node */ 2188 num_chans = md_scan_dag(mdp, vd_node, 2189 md_find_name(mdp, VDC_MD_CHAN_NAME), 2190 md_find_name(mdp, "fwd"), chanp); 2191 2192 /* expecting at least one channel */ 2193 if (num_chans <= 0) { 2194 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 2195 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 2196 status = ENOENT; 2197 goto done; 2198 2199 } else if (num_chans != 1) { 2200 DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2201 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, num_chans); 2202 } 2203 2204 /* 2205 * We use the first channel found (index 0), irrespective of how 2206 * many are there in total. 2207 */ 2208 if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, ldc_id) != 0) { 2209 cmn_err(CE_NOTE, "Channel '%s' property not found", VDC_MD_ID); 2210 status = ENOENT; 2211 } 2212 2213 done: 2214 kmem_free(chanp, listsz); 2215 return (status); 2216 } 2217 2218 static int 2219 vdc_do_ldc_up(vdc_t *vdc) 2220 { 2221 int status; 2222 ldc_status_t ldc_state; 2223 2224 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 2225 vdc->instance, vdc->ldc_id); 2226 2227 if (vdc->lifecycle == VDC_LC_DETACHING) 2228 return (EINVAL); 2229 2230 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 2231 switch (status) { 2232 case ECONNREFUSED: /* listener not ready at other end */ 2233 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 2234 vdc->instance, vdc->ldc_id, status); 2235 status = 0; 2236 break; 2237 default: 2238 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 2239 "channel=%ld, err=%d", vdc->instance, vdc->ldc_id, 2240 status); 2241 break; 2242 } 2243 } 2244 2245 if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) { 2246 vdc->ldc_state = ldc_state; 2247 if (ldc_state == LDC_UP) { 2248 DMSG(vdc, 0, "[%d] LDC channel already up\n", 2249 vdc->instance); 2250 vdc->seq_num = 1; 2251 vdc->seq_num_reply = 0; 2252 } 2253 } 2254 2255 return (status); 2256 } 2257 2258 /* 2259 * Function: 2260 * vdc_terminate_ldc() 2261 * 2262 * Description: 2263 * 2264 * Arguments: 2265 * vdc - soft state pointer for this instance of the device driver. 2266 * 2267 * Return Code: 2268 * None 2269 */ 2270 static void 2271 vdc_terminate_ldc(vdc_t *vdc) 2272 { 2273 int instance = ddi_get_instance(vdc->dip); 2274 2275 ASSERT(vdc != NULL); 2276 ASSERT(mutex_owned(&vdc->lock)); 2277 2278 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 2279 2280 if (vdc->initialized & VDC_LDC_OPEN) { 2281 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 2282 (void) ldc_close(vdc->ldc_handle); 2283 } 2284 if (vdc->initialized & VDC_LDC_CB) { 2285 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 2286 (void) ldc_unreg_callback(vdc->ldc_handle); 2287 } 2288 if (vdc->initialized & VDC_LDC) { 2289 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 2290 (void) ldc_fini(vdc->ldc_handle); 2291 vdc->ldc_handle = NULL; 2292 } 2293 2294 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 2295 } 2296 2297 /* -------------------------------------------------------------------------- */ 2298 2299 /* 2300 * Descriptor Ring helper routines 2301 */ 2302 2303 /* 2304 * Function: 2305 * vdc_init_descriptor_ring() 2306 * 2307 * Description: 2308 * 2309 * Arguments: 2310 * vdc - soft state pointer for this instance of the device driver. 2311 * 2312 * Return Code: 2313 * 0 - Success 2314 */ 2315 static int 2316 vdc_init_descriptor_ring(vdc_t *vdc) 2317 { 2318 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 2319 int status = 0; 2320 int i; 2321 2322 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 2323 2324 ASSERT(vdc != NULL); 2325 ASSERT(mutex_owned(&vdc->lock)); 2326 ASSERT(vdc->ldc_handle != NULL); 2327 2328 /* ensure we have enough room to store max sized block */ 2329 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2330 2331 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 2332 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2333 /* 2334 * Calculate the maximum block size we can transmit using one 2335 * Descriptor Ring entry from the attributes returned by the 2336 * vDisk server. This is subject to a minimum of 'maxphys' 2337 * as we do not have the capability to split requests over 2338 * multiple DRing entries. 2339 */ 2340 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 2341 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2342 vdc->instance); 2343 vdc->dring_max_cookies = maxphys / PAGESIZE; 2344 } else { 2345 vdc->dring_max_cookies = 2346 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2347 } 2348 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2349 (sizeof (ldc_mem_cookie_t) * 2350 (vdc->dring_max_cookies - 1))); 2351 vdc->dring_len = VD_DRING_LEN; 2352 2353 status = ldc_mem_dring_create(vdc->dring_len, 2354 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 2355 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 2356 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2357 vdc->instance); 2358 return (status); 2359 } 2360 vdc->initialized |= VDC_DRING_INIT; 2361 } 2362 2363 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 2364 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 2365 vdc->dring_cookie = 2366 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 2367 2368 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 2369 LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 2370 &vdc->dring_cookie[0], 2371 &vdc->dring_cookie_count); 2372 if (status != 0) { 2373 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 2374 "(%lx) to channel (%lx) status=%d\n", 2375 vdc->instance, vdc->ldc_dring_hdl, 2376 vdc->ldc_handle, status); 2377 return (status); 2378 } 2379 ASSERT(vdc->dring_cookie_count == 1); 2380 vdc->initialized |= VDC_DRING_BOUND; 2381 } 2382 2383 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 2384 if (status != 0) { 2385 DMSG(vdc, 0, 2386 "[%d] Failed to get info for descriptor ring (%lx)\n", 2387 vdc->instance, vdc->ldc_dring_hdl); 2388 return (status); 2389 } 2390 2391 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 2392 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 2393 2394 /* Allocate the local copy of this dring */ 2395 vdc->local_dring = 2396 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 2397 KM_SLEEP); 2398 vdc->initialized |= VDC_DRING_LOCAL; 2399 } 2400 2401 /* 2402 * Mark all DRing entries as free and initialize the private 2403 * descriptor's memory handles. If any entry is initialized, 2404 * we need to free it later so we set the bit in 'initialized' 2405 * at the start. 2406 */ 2407 vdc->initialized |= VDC_DRING_ENTRY; 2408 for (i = 0; i < vdc->dring_len; i++) { 2409 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 2410 dep->hdr.dstate = VIO_DESC_FREE; 2411 2412 status = ldc_mem_alloc_handle(vdc->ldc_handle, 2413 &vdc->local_dring[i].desc_mhdl); 2414 if (status != 0) { 2415 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 2416 " descriptor %d", vdc->instance, i); 2417 return (status); 2418 } 2419 vdc->local_dring[i].is_free = B_TRUE; 2420 vdc->local_dring[i].dep = dep; 2421 } 2422 2423 /* Initialize the starting index */ 2424 vdc->dring_curr_idx = 0; 2425 2426 return (status); 2427 } 2428 2429 /* 2430 * Function: 2431 * vdc_destroy_descriptor_ring() 2432 * 2433 * Description: 2434 * 2435 * Arguments: 2436 * vdc - soft state pointer for this instance of the device driver. 2437 * 2438 * Return Code: 2439 * None 2440 */ 2441 static void 2442 vdc_destroy_descriptor_ring(vdc_t *vdc) 2443 { 2444 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2445 ldc_mem_handle_t mhdl = NULL; 2446 ldc_mem_info_t minfo; 2447 int status = -1; 2448 int i; /* loop */ 2449 2450 ASSERT(vdc != NULL); 2451 ASSERT(mutex_owned(&vdc->lock)); 2452 2453 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 2454 2455 if (vdc->initialized & VDC_DRING_ENTRY) { 2456 DMSG(vdc, 0, 2457 "[%d] Removing Local DRing entries\n", vdc->instance); 2458 for (i = 0; i < vdc->dring_len; i++) { 2459 ldep = &vdc->local_dring[i]; 2460 mhdl = ldep->desc_mhdl; 2461 2462 if (mhdl == NULL) 2463 continue; 2464 2465 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 2466 DMSG(vdc, 0, 2467 "ldc_mem_info returned an error: %d\n", 2468 status); 2469 2470 /* 2471 * This must mean that the mem handle 2472 * is not valid. Clear it out so that 2473 * no one tries to use it. 2474 */ 2475 ldep->desc_mhdl = NULL; 2476 continue; 2477 } 2478 2479 if (minfo.status == LDC_BOUND) { 2480 (void) ldc_mem_unbind_handle(mhdl); 2481 } 2482 2483 (void) ldc_mem_free_handle(mhdl); 2484 2485 ldep->desc_mhdl = NULL; 2486 } 2487 vdc->initialized &= ~VDC_DRING_ENTRY; 2488 } 2489 2490 if (vdc->initialized & VDC_DRING_LOCAL) { 2491 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 2492 kmem_free(vdc->local_dring, 2493 vdc->dring_len * sizeof (vdc_local_desc_t)); 2494 vdc->initialized &= ~VDC_DRING_LOCAL; 2495 } 2496 2497 if (vdc->initialized & VDC_DRING_BOUND) { 2498 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 2499 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 2500 if (status == 0) { 2501 vdc->initialized &= ~VDC_DRING_BOUND; 2502 } else { 2503 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2504 vdc->instance, status, vdc->ldc_dring_hdl); 2505 } 2506 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 2507 } 2508 2509 if (vdc->initialized & VDC_DRING_INIT) { 2510 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 2511 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 2512 if (status == 0) { 2513 vdc->ldc_dring_hdl = NULL; 2514 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 2515 vdc->initialized &= ~VDC_DRING_INIT; 2516 } else { 2517 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 2518 vdc->instance, status, vdc->ldc_dring_hdl); 2519 } 2520 } 2521 } 2522 2523 /* 2524 * Function: 2525 * vdc_map_to_shared_ring() 2526 * 2527 * Description: 2528 * Copy contents of the local descriptor to the shared 2529 * memory descriptor. 2530 * 2531 * Arguments: 2532 * vdcp - soft state pointer for this instance of the device driver. 2533 * idx - descriptor ring index 2534 * 2535 * Return Code: 2536 * None 2537 */ 2538 static int 2539 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 2540 { 2541 vdc_local_desc_t *ldep; 2542 vd_dring_entry_t *dep; 2543 int rv; 2544 2545 ldep = &(vdcp->local_dring[idx]); 2546 2547 /* for now leave in the old pop_mem_hdl stuff */ 2548 if (ldep->nbytes > 0) { 2549 rv = vdc_populate_mem_hdl(vdcp, ldep); 2550 if (rv) { 2551 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 2552 vdcp->instance); 2553 return (rv); 2554 } 2555 } 2556 2557 /* 2558 * fill in the data details into the DRing 2559 */ 2560 dep = ldep->dep; 2561 ASSERT(dep != NULL); 2562 2563 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 2564 dep->payload.operation = ldep->operation; 2565 dep->payload.addr = ldep->offset; 2566 dep->payload.nbytes = ldep->nbytes; 2567 dep->payload.status = (uint32_t)-1; /* vds will set valid value */ 2568 dep->payload.slice = ldep->slice; 2569 dep->hdr.dstate = VIO_DESC_READY; 2570 dep->hdr.ack = 1; /* request an ACK for every message */ 2571 2572 return (0); 2573 } 2574 2575 /* 2576 * Function: 2577 * vdc_send_request 2578 * 2579 * Description: 2580 * This routine writes the data to be transmitted to vds into the 2581 * descriptor, notifies vds that the ring has been updated and 2582 * then waits for the request to be processed. 2583 * 2584 * Arguments: 2585 * vdcp - the soft state pointer 2586 * operation - operation we want vds to perform (VD_OP_XXX) 2587 * addr - address of data buf to be read/written. 2588 * nbytes - number of bytes to read/write 2589 * slice - the disk slice this request is for 2590 * offset - relative disk offset 2591 * cb_type - type of call - STRATEGY or SYNC 2592 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2593 * . mode for ioctl(9e) 2594 * . LP64 diskaddr_t (block I/O) 2595 * dir - direction of operation (READ/WRITE/BOTH) 2596 * 2597 * Return Codes: 2598 * 0 2599 * ENXIO 2600 */ 2601 static int 2602 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 2603 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2604 void *cb_arg, vio_desc_direction_t dir) 2605 { 2606 ASSERT(vdcp != NULL); 2607 ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 2608 2609 mutex_enter(&vdcp->lock); 2610 2611 do { 2612 while (vdcp->state != VDC_STATE_RUNNING) { 2613 2614 /* return error if detaching */ 2615 if (vdcp->state == VDC_STATE_DETACH) { 2616 mutex_exit(&vdcp->lock); 2617 return (ENXIO); 2618 } 2619 2620 /* fail request if connection timeout is reached */ 2621 if (vdcp->ctimeout_reached) { 2622 mutex_exit(&vdcp->lock); 2623 return (EIO); 2624 } 2625 2626 cv_wait(&vdcp->running_cv, &vdcp->lock); 2627 } 2628 2629 } while (vdc_populate_descriptor(vdcp, operation, addr, 2630 nbytes, slice, offset, cb_type, cb_arg, dir)); 2631 2632 mutex_exit(&vdcp->lock); 2633 return (0); 2634 } 2635 2636 2637 /* 2638 * Function: 2639 * vdc_populate_descriptor 2640 * 2641 * Description: 2642 * This routine writes the data to be transmitted to vds into the 2643 * descriptor, notifies vds that the ring has been updated and 2644 * then waits for the request to be processed. 2645 * 2646 * Arguments: 2647 * vdcp - the soft state pointer 2648 * operation - operation we want vds to perform (VD_OP_XXX) 2649 * addr - address of data buf to be read/written. 2650 * nbytes - number of bytes to read/write 2651 * slice - the disk slice this request is for 2652 * offset - relative disk offset 2653 * cb_type - type of call - STRATEGY or SYNC 2654 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2655 * . mode for ioctl(9e) 2656 * . LP64 diskaddr_t (block I/O) 2657 * dir - direction of operation (READ/WRITE/BOTH) 2658 * 2659 * Return Codes: 2660 * 0 2661 * EAGAIN 2662 * EFAULT 2663 * ENXIO 2664 * EIO 2665 */ 2666 static int 2667 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 2668 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2669 void *cb_arg, vio_desc_direction_t dir) 2670 { 2671 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 2672 int idx; /* Index of DRing entry used */ 2673 int next_idx; 2674 vio_dring_msg_t dmsg; 2675 size_t msglen; 2676 int rv; 2677 2678 ASSERT(MUTEX_HELD(&vdcp->lock)); 2679 vdcp->threads_pending++; 2680 loop: 2681 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 2682 2683 /* Get next available D-Ring entry */ 2684 idx = vdcp->dring_curr_idx; 2685 local_dep = &(vdcp->local_dring[idx]); 2686 2687 if (!local_dep->is_free) { 2688 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 2689 vdcp->instance); 2690 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 2691 if (vdcp->state == VDC_STATE_RUNNING || 2692 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2693 goto loop; 2694 } 2695 vdcp->threads_pending--; 2696 return (ECONNRESET); 2697 } 2698 2699 next_idx = idx + 1; 2700 if (next_idx >= vdcp->dring_len) 2701 next_idx = 0; 2702 vdcp->dring_curr_idx = next_idx; 2703 2704 ASSERT(local_dep->is_free); 2705 2706 local_dep->operation = operation; 2707 local_dep->addr = addr; 2708 local_dep->nbytes = nbytes; 2709 local_dep->slice = slice; 2710 local_dep->offset = offset; 2711 local_dep->cb_type = cb_type; 2712 local_dep->cb_arg = cb_arg; 2713 local_dep->dir = dir; 2714 2715 local_dep->is_free = B_FALSE; 2716 2717 rv = vdc_map_to_shared_dring(vdcp, idx); 2718 if (rv) { 2719 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 2720 vdcp->instance); 2721 /* free the descriptor */ 2722 local_dep->is_free = B_TRUE; 2723 vdcp->dring_curr_idx = idx; 2724 cv_wait(&vdcp->membind_cv, &vdcp->lock); 2725 if (vdcp->state == VDC_STATE_RUNNING || 2726 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2727 goto loop; 2728 } 2729 vdcp->threads_pending--; 2730 return (ECONNRESET); 2731 } 2732 2733 /* 2734 * Send a msg with the DRing details to vds 2735 */ 2736 VIO_INIT_DRING_DATA_TAG(dmsg); 2737 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 2738 dmsg.dring_ident = vdcp->dring_ident; 2739 dmsg.start_idx = idx; 2740 dmsg.end_idx = idx; 2741 vdcp->seq_num++; 2742 2743 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp); 2744 2745 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 2746 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 2747 2748 /* 2749 * note we're still holding the lock here to 2750 * make sure the message goes out in order !!!... 2751 */ 2752 msglen = sizeof (dmsg); 2753 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 2754 switch (rv) { 2755 case ECONNRESET: 2756 /* 2757 * vdc_send initiates the reset on failure. 2758 * Since the transaction has already been put 2759 * on the local dring, it will automatically get 2760 * retried when the channel is reset. Given that, 2761 * it is ok to just return success even though the 2762 * send failed. 2763 */ 2764 rv = 0; 2765 break; 2766 2767 case 0: /* EOK */ 2768 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 2769 break; 2770 2771 default: 2772 goto cleanup_and_exit; 2773 } 2774 2775 vdcp->threads_pending--; 2776 return (rv); 2777 2778 cleanup_and_exit: 2779 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 2780 return (ENXIO); 2781 } 2782 2783 /* 2784 * Function: 2785 * vdc_do_sync_op 2786 * 2787 * Description: 2788 * Wrapper around vdc_populate_descriptor that blocks until the 2789 * response to the message is available. 2790 * 2791 * Arguments: 2792 * vdcp - the soft state pointer 2793 * operation - operation we want vds to perform (VD_OP_XXX) 2794 * addr - address of data buf to be read/written. 2795 * nbytes - number of bytes to read/write 2796 * slice - the disk slice this request is for 2797 * offset - relative disk offset 2798 * cb_type - type of call - STRATEGY or SYNC 2799 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2800 * . mode for ioctl(9e) 2801 * . LP64 diskaddr_t (block I/O) 2802 * dir - direction of operation (READ/WRITE/BOTH) 2803 * 2804 * Return Codes: 2805 * 0 2806 * EAGAIN 2807 * EFAULT 2808 * ENXIO 2809 * EIO 2810 */ 2811 static int 2812 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 2813 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 2814 vio_desc_direction_t dir) 2815 { 2816 int status; 2817 2818 ASSERT(cb_type == CB_SYNC); 2819 2820 /* 2821 * Grab the lock, if blocked wait until the server 2822 * response causes us to wake up again. 2823 */ 2824 mutex_enter(&vdcp->lock); 2825 vdcp->sync_op_cnt++; 2826 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 2827 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 2828 2829 if (vdcp->state == VDC_STATE_DETACH) { 2830 cv_broadcast(&vdcp->sync_blocked_cv); 2831 vdcp->sync_op_cnt--; 2832 mutex_exit(&vdcp->lock); 2833 return (ENXIO); 2834 } 2835 2836 /* now block anyone other thread entering after us */ 2837 vdcp->sync_op_blocked = B_TRUE; 2838 vdcp->sync_op_pending = B_TRUE; 2839 mutex_exit(&vdcp->lock); 2840 2841 status = vdc_send_request(vdcp, operation, addr, 2842 nbytes, slice, offset, cb_type, cb_arg, dir); 2843 2844 mutex_enter(&vdcp->lock); 2845 2846 if (status != 0) { 2847 vdcp->sync_op_pending = B_FALSE; 2848 } else { 2849 /* 2850 * block until our transaction completes. 2851 * Also anyone else waiting also gets to go next. 2852 */ 2853 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 2854 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 2855 2856 DMSG(vdcp, 2, ": operation returned %d\n", 2857 vdcp->sync_op_status); 2858 if (vdcp->state == VDC_STATE_DETACH) { 2859 vdcp->sync_op_pending = B_FALSE; 2860 status = ENXIO; 2861 } else { 2862 status = vdcp->sync_op_status; 2863 } 2864 } 2865 2866 vdcp->sync_op_status = 0; 2867 vdcp->sync_op_blocked = B_FALSE; 2868 vdcp->sync_op_cnt--; 2869 2870 /* signal the next waiting thread */ 2871 cv_signal(&vdcp->sync_blocked_cv); 2872 mutex_exit(&vdcp->lock); 2873 2874 return (status); 2875 } 2876 2877 2878 /* 2879 * Function: 2880 * vdc_drain_response() 2881 * 2882 * Description: 2883 * When a guest is panicking, the completion of requests needs to be 2884 * handled differently because interrupts are disabled and vdc 2885 * will not get messages. We have to poll for the messages instead. 2886 * 2887 * Arguments: 2888 * vdc - soft state pointer for this instance of the device driver. 2889 * 2890 * Return Code: 2891 * 0 - Success 2892 */ 2893 static int 2894 vdc_drain_response(vdc_t *vdc) 2895 { 2896 int rv, idx, retries; 2897 size_t msglen; 2898 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2899 vio_dring_msg_t dmsg; 2900 2901 mutex_enter(&vdc->lock); 2902 2903 retries = 0; 2904 for (;;) { 2905 msglen = sizeof (dmsg); 2906 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 2907 if (rv) { 2908 rv = EINVAL; 2909 break; 2910 } 2911 2912 /* 2913 * if there are no packets wait and check again 2914 */ 2915 if ((rv == 0) && (msglen == 0)) { 2916 if (retries++ > vdc_dump_retries) { 2917 rv = EAGAIN; 2918 break; 2919 } 2920 2921 drv_usecwait(vdc_usec_timeout_dump); 2922 continue; 2923 } 2924 2925 /* 2926 * Ignore all messages that are not ACKs/NACKs to 2927 * DRing requests. 2928 */ 2929 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2930 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2931 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 2932 dmsg.tag.vio_msgtype, 2933 dmsg.tag.vio_subtype, 2934 dmsg.tag.vio_subtype_env); 2935 continue; 2936 } 2937 2938 /* 2939 * set the appropriate return value for the current request. 2940 */ 2941 switch (dmsg.tag.vio_subtype) { 2942 case VIO_SUBTYPE_ACK: 2943 rv = 0; 2944 break; 2945 case VIO_SUBTYPE_NACK: 2946 rv = EAGAIN; 2947 break; 2948 default: 2949 continue; 2950 } 2951 2952 idx = dmsg.start_idx; 2953 if (idx >= vdc->dring_len) { 2954 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 2955 vdc->instance, idx); 2956 continue; 2957 } 2958 ldep = &vdc->local_dring[idx]; 2959 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 2960 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 2961 vdc->instance, idx, ldep->dep->hdr.dstate); 2962 continue; 2963 } 2964 2965 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 2966 vdc->instance, idx, ldep->dep->hdr.dstate); 2967 rv = vdc_depopulate_descriptor(vdc, idx); 2968 if (rv) { 2969 DMSG(vdc, 0, 2970 "[%d] Entry @ %d - depopulate failed ..\n", 2971 vdc->instance, idx); 2972 } 2973 2974 /* if this is the last descriptor - break out of loop */ 2975 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 2976 break; 2977 } 2978 2979 mutex_exit(&vdc->lock); 2980 DMSG(vdc, 0, "End idx=%d\n", idx); 2981 2982 return (rv); 2983 } 2984 2985 2986 /* 2987 * Function: 2988 * vdc_depopulate_descriptor() 2989 * 2990 * Description: 2991 * 2992 * Arguments: 2993 * vdc - soft state pointer for this instance of the device driver. 2994 * idx - Index of the Descriptor Ring entry being modified 2995 * 2996 * Return Code: 2997 * 0 - Success 2998 */ 2999 static int 3000 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 3001 { 3002 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 3003 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 3004 int status = ENXIO; 3005 int rv = 0; 3006 3007 ASSERT(vdc != NULL); 3008 ASSERT(idx < vdc->dring_len); 3009 ldep = &vdc->local_dring[idx]; 3010 ASSERT(ldep != NULL); 3011 ASSERT(MUTEX_HELD(&vdc->lock)); 3012 3013 DMSG(vdc, 2, ": idx = %d\n", idx); 3014 dep = ldep->dep; 3015 ASSERT(dep != NULL); 3016 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 3017 (dep->payload.status == ECANCELED)); 3018 3019 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 3020 3021 ldep->is_free = B_TRUE; 3022 status = dep->payload.status; 3023 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 3024 3025 /* 3026 * If no buffers were used to transfer information to the server when 3027 * populating the descriptor then no memory handles need to be unbound 3028 * and we can return now. 3029 */ 3030 if (ldep->nbytes == 0) { 3031 cv_signal(&vdc->dring_free_cv); 3032 return (status); 3033 } 3034 3035 /* 3036 * If the upper layer passed in a misaligned address we copied the 3037 * data into an aligned buffer before sending it to LDC - we now 3038 * copy it back to the original buffer. 3039 */ 3040 if (ldep->align_addr) { 3041 ASSERT(ldep->addr != NULL); 3042 3043 if (dep->payload.nbytes > 0) 3044 bcopy(ldep->align_addr, ldep->addr, 3045 dep->payload.nbytes); 3046 kmem_free(ldep->align_addr, 3047 sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 3048 ldep->align_addr = NULL; 3049 } 3050 3051 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 3052 if (rv != 0) { 3053 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 3054 vdc->instance, ldep->desc_mhdl, idx, rv); 3055 /* 3056 * The error returned by the vDisk server is more informative 3057 * and thus has a higher priority but if it isn't set we ensure 3058 * that this function returns an error. 3059 */ 3060 if (status == 0) 3061 status = EINVAL; 3062 } 3063 3064 cv_signal(&vdc->membind_cv); 3065 cv_signal(&vdc->dring_free_cv); 3066 3067 return (status); 3068 } 3069 3070 /* 3071 * Function: 3072 * vdc_populate_mem_hdl() 3073 * 3074 * Description: 3075 * 3076 * Arguments: 3077 * vdc - soft state pointer for this instance of the device driver. 3078 * idx - Index of the Descriptor Ring entry being modified 3079 * addr - virtual address being mapped in 3080 * nybtes - number of bytes in 'addr' 3081 * operation - the vDisk operation being performed (VD_OP_xxx) 3082 * 3083 * Return Code: 3084 * 0 - Success 3085 */ 3086 static int 3087 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 3088 { 3089 vd_dring_entry_t *dep = NULL; 3090 ldc_mem_handle_t mhdl; 3091 caddr_t vaddr; 3092 size_t nbytes; 3093 uint8_t perm = LDC_MEM_RW; 3094 uint8_t maptype; 3095 int rv = 0; 3096 int i; 3097 3098 ASSERT(vdcp != NULL); 3099 3100 dep = ldep->dep; 3101 mhdl = ldep->desc_mhdl; 3102 3103 switch (ldep->dir) { 3104 case VIO_read_dir: 3105 perm = LDC_MEM_W; 3106 break; 3107 3108 case VIO_write_dir: 3109 perm = LDC_MEM_R; 3110 break; 3111 3112 case VIO_both_dir: 3113 perm = LDC_MEM_RW; 3114 break; 3115 3116 default: 3117 ASSERT(0); /* catch bad programming in vdc */ 3118 } 3119 3120 /* 3121 * LDC expects any addresses passed in to be 8-byte aligned. We need 3122 * to copy the contents of any misaligned buffers to a newly allocated 3123 * buffer and bind it instead (and copy the the contents back to the 3124 * original buffer passed in when depopulating the descriptor) 3125 */ 3126 vaddr = ldep->addr; 3127 nbytes = ldep->nbytes; 3128 if (((uint64_t)vaddr & 0x7) != 0) { 3129 ASSERT(ldep->align_addr == NULL); 3130 ldep->align_addr = 3131 kmem_alloc(sizeof (caddr_t) * 3132 P2ROUNDUP(nbytes, 8), KM_SLEEP); 3133 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 3134 "(buf=%p nb=%ld op=%d)\n", 3135 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 3136 nbytes, ldep->operation); 3137 if (perm != LDC_MEM_W) 3138 bcopy(vaddr, ldep->align_addr, nbytes); 3139 vaddr = ldep->align_addr; 3140 } 3141 3142 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 3143 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 3144 maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 3145 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 3146 vdcp->instance, dep->payload.ncookies); 3147 if (rv != 0) { 3148 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 3149 "(mhdl=%p, buf=%p, err=%d)\n", 3150 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 3151 if (ldep->align_addr) { 3152 kmem_free(ldep->align_addr, 3153 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 3154 ldep->align_addr = NULL; 3155 } 3156 return (EAGAIN); 3157 } 3158 3159 /* 3160 * Get the other cookies (if any). 3161 */ 3162 for (i = 1; i < dep->payload.ncookies; i++) { 3163 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 3164 if (rv != 0) { 3165 (void) ldc_mem_unbind_handle(mhdl); 3166 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3167 "(mhdl=%lx cnum=%d), err=%d", 3168 vdcp->instance, mhdl, i, rv); 3169 if (ldep->align_addr) { 3170 kmem_free(ldep->align_addr, 3171 sizeof (caddr_t) * ldep->nbytes); 3172 ldep->align_addr = NULL; 3173 } 3174 return (EAGAIN); 3175 } 3176 } 3177 3178 return (rv); 3179 } 3180 3181 /* 3182 * Interrupt handlers for messages from LDC 3183 */ 3184 3185 /* 3186 * Function: 3187 * vdc_handle_cb() 3188 * 3189 * Description: 3190 * 3191 * Arguments: 3192 * event - Type of event (LDC_EVT_xxx) that triggered the callback 3193 * arg - soft state pointer for this instance of the device driver. 3194 * 3195 * Return Code: 3196 * 0 - Success 3197 */ 3198 static uint_t 3199 vdc_handle_cb(uint64_t event, caddr_t arg) 3200 { 3201 ldc_status_t ldc_state; 3202 int rv = 0; 3203 3204 vdc_t *vdc = (vdc_t *)(void *)arg; 3205 3206 ASSERT(vdc != NULL); 3207 3208 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 3209 3210 /* 3211 * Depending on the type of event that triggered this callback, 3212 * we modify the handshake state or read the data. 3213 * 3214 * NOTE: not done as a switch() as event could be triggered by 3215 * a state change and a read request. Also the ordering of the 3216 * check for the event types is deliberate. 3217 */ 3218 if (event & LDC_EVT_UP) { 3219 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 3220 3221 mutex_enter(&vdc->lock); 3222 3223 /* get LDC state */ 3224 rv = ldc_status(vdc->ldc_handle, &ldc_state); 3225 if (rv != 0) { 3226 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 3227 vdc->instance, rv); 3228 return (LDC_SUCCESS); 3229 } 3230 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 3231 /* 3232 * Reset the transaction sequence numbers when 3233 * LDC comes up. We then kick off the handshake 3234 * negotiation with the vDisk server. 3235 */ 3236 vdc->seq_num = 1; 3237 vdc->seq_num_reply = 0; 3238 vdc->ldc_state = ldc_state; 3239 cv_signal(&vdc->initwait_cv); 3240 } 3241 3242 mutex_exit(&vdc->lock); 3243 } 3244 3245 if (event & LDC_EVT_READ) { 3246 DMSG(vdc, 0, "[%d] Received LDC_EVT_READ\n", vdc->instance); 3247 mutex_enter(&vdc->read_lock); 3248 cv_signal(&vdc->read_cv); 3249 vdc->read_state = VDC_READ_PENDING; 3250 mutex_exit(&vdc->read_lock); 3251 3252 /* that's all we have to do - no need to handle DOWN/RESET */ 3253 return (LDC_SUCCESS); 3254 } 3255 3256 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 3257 3258 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 3259 3260 mutex_enter(&vdc->lock); 3261 /* 3262 * Need to wake up any readers so they will 3263 * detect that a reset has occurred. 3264 */ 3265 mutex_enter(&vdc->read_lock); 3266 if ((vdc->read_state == VDC_READ_WAITING) || 3267 (vdc->read_state == VDC_READ_RESET)) 3268 cv_signal(&vdc->read_cv); 3269 vdc->read_state = VDC_READ_RESET; 3270 mutex_exit(&vdc->read_lock); 3271 3272 /* wake up any threads waiting for connection to come up */ 3273 if (vdc->state == VDC_STATE_INIT_WAITING) { 3274 vdc->state = VDC_STATE_RESETTING; 3275 cv_signal(&vdc->initwait_cv); 3276 } 3277 3278 mutex_exit(&vdc->lock); 3279 } 3280 3281 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 3282 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3283 vdc->instance, event); 3284 3285 return (LDC_SUCCESS); 3286 } 3287 3288 /* 3289 * Function: 3290 * vdc_wait_for_response() 3291 * 3292 * Description: 3293 * Block waiting for a response from the server. If there is 3294 * no data the thread block on the read_cv that is signalled 3295 * by the callback when an EVT_READ occurs. 3296 * 3297 * Arguments: 3298 * vdcp - soft state pointer for this instance of the device driver. 3299 * 3300 * Return Code: 3301 * 0 - Success 3302 */ 3303 static int 3304 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3305 { 3306 size_t nbytes = sizeof (*msgp); 3307 int status; 3308 3309 ASSERT(vdcp != NULL); 3310 3311 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3312 3313 status = vdc_recv(vdcp, msgp, &nbytes); 3314 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 3315 status, (int)nbytes); 3316 if (status) { 3317 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3318 vdcp->instance, status); 3319 return (status); 3320 } 3321 3322 if (nbytes < sizeof (vio_msg_tag_t)) { 3323 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3324 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3325 return (ENOMSG); 3326 } 3327 3328 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3329 msgp->tag.vio_msgtype, 3330 msgp->tag.vio_subtype, 3331 msgp->tag.vio_subtype_env); 3332 3333 /* 3334 * Verify the Session ID of the message 3335 * 3336 * Every message after the Version has been negotiated should 3337 * have the correct session ID set. 3338 */ 3339 if ((msgp->tag.vio_sid != vdcp->session_id) && 3340 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3341 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3342 "expected 0x%lx [seq num %lx @ %d]", 3343 vdcp->instance, msgp->tag.vio_sid, 3344 vdcp->session_id, 3345 ((vio_dring_msg_t *)msgp)->seq_num, 3346 ((vio_dring_msg_t *)msgp)->start_idx); 3347 return (ENOMSG); 3348 } 3349 return (0); 3350 } 3351 3352 3353 /* 3354 * Function: 3355 * vdc_resubmit_backup_dring() 3356 * 3357 * Description: 3358 * Resubmit each descriptor in the backed up dring to 3359 * vDisk server. The Dring was backed up during connection 3360 * reset. 3361 * 3362 * Arguments: 3363 * vdcp - soft state pointer for this instance of the device driver. 3364 * 3365 * Return Code: 3366 * 0 - Success 3367 */ 3368 static int 3369 vdc_resubmit_backup_dring(vdc_t *vdcp) 3370 { 3371 int count; 3372 int b_idx; 3373 int rv; 3374 int dring_size; 3375 int status; 3376 vio_msg_t vio_msg; 3377 vdc_local_desc_t *curr_ldep; 3378 3379 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3380 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3381 3382 if (vdcp->local_dring_backup == NULL) { 3383 /* the pending requests have already been processed */ 3384 return (0); 3385 } 3386 3387 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3388 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3389 3390 /* 3391 * Walk the backup copy of the local descriptor ring and 3392 * resubmit all the outstanding transactions. 3393 */ 3394 b_idx = vdcp->local_dring_backup_tail; 3395 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3396 3397 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3398 3399 /* only resubmit outstanding transactions */ 3400 if (!curr_ldep->is_free) { 3401 3402 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3403 mutex_enter(&vdcp->lock); 3404 rv = vdc_populate_descriptor(vdcp, curr_ldep->operation, 3405 curr_ldep->addr, curr_ldep->nbytes, 3406 curr_ldep->slice, curr_ldep->offset, 3407 curr_ldep->cb_type, curr_ldep->cb_arg, 3408 curr_ldep->dir); 3409 mutex_exit(&vdcp->lock); 3410 if (rv) { 3411 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3412 vdcp->instance, b_idx); 3413 return (rv); 3414 } 3415 3416 /* Wait for the response message. */ 3417 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 3418 b_idx); 3419 status = vdc_wait_for_response(vdcp, &vio_msg); 3420 if (status) { 3421 DMSG(vdcp, 1, "[%d] wait_for_response " 3422 "returned err=%d\n", vdcp->instance, 3423 status); 3424 return (status); 3425 } 3426 3427 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 3428 status = vdc_process_data_msg(vdcp, &vio_msg); 3429 if (status) { 3430 DMSG(vdcp, 1, "[%d] process_data_msg " 3431 "returned err=%d\n", vdcp->instance, 3432 status); 3433 return (status); 3434 } 3435 } 3436 3437 /* get the next element to submit */ 3438 if (++b_idx >= vdcp->local_dring_backup_len) 3439 b_idx = 0; 3440 } 3441 3442 /* all done - now clear up pending dring copy */ 3443 dring_size = vdcp->local_dring_backup_len * 3444 sizeof (vdcp->local_dring_backup[0]); 3445 3446 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3447 3448 vdcp->local_dring_backup = NULL; 3449 3450 return (0); 3451 } 3452 3453 /* 3454 * Function: 3455 * vdc_cancel_backup_dring 3456 * 3457 * Description: 3458 * Cancel each descriptor in the backed up dring to vDisk server. 3459 * The Dring was backed up during connection reset. 3460 * 3461 * Arguments: 3462 * vdcp - soft state pointer for this instance of the device driver. 3463 * 3464 * Return Code: 3465 * None 3466 */ 3467 void 3468 vdc_cancel_backup_ring(vdc_t *vdcp) 3469 { 3470 vdc_local_desc_t *ldep; 3471 struct buf *bufp; 3472 int count; 3473 int b_idx; 3474 int dring_size; 3475 3476 ASSERT(MUTEX_HELD(&vdcp->lock)); 3477 ASSERT(vdcp->state == VDC_STATE_INIT || 3478 vdcp->state == VDC_STATE_INIT_WAITING || 3479 vdcp->state == VDC_STATE_NEGOTIATE || 3480 vdcp->state == VDC_STATE_RESETTING); 3481 3482 if (vdcp->local_dring_backup == NULL) { 3483 /* the pending requests have already been processed */ 3484 return; 3485 } 3486 3487 DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3488 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3489 3490 /* 3491 * Walk the backup copy of the local descriptor ring and 3492 * cancel all the outstanding transactions. 3493 */ 3494 b_idx = vdcp->local_dring_backup_tail; 3495 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3496 3497 ldep = &(vdcp->local_dring_backup[b_idx]); 3498 3499 /* only cancel outstanding transactions */ 3500 if (!ldep->is_free) { 3501 3502 DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 3503 3504 /* 3505 * All requests have already been cleared from the 3506 * local descriptor ring and the LDC channel has been 3507 * reset so we will never get any reply for these 3508 * requests. Now we just have to notify threads waiting 3509 * for replies that the request has failed. 3510 */ 3511 switch (ldep->cb_type) { 3512 case CB_SYNC: 3513 ASSERT(vdcp->sync_op_pending); 3514 vdcp->sync_op_status = EIO; 3515 vdcp->sync_op_pending = B_FALSE; 3516 cv_signal(&vdcp->sync_pending_cv); 3517 break; 3518 3519 case CB_STRATEGY: 3520 bufp = ldep->cb_arg; 3521 ASSERT(bufp != NULL); 3522 bufp->b_resid = bufp->b_bcount; 3523 bioerror(bufp, EIO); 3524 biodone(bufp); 3525 break; 3526 3527 default: 3528 ASSERT(0); 3529 } 3530 3531 } 3532 3533 /* get the next element to cancel */ 3534 if (++b_idx >= vdcp->local_dring_backup_len) 3535 b_idx = 0; 3536 } 3537 3538 /* all done - now clear up pending dring copy */ 3539 dring_size = vdcp->local_dring_backup_len * 3540 sizeof (vdcp->local_dring_backup[0]); 3541 3542 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3543 3544 vdcp->local_dring_backup = NULL; 3545 3546 DTRACE_IO2(processed, int, count, vdc_t *, vdcp); 3547 } 3548 3549 /* 3550 * Function: 3551 * vdc_connection_timeout 3552 * 3553 * Description: 3554 * This function is invoked if the timeout set to establish the connection 3555 * with vds expires. This will happen if we spend too much time in the 3556 * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states. Then we will 3557 * cancel any pending request and mark them as failed. 3558 * 3559 * If the timeout does not expire, it will be cancelled when we reach the 3560 * VDC_STATE_HANDLE_PENDING or VDC_STATE_RESETTING state. This function can 3561 * be invoked while we are in the VDC_STATE_HANDLE_PENDING or 3562 * VDC_STATE_RESETTING state in which case we do nothing because the 3563 * timeout is being cancelled. 3564 * 3565 * Arguments: 3566 * arg - argument of the timeout function actually a soft state 3567 * pointer for the instance of the device driver. 3568 * 3569 * Return Code: 3570 * None 3571 */ 3572 void 3573 vdc_connection_timeout(void *arg) 3574 { 3575 vdc_t *vdcp = (vdc_t *)arg; 3576 3577 mutex_enter(&vdcp->lock); 3578 3579 if (vdcp->state == VDC_STATE_HANDLE_PENDING || 3580 vdcp->state == VDC_STATE_DETACH) { 3581 /* 3582 * The connection has just been re-established or 3583 * we are detaching. 3584 */ 3585 vdcp->ctimeout_reached = B_FALSE; 3586 mutex_exit(&vdcp->lock); 3587 return; 3588 } 3589 3590 vdcp->ctimeout_reached = B_TRUE; 3591 3592 /* notify requests waiting for sending */ 3593 cv_broadcast(&vdcp->running_cv); 3594 3595 /* cancel requests waiting for a result */ 3596 vdc_cancel_backup_ring(vdcp); 3597 3598 mutex_exit(&vdcp->lock); 3599 3600 cmn_err(CE_NOTE, "[%d] connection to service domain timeout", 3601 vdcp->instance); 3602 } 3603 3604 /* 3605 * Function: 3606 * vdc_backup_local_dring() 3607 * 3608 * Description: 3609 * Backup the current dring in the event of a reset. The Dring 3610 * transactions will be resubmitted to the server when the 3611 * connection is restored. 3612 * 3613 * Arguments: 3614 * vdcp - soft state pointer for this instance of the device driver. 3615 * 3616 * Return Code: 3617 * NONE 3618 */ 3619 static void 3620 vdc_backup_local_dring(vdc_t *vdcp) 3621 { 3622 int dring_size; 3623 3624 ASSERT(MUTEX_HELD(&vdcp->lock)); 3625 ASSERT(vdcp->state == VDC_STATE_RESETTING); 3626 3627 /* 3628 * If the backup dring is stil around, it means 3629 * that the last restore did not complete. However, 3630 * since we never got back into the running state, 3631 * the backup copy we have is still valid. 3632 */ 3633 if (vdcp->local_dring_backup != NULL) { 3634 DMSG(vdcp, 1, "reusing local descriptor ring backup " 3635 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 3636 vdcp->local_dring_backup_tail); 3637 return; 3638 } 3639 3640 /* 3641 * The backup dring can be NULL and the local dring may not be 3642 * initialized. This can happen if we had a reset while establishing 3643 * a new connection but after the connection has timed out. In that 3644 * case the backup dring is NULL because the requests have been 3645 * cancelled and the request occured before the local dring is 3646 * initialized. 3647 */ 3648 if (!(vdcp->initialized & VDC_DRING_LOCAL)) 3649 return; 3650 3651 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 3652 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 3653 3654 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 3655 3656 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 3657 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 3658 3659 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 3660 vdcp->local_dring_backup_len = vdcp->dring_len; 3661 } 3662 3663 /* -------------------------------------------------------------------------- */ 3664 3665 /* 3666 * The following functions process the incoming messages from vds 3667 */ 3668 3669 /* 3670 * Function: 3671 * vdc_process_msg_thread() 3672 * 3673 * Description: 3674 * 3675 * Main VDC message processing thread. Each vDisk instance 3676 * consists of a copy of this thread. This thread triggers 3677 * all the handshakes and data exchange with the server. It 3678 * also handles all channel resets 3679 * 3680 * Arguments: 3681 * vdc - soft state pointer for this instance of the device driver. 3682 * 3683 * Return Code: 3684 * None 3685 */ 3686 static void 3687 vdc_process_msg_thread(vdc_t *vdcp) 3688 { 3689 int status; 3690 int ctimeout; 3691 timeout_id_t tmid = 0; 3692 3693 mutex_enter(&vdcp->lock); 3694 3695 for (;;) { 3696 3697 #define Q(_s) (vdcp->state == _s) ? #_s : 3698 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 3699 Q(VDC_STATE_INIT) 3700 Q(VDC_STATE_INIT_WAITING) 3701 Q(VDC_STATE_NEGOTIATE) 3702 Q(VDC_STATE_HANDLE_PENDING) 3703 Q(VDC_STATE_RUNNING) 3704 Q(VDC_STATE_RESETTING) 3705 Q(VDC_STATE_DETACH) 3706 "UNKNOWN"); 3707 3708 switch (vdcp->state) { 3709 case VDC_STATE_INIT: 3710 3711 /* 3712 * If requested, start a timeout to check if the 3713 * connection with vds is established in the 3714 * specified delay. If the timeout expires, we 3715 * will cancel any pending request. 3716 * 3717 * If some reset have occurred while establishing 3718 * the connection, we already have a timeout armed 3719 * and in that case we don't need to arm a new one. 3720 */ 3721 ctimeout = (vdc_timeout != 0)? 3722 vdc_timeout : vdcp->ctimeout; 3723 3724 if (ctimeout != 0 && tmid == 0) { 3725 tmid = timeout(vdc_connection_timeout, vdcp, 3726 ctimeout * drv_usectohz(1000000)); 3727 } 3728 3729 /* Check if have re-initializing repeatedly */ 3730 if (vdcp->hshake_cnt++ > vdc_hshake_retries && 3731 vdcp->lifecycle != VDC_LC_ONLINE) { 3732 cmn_err(CE_NOTE, "[%d] disk access failed.\n", 3733 vdcp->instance); 3734 vdcp->state = VDC_STATE_DETACH; 3735 break; 3736 } 3737 3738 /* Bring up connection with vds via LDC */ 3739 status = vdc_start_ldc_connection(vdcp); 3740 if (status == EINVAL) { 3741 DMSG(vdcp, 0, "[%d] Could not start LDC", 3742 vdcp->instance); 3743 vdcp->state = VDC_STATE_DETACH; 3744 } else { 3745 vdcp->state = VDC_STATE_INIT_WAITING; 3746 } 3747 break; 3748 3749 case VDC_STATE_INIT_WAITING: 3750 3751 /* 3752 * Let the callback event move us on 3753 * when channel is open to server 3754 */ 3755 while (vdcp->ldc_state != LDC_UP) { 3756 cv_wait(&vdcp->initwait_cv, &vdcp->lock); 3757 if (vdcp->state != VDC_STATE_INIT_WAITING) { 3758 DMSG(vdcp, 0, 3759 "state moved to %d out from under us...\n", 3760 vdcp->state); 3761 3762 break; 3763 } 3764 } 3765 if (vdcp->state == VDC_STATE_INIT_WAITING && 3766 vdcp->ldc_state == LDC_UP) { 3767 vdcp->state = VDC_STATE_NEGOTIATE; 3768 } 3769 break; 3770 3771 case VDC_STATE_NEGOTIATE: 3772 switch (status = vdc_ver_negotiation(vdcp)) { 3773 case 0: 3774 break; 3775 default: 3776 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 3777 status); 3778 goto reset; 3779 } 3780 3781 switch (status = vdc_attr_negotiation(vdcp)) { 3782 case 0: 3783 break; 3784 default: 3785 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 3786 status); 3787 goto reset; 3788 } 3789 3790 switch (status = vdc_dring_negotiation(vdcp)) { 3791 case 0: 3792 break; 3793 default: 3794 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 3795 status); 3796 goto reset; 3797 } 3798 3799 switch (status = vdc_rdx_exchange(vdcp)) { 3800 case 0: 3801 vdcp->state = VDC_STATE_HANDLE_PENDING; 3802 goto done; 3803 default: 3804 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 3805 status); 3806 goto reset; 3807 } 3808 reset: 3809 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 3810 status); 3811 vdcp->state = VDC_STATE_RESETTING; 3812 vdcp->self_reset = B_TRUE; 3813 done: 3814 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 3815 vdcp->state); 3816 break; 3817 3818 case VDC_STATE_HANDLE_PENDING: 3819 3820 if (vdcp->ctimeout_reached) { 3821 /* 3822 * The connection timeout had been reached so 3823 * pending requests have been cancelled. Now 3824 * that the connection is back we can reset 3825 * the timeout. 3826 */ 3827 ASSERT(vdcp->local_dring_backup == NULL); 3828 ASSERT(tmid != 0); 3829 tmid = 0; 3830 vdcp->ctimeout_reached = B_FALSE; 3831 vdcp->state = VDC_STATE_RUNNING; 3832 DMSG(vdcp, 0, "[%d] connection to service " 3833 "domain is up", vdcp->instance); 3834 break; 3835 } 3836 3837 mutex_exit(&vdcp->lock); 3838 if (tmid != 0) { 3839 (void) untimeout(tmid); 3840 tmid = 0; 3841 } 3842 status = vdc_resubmit_backup_dring(vdcp); 3843 mutex_enter(&vdcp->lock); 3844 3845 if (status) 3846 vdcp->state = VDC_STATE_RESETTING; 3847 else 3848 vdcp->state = VDC_STATE_RUNNING; 3849 3850 break; 3851 3852 /* enter running state */ 3853 case VDC_STATE_RUNNING: 3854 /* 3855 * Signal anyone waiting for the connection 3856 * to come on line. 3857 */ 3858 vdcp->hshake_cnt = 0; 3859 cv_broadcast(&vdcp->running_cv); 3860 mutex_exit(&vdcp->lock); 3861 3862 for (;;) { 3863 vio_msg_t msg; 3864 status = vdc_wait_for_response(vdcp, &msg); 3865 if (status) break; 3866 3867 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 3868 vdcp->instance); 3869 status = vdc_process_data_msg(vdcp, &msg); 3870 if (status) { 3871 DMSG(vdcp, 1, "[%d] process_data_msg " 3872 "returned err=%d\n", vdcp->instance, 3873 status); 3874 break; 3875 } 3876 3877 } 3878 3879 mutex_enter(&vdcp->lock); 3880 3881 vdcp->state = VDC_STATE_RESETTING; 3882 vdcp->self_reset = B_TRUE; 3883 break; 3884 3885 case VDC_STATE_RESETTING: 3886 /* 3887 * When we reach this state, we either come from the 3888 * VDC_STATE_RUNNING state and we can have pending 3889 * request but no timeout is armed; or we come from 3890 * the VDC_STATE_INIT_WAITING, VDC_NEGOTIATE or 3891 * VDC_HANDLE_PENDING state and there is no pending 3892 * request or pending requests have already been copied 3893 * into the backup dring. So we can safely keep the 3894 * connection timeout armed while we are in this state. 3895 */ 3896 3897 DMSG(vdcp, 0, "Initiating channel reset " 3898 "(pending = %d)\n", (int)vdcp->threads_pending); 3899 3900 if (vdcp->self_reset) { 3901 DMSG(vdcp, 0, 3902 "[%d] calling stop_ldc_connection.\n", 3903 vdcp->instance); 3904 status = vdc_stop_ldc_connection(vdcp); 3905 vdcp->self_reset = B_FALSE; 3906 } 3907 3908 /* 3909 * Wait for all threads currently waiting 3910 * for a free dring entry to use. 3911 */ 3912 while (vdcp->threads_pending) { 3913 cv_broadcast(&vdcp->membind_cv); 3914 cv_broadcast(&vdcp->dring_free_cv); 3915 mutex_exit(&vdcp->lock); 3916 /* give the waiters enough time to wake up */ 3917 delay(vdc_hz_min_ldc_delay); 3918 mutex_enter(&vdcp->lock); 3919 } 3920 3921 ASSERT(vdcp->threads_pending == 0); 3922 3923 /* Sanity check that no thread is receiving */ 3924 ASSERT(vdcp->read_state != VDC_READ_WAITING); 3925 3926 vdcp->read_state = VDC_READ_IDLE; 3927 3928 vdc_backup_local_dring(vdcp); 3929 3930 /* cleanup the old d-ring */ 3931 vdc_destroy_descriptor_ring(vdcp); 3932 3933 /* go and start again */ 3934 vdcp->state = VDC_STATE_INIT; 3935 3936 break; 3937 3938 case VDC_STATE_DETACH: 3939 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 3940 vdcp->instance); 3941 3942 /* cancel any pending timeout */ 3943 mutex_exit(&vdcp->lock); 3944 if (tmid != 0) { 3945 (void) untimeout(tmid); 3946 tmid = 0; 3947 } 3948 mutex_enter(&vdcp->lock); 3949 3950 /* 3951 * Signal anyone waiting for connection 3952 * to come online 3953 */ 3954 cv_broadcast(&vdcp->running_cv); 3955 3956 while (vdcp->sync_op_pending) { 3957 cv_signal(&vdcp->sync_pending_cv); 3958 cv_signal(&vdcp->sync_blocked_cv); 3959 mutex_exit(&vdcp->lock); 3960 /* give the waiters enough time to wake up */ 3961 delay(vdc_hz_min_ldc_delay); 3962 mutex_enter(&vdcp->lock); 3963 } 3964 3965 mutex_exit(&vdcp->lock); 3966 3967 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 3968 vdcp->instance); 3969 thread_exit(); 3970 break; 3971 } 3972 } 3973 } 3974 3975 3976 /* 3977 * Function: 3978 * vdc_process_data_msg() 3979 * 3980 * Description: 3981 * This function is called by the message processing thread each time 3982 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 3983 * be an ACK or NACK from vds[1] which vdc handles as follows. 3984 * ACK - wake up the waiting thread 3985 * NACK - resend any messages necessary 3986 * 3987 * [1] Although the message format allows it, vds should not send a 3988 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 3989 * some bizarre reason it does, vdc will reset the connection. 3990 * 3991 * Arguments: 3992 * vdc - soft state pointer for this instance of the device driver. 3993 * msg - the LDC message sent by vds 3994 * 3995 * Return Code: 3996 * 0 - Success. 3997 * > 0 - error value returned by LDC 3998 */ 3999 static int 4000 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 4001 { 4002 int status = 0; 4003 vio_dring_msg_t *dring_msg; 4004 vdc_local_desc_t *ldep = NULL; 4005 int start, end; 4006 int idx; 4007 4008 dring_msg = (vio_dring_msg_t *)msg; 4009 4010 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 4011 ASSERT(vdcp != NULL); 4012 4013 mutex_enter(&vdcp->lock); 4014 4015 /* 4016 * Check to see if the message has bogus data 4017 */ 4018 idx = start = dring_msg->start_idx; 4019 end = dring_msg->end_idx; 4020 if ((start >= vdcp->dring_len) || 4021 (end >= vdcp->dring_len) || (end < -1)) { 4022 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 4023 vdcp->instance, start, end); 4024 mutex_exit(&vdcp->lock); 4025 return (EINVAL); 4026 } 4027 4028 /* 4029 * Verify that the sequence number is what vdc expects. 4030 */ 4031 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 4032 case VDC_SEQ_NUM_TODO: 4033 break; /* keep processing this message */ 4034 case VDC_SEQ_NUM_SKIP: 4035 mutex_exit(&vdcp->lock); 4036 return (0); 4037 case VDC_SEQ_NUM_INVALID: 4038 mutex_exit(&vdcp->lock); 4039 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 4040 return (ENXIO); 4041 } 4042 4043 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 4044 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 4045 VDC_DUMP_DRING_MSG(dring_msg); 4046 mutex_exit(&vdcp->lock); 4047 return (EIO); 4048 4049 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 4050 mutex_exit(&vdcp->lock); 4051 return (EPROTO); 4052 } 4053 4054 DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp); 4055 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 4056 ASSERT(start == end); 4057 4058 ldep = &vdcp->local_dring[idx]; 4059 4060 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 4061 ldep->dep->hdr.dstate, ldep->cb_type); 4062 4063 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 4064 struct buf *bufp; 4065 4066 switch (ldep->cb_type) { 4067 case CB_SYNC: 4068 ASSERT(vdcp->sync_op_pending); 4069 4070 status = vdc_depopulate_descriptor(vdcp, idx); 4071 vdcp->sync_op_status = status; 4072 vdcp->sync_op_pending = B_FALSE; 4073 cv_signal(&vdcp->sync_pending_cv); 4074 break; 4075 4076 case CB_STRATEGY: 4077 bufp = ldep->cb_arg; 4078 ASSERT(bufp != NULL); 4079 bufp->b_resid = 4080 bufp->b_bcount - ldep->dep->payload.nbytes; 4081 status = ldep->dep->payload.status; /* Future:ntoh */ 4082 if (status != 0) { 4083 DMSG(vdcp, 1, "strategy status=%d\n", status); 4084 bioerror(bufp, status); 4085 } 4086 status = vdc_depopulate_descriptor(vdcp, idx); 4087 biodone(bufp); 4088 4089 DMSG(vdcp, 1, 4090 "strategy complete req=%ld bytes resp=%ld bytes\n", 4091 bufp->b_bcount, ldep->dep->payload.nbytes); 4092 break; 4093 4094 default: 4095 ASSERT(0); 4096 } 4097 } 4098 4099 /* let the arrival signal propogate */ 4100 mutex_exit(&vdcp->lock); 4101 4102 /* probe gives the count of how many entries were processed */ 4103 DTRACE_IO2(processed, int, 1, vdc_t *, vdcp); 4104 4105 return (0); 4106 } 4107 4108 /* 4109 * Function: 4110 * vdc_process_err_msg() 4111 * 4112 * NOTE: No error messages are used as part of the vDisk protocol 4113 */ 4114 static int 4115 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 4116 { 4117 _NOTE(ARGUNUSED(vdc)) 4118 _NOTE(ARGUNUSED(msg)) 4119 4120 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 4121 DMSG(vdc, 1, "[%d] Got an ERR msg", vdc->instance); 4122 4123 return (ENOTSUP); 4124 } 4125 4126 /* 4127 * Function: 4128 * vdc_handle_ver_msg() 4129 * 4130 * Description: 4131 * 4132 * Arguments: 4133 * vdc - soft state pointer for this instance of the device driver. 4134 * ver_msg - LDC message sent by vDisk server 4135 * 4136 * Return Code: 4137 * 0 - Success 4138 */ 4139 static int 4140 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 4141 { 4142 int status = 0; 4143 4144 ASSERT(vdc != NULL); 4145 ASSERT(mutex_owned(&vdc->lock)); 4146 4147 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 4148 return (EPROTO); 4149 } 4150 4151 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 4152 return (EINVAL); 4153 } 4154 4155 switch (ver_msg->tag.vio_subtype) { 4156 case VIO_SUBTYPE_ACK: 4157 /* 4158 * We check to see if the version returned is indeed supported 4159 * (The server may have also adjusted the minor number downwards 4160 * and if so 'ver_msg' will contain the actual version agreed) 4161 */ 4162 if (vdc_is_supported_version(ver_msg)) { 4163 vdc->ver.major = ver_msg->ver_major; 4164 vdc->ver.minor = ver_msg->ver_minor; 4165 ASSERT(vdc->ver.major > 0); 4166 } else { 4167 status = EPROTO; 4168 } 4169 break; 4170 4171 case VIO_SUBTYPE_NACK: 4172 /* 4173 * call vdc_is_supported_version() which will return the next 4174 * supported version (if any) in 'ver_msg' 4175 */ 4176 (void) vdc_is_supported_version(ver_msg); 4177 if (ver_msg->ver_major > 0) { 4178 size_t len = sizeof (*ver_msg); 4179 4180 ASSERT(vdc->ver.major > 0); 4181 4182 /* reset the necessary fields and resend */ 4183 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 4184 ver_msg->dev_class = VDEV_DISK; 4185 4186 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 4187 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 4188 vdc->instance, status); 4189 if (len != sizeof (*ver_msg)) 4190 status = EBADMSG; 4191 } else { 4192 DMSG(vdc, 0, "[%d] No common version with vDisk server", 4193 vdc->instance); 4194 status = ENOTSUP; 4195 } 4196 4197 break; 4198 case VIO_SUBTYPE_INFO: 4199 /* 4200 * Handle the case where vds starts handshake 4201 * (for now only vdc is the instigator) 4202 */ 4203 status = ENOTSUP; 4204 break; 4205 4206 default: 4207 status = EINVAL; 4208 break; 4209 } 4210 4211 return (status); 4212 } 4213 4214 /* 4215 * Function: 4216 * vdc_handle_attr_msg() 4217 * 4218 * Description: 4219 * 4220 * Arguments: 4221 * vdc - soft state pointer for this instance of the device driver. 4222 * attr_msg - LDC message sent by vDisk server 4223 * 4224 * Return Code: 4225 * 0 - Success 4226 */ 4227 static int 4228 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 4229 { 4230 int status = 0; 4231 4232 ASSERT(vdc != NULL); 4233 ASSERT(mutex_owned(&vdc->lock)); 4234 4235 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 4236 return (EPROTO); 4237 } 4238 4239 switch (attr_msg->tag.vio_subtype) { 4240 case VIO_SUBTYPE_ACK: 4241 /* 4242 * We now verify the attributes sent by vds. 4243 */ 4244 if (attr_msg->vdisk_size == 0) { 4245 DMSG(vdc, 0, "[%d] Invalid disk size from vds", 4246 vdc->instance); 4247 status = EINVAL; 4248 break; 4249 } 4250 4251 if (attr_msg->max_xfer_sz == 0) { 4252 DMSG(vdc, 0, "[%d] Invalid transfer size from vds", 4253 vdc->instance); 4254 status = EINVAL; 4255 break; 4256 } 4257 4258 /* 4259 * If the disk size is already set check that it hasn't changed. 4260 */ 4261 if ((vdc->vdisk_size != 0) && 4262 (vdc->vdisk_size != attr_msg->vdisk_size)) { 4263 DMSG(vdc, 0, "[%d] Different disk size from vds " 4264 "(old=0x%lx - new=0x%lx", vdc->instance, 4265 vdc->vdisk_size, attr_msg->vdisk_size) 4266 status = EINVAL; 4267 break; 4268 } 4269 4270 vdc->vdisk_size = attr_msg->vdisk_size; 4271 vdc->vdisk_type = attr_msg->vdisk_type; 4272 4273 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 4274 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 4275 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 4276 vdc->instance, vdc->block_size, 4277 attr_msg->vdisk_block_size); 4278 4279 /* 4280 * We don't know at compile time what the vDisk server will 4281 * think are good values but we apply an large (arbitrary) 4282 * upper bound to prevent memory exhaustion in vdc if it was 4283 * allocating a DRing based of huge values sent by the server. 4284 * We probably will never exceed this except if the message 4285 * was garbage. 4286 */ 4287 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 4288 (PAGESIZE * DEV_BSIZE)) { 4289 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 4290 vdc->block_size = attr_msg->vdisk_block_size; 4291 } else { 4292 DMSG(vdc, 0, "[%d] vds block transfer size too big;" 4293 " using max supported by vdc", vdc->instance); 4294 } 4295 4296 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 4297 (attr_msg->vdisk_size > INT64_MAX) || 4298 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 4299 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 4300 vdc->instance); 4301 status = EINVAL; 4302 break; 4303 } 4304 4305 /* 4306 * Now that we have received all attributes we can create a 4307 * fake geometry for the disk. 4308 */ 4309 vdc_create_fake_geometry(vdc); 4310 break; 4311 4312 case VIO_SUBTYPE_NACK: 4313 /* 4314 * vds could not handle the attributes we sent so we 4315 * stop negotiating. 4316 */ 4317 status = EPROTO; 4318 break; 4319 4320 case VIO_SUBTYPE_INFO: 4321 /* 4322 * Handle the case where vds starts the handshake 4323 * (for now; vdc is the only supported instigatior) 4324 */ 4325 status = ENOTSUP; 4326 break; 4327 4328 default: 4329 status = ENOTSUP; 4330 break; 4331 } 4332 4333 return (status); 4334 } 4335 4336 /* 4337 * Function: 4338 * vdc_handle_dring_reg_msg() 4339 * 4340 * Description: 4341 * 4342 * Arguments: 4343 * vdc - soft state pointer for this instance of the driver. 4344 * dring_msg - LDC message sent by vDisk server 4345 * 4346 * Return Code: 4347 * 0 - Success 4348 */ 4349 static int 4350 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 4351 { 4352 int status = 0; 4353 4354 ASSERT(vdc != NULL); 4355 ASSERT(mutex_owned(&vdc->lock)); 4356 4357 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 4358 return (EPROTO); 4359 } 4360 4361 switch (dring_msg->tag.vio_subtype) { 4362 case VIO_SUBTYPE_ACK: 4363 /* save the received dring_ident */ 4364 vdc->dring_ident = dring_msg->dring_ident; 4365 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 4366 vdc->instance, vdc->dring_ident); 4367 break; 4368 4369 case VIO_SUBTYPE_NACK: 4370 /* 4371 * vds could not handle the DRing info we sent so we 4372 * stop negotiating. 4373 */ 4374 DMSG(vdc, 0, "[%d] server could not register DRing\n", 4375 vdc->instance); 4376 status = EPROTO; 4377 break; 4378 4379 case VIO_SUBTYPE_INFO: 4380 /* 4381 * Handle the case where vds starts handshake 4382 * (for now only vdc is the instigatior) 4383 */ 4384 status = ENOTSUP; 4385 break; 4386 default: 4387 status = ENOTSUP; 4388 } 4389 4390 return (status); 4391 } 4392 4393 /* 4394 * Function: 4395 * vdc_verify_seq_num() 4396 * 4397 * Description: 4398 * This functions verifies that the sequence number sent back by the vDisk 4399 * server with the latest message is what is expected (i.e. it is greater 4400 * than the last seq num sent by the vDisk server and less than or equal 4401 * to the last seq num generated by vdc). 4402 * 4403 * It then checks the request ID to see if any requests need processing 4404 * in the DRing. 4405 * 4406 * Arguments: 4407 * vdc - soft state pointer for this instance of the driver. 4408 * dring_msg - pointer to the LDC message sent by vds 4409 * 4410 * Return Code: 4411 * VDC_SEQ_NUM_TODO - Message needs to be processed 4412 * VDC_SEQ_NUM_SKIP - Message has already been processed 4413 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 4414 * vdc cannot deal with them 4415 */ 4416 static int 4417 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 4418 { 4419 ASSERT(vdc != NULL); 4420 ASSERT(dring_msg != NULL); 4421 ASSERT(mutex_owned(&vdc->lock)); 4422 4423 /* 4424 * Check to see if the messages were responded to in the correct 4425 * order by vds. 4426 */ 4427 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 4428 (dring_msg->seq_num > vdc->seq_num)) { 4429 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 4430 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 4431 vdc->instance, dring_msg->seq_num, 4432 vdc->seq_num_reply, vdc->seq_num, 4433 vdc->req_id_proc, vdc->req_id); 4434 return (VDC_SEQ_NUM_INVALID); 4435 } 4436 vdc->seq_num_reply = dring_msg->seq_num; 4437 4438 if (vdc->req_id_proc < vdc->req_id) 4439 return (VDC_SEQ_NUM_TODO); 4440 else 4441 return (VDC_SEQ_NUM_SKIP); 4442 } 4443 4444 4445 /* 4446 * Function: 4447 * vdc_is_supported_version() 4448 * 4449 * Description: 4450 * This routine checks if the major/minor version numbers specified in 4451 * 'ver_msg' are supported. If not it finds the next version that is 4452 * in the supported version list 'vdc_version[]' and sets the fields in 4453 * 'ver_msg' to those values 4454 * 4455 * Arguments: 4456 * ver_msg - LDC message sent by vDisk server 4457 * 4458 * Return Code: 4459 * B_TRUE - Success 4460 * B_FALSE - Version not supported 4461 */ 4462 static boolean_t 4463 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 4464 { 4465 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 4466 4467 for (int i = 0; i < vdc_num_versions; i++) { 4468 ASSERT(vdc_version[i].major > 0); 4469 ASSERT((i == 0) || 4470 (vdc_version[i].major < vdc_version[i-1].major)); 4471 4472 /* 4473 * If the major versions match, adjust the minor version, if 4474 * necessary, down to the highest value supported by this 4475 * client. The server should support all minor versions lower 4476 * than the value it sent 4477 */ 4478 if (ver_msg->ver_major == vdc_version[i].major) { 4479 if (ver_msg->ver_minor > vdc_version[i].minor) { 4480 DMSGX(0, 4481 "Adjusting minor version from %u to %u", 4482 ver_msg->ver_minor, vdc_version[i].minor); 4483 ver_msg->ver_minor = vdc_version[i].minor; 4484 } 4485 return (B_TRUE); 4486 } 4487 4488 /* 4489 * If the message contains a higher major version number, set 4490 * the message's major/minor versions to the current values 4491 * and return false, so this message will get resent with 4492 * these values, and the server will potentially try again 4493 * with the same or a lower version 4494 */ 4495 if (ver_msg->ver_major > vdc_version[i].major) { 4496 ver_msg->ver_major = vdc_version[i].major; 4497 ver_msg->ver_minor = vdc_version[i].minor; 4498 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 4499 ver_msg->ver_major, ver_msg->ver_minor); 4500 4501 return (B_FALSE); 4502 } 4503 4504 /* 4505 * Otherwise, the message's major version is less than the 4506 * current major version, so continue the loop to the next 4507 * (lower) supported version 4508 */ 4509 } 4510 4511 /* 4512 * No common version was found; "ground" the version pair in the 4513 * message to terminate negotiation 4514 */ 4515 ver_msg->ver_major = 0; 4516 ver_msg->ver_minor = 0; 4517 4518 return (B_FALSE); 4519 } 4520 /* -------------------------------------------------------------------------- */ 4521 4522 /* 4523 * DKIO(7) support 4524 */ 4525 4526 typedef struct vdc_dk_arg { 4527 struct dk_callback dkc; 4528 int mode; 4529 dev_t dev; 4530 vdc_t *vdc; 4531 } vdc_dk_arg_t; 4532 4533 /* 4534 * Function: 4535 * vdc_dkio_flush_cb() 4536 * 4537 * Description: 4538 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 4539 * by kernel code. 4540 * 4541 * Arguments: 4542 * arg - a pointer to a vdc_dk_arg_t structure. 4543 */ 4544 void 4545 vdc_dkio_flush_cb(void *arg) 4546 { 4547 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 4548 struct dk_callback *dkc = NULL; 4549 vdc_t *vdc = NULL; 4550 int rv; 4551 4552 if (dk_arg == NULL) { 4553 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 4554 return; 4555 } 4556 dkc = &dk_arg->dkc; 4557 vdc = dk_arg->vdc; 4558 ASSERT(vdc != NULL); 4559 4560 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 4561 VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir); 4562 if (rv != 0) { 4563 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 4564 vdc->instance, rv, 4565 ddi_model_convert_from(dk_arg->mode & FMODELS)); 4566 } 4567 4568 /* 4569 * Trigger the call back to notify the caller the the ioctl call has 4570 * been completed. 4571 */ 4572 if ((dk_arg->mode & FKIOCTL) && 4573 (dkc != NULL) && 4574 (dkc->dkc_callback != NULL)) { 4575 ASSERT(dkc->dkc_cookie != NULL); 4576 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 4577 } 4578 4579 /* Indicate that one less DKIO write flush is outstanding */ 4580 mutex_enter(&vdc->lock); 4581 vdc->dkio_flush_pending--; 4582 ASSERT(vdc->dkio_flush_pending >= 0); 4583 mutex_exit(&vdc->lock); 4584 4585 /* free the mem that was allocated when the callback was dispatched */ 4586 kmem_free(arg, sizeof (vdc_dk_arg_t)); 4587 } 4588 4589 /* 4590 * Function: 4591 * vdc_dkio_get_partition() 4592 * 4593 * Description: 4594 * This function implements the DKIOCGAPART ioctl. 4595 * 4596 * Arguments: 4597 * vdc - soft state pointer 4598 * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 4599 * flag - ioctl flags 4600 */ 4601 static int 4602 vdc_dkio_get_partition(vdc_t *vdc, caddr_t arg, int flag) 4603 { 4604 struct dk_geom *geom; 4605 struct vtoc *vtoc; 4606 union { 4607 struct dk_map map[NDKMAP]; 4608 struct dk_map32 map32[NDKMAP]; 4609 } data; 4610 int i, rv, size; 4611 4612 mutex_enter(&vdc->lock); 4613 4614 if ((rv = vdc_validate_geometry(vdc)) != 0) { 4615 mutex_exit(&vdc->lock); 4616 return (rv); 4617 } 4618 4619 vtoc = vdc->vtoc; 4620 geom = vdc->geom; 4621 4622 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 4623 4624 for (i = 0; i < vtoc->v_nparts; i++) { 4625 data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 4626 (geom->dkg_nhead * geom->dkg_nsect); 4627 data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 4628 } 4629 size = NDKMAP * sizeof (struct dk_map32); 4630 4631 } else { 4632 4633 for (i = 0; i < vtoc->v_nparts; i++) { 4634 data.map[i].dkl_cylno = vtoc->v_part[i].p_start / 4635 (geom->dkg_nhead * geom->dkg_nsect); 4636 data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 4637 } 4638 size = NDKMAP * sizeof (struct dk_map); 4639 4640 } 4641 4642 mutex_exit(&vdc->lock); 4643 4644 if (ddi_copyout(&data, arg, size, flag) != 0) 4645 return (EFAULT); 4646 4647 return (0); 4648 } 4649 4650 /* 4651 * Function: 4652 * vdc_dioctl_rwcmd() 4653 * 4654 * Description: 4655 * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 4656 * for DKC_DIRECT disks to read or write at an absolute disk offset. 4657 * 4658 * Arguments: 4659 * dev - device 4660 * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 4661 * flag - ioctl flags 4662 */ 4663 static int 4664 vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) 4665 { 4666 struct dadkio_rwcmd32 rwcmd32; 4667 struct dadkio_rwcmd rwcmd; 4668 struct iovec aiov; 4669 struct uio auio; 4670 int rw, status; 4671 struct buf *buf; 4672 4673 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 4674 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 4675 sizeof (struct dadkio_rwcmd32), flag)) { 4676 return (EFAULT); 4677 } 4678 rwcmd.cmd = rwcmd32.cmd; 4679 rwcmd.flags = rwcmd32.flags; 4680 rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 4681 rwcmd.buflen = rwcmd32.buflen; 4682 rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 4683 } else { 4684 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 4685 sizeof (struct dadkio_rwcmd), flag)) { 4686 return (EFAULT); 4687 } 4688 } 4689 4690 switch (rwcmd.cmd) { 4691 case DADKIO_RWCMD_READ: 4692 rw = B_READ; 4693 break; 4694 case DADKIO_RWCMD_WRITE: 4695 rw = B_WRITE; 4696 break; 4697 default: 4698 return (EINVAL); 4699 } 4700 4701 bzero((caddr_t)&aiov, sizeof (struct iovec)); 4702 aiov.iov_base = rwcmd.bufaddr; 4703 aiov.iov_len = rwcmd.buflen; 4704 4705 bzero((caddr_t)&auio, sizeof (struct uio)); 4706 auio.uio_iov = &aiov; 4707 auio.uio_iovcnt = 1; 4708 auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; 4709 auio.uio_resid = rwcmd.buflen; 4710 auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 4711 4712 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4713 bioinit(buf); 4714 /* 4715 * We use the private field of buf to specify that this is an 4716 * I/O using an absolute offset. 4717 */ 4718 buf->b_private = (void *)VD_SLICE_NONE; 4719 4720 status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); 4721 4722 biofini(buf); 4723 kmem_free(buf, sizeof (buf_t)); 4724 4725 return (status); 4726 } 4727 4728 /* 4729 * This structure is used in the DKIO(7I) array below. 4730 */ 4731 typedef struct vdc_dk_ioctl { 4732 uint8_t op; /* VD_OP_XXX value */ 4733 int cmd; /* Solaris ioctl operation number */ 4734 size_t nbytes; /* size of structure to be copied */ 4735 4736 /* function to convert between vDisk and Solaris structure formats */ 4737 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 4738 int mode, int dir); 4739 } vdc_dk_ioctl_t; 4740 4741 /* 4742 * Subset of DKIO(7I) operations currently supported 4743 */ 4744 static vdc_dk_ioctl_t dk_ioctl[] = { 4745 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 4746 vdc_null_copy_func}, 4747 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 4748 vdc_get_wce_convert}, 4749 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 4750 vdc_set_wce_convert}, 4751 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 4752 vdc_get_vtoc_convert}, 4753 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 4754 vdc_set_vtoc_convert}, 4755 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 4756 vdc_get_geom_convert}, 4757 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 4758 vdc_get_geom_convert}, 4759 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 4760 vdc_get_geom_convert}, 4761 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 4762 vdc_set_geom_convert}, 4763 {VD_OP_GET_EFI, DKIOCGETEFI, 0, 4764 vdc_get_efi_convert}, 4765 {VD_OP_SET_EFI, DKIOCSETEFI, 0, 4766 vdc_set_efi_convert}, 4767 4768 /* DIOCTL_RWCMD is converted to a read or a write */ 4769 {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 4770 4771 /* 4772 * These particular ioctls are not sent to the server - vdc fakes up 4773 * the necessary info. 4774 */ 4775 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 4776 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 4777 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 4778 {0, DKIOCGAPART, 0, vdc_null_copy_func }, 4779 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 4780 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 4781 }; 4782 4783 /* 4784 * Function: 4785 * vd_process_ioctl() 4786 * 4787 * Description: 4788 * This routine processes disk specific ioctl calls 4789 * 4790 * Arguments: 4791 * dev - the device number 4792 * cmd - the operation [dkio(7I)] to be processed 4793 * arg - pointer to user provided structure 4794 * (contains data to be set or reference parameter for get) 4795 * mode - bit flag, indicating open settings, 32/64 bit type, etc 4796 * 4797 * Return Code: 4798 * 0 4799 * EFAULT 4800 * ENXIO 4801 * EIO 4802 * ENOTSUP 4803 */ 4804 static int 4805 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 4806 { 4807 int instance = VDCUNIT(dev); 4808 vdc_t *vdc = NULL; 4809 int rv = -1; 4810 int idx = 0; /* index into dk_ioctl[] */ 4811 size_t len = 0; /* #bytes to send to vds */ 4812 size_t alloc_len = 0; /* #bytes to allocate mem for */ 4813 caddr_t mem_p = NULL; 4814 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 4815 vdc_dk_ioctl_t *iop; 4816 4817 vdc = ddi_get_soft_state(vdc_state, instance); 4818 if (vdc == NULL) { 4819 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 4820 instance); 4821 return (ENXIO); 4822 } 4823 4824 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 4825 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 4826 4827 /* 4828 * Validate the ioctl operation to be performed. 4829 * 4830 * If we have looped through the array without finding a match then we 4831 * don't support this ioctl. 4832 */ 4833 for (idx = 0; idx < nioctls; idx++) { 4834 if (cmd == dk_ioctl[idx].cmd) 4835 break; 4836 } 4837 4838 if (idx >= nioctls) { 4839 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 4840 vdc->instance, cmd); 4841 return (ENOTSUP); 4842 } 4843 4844 iop = &(dk_ioctl[idx]); 4845 4846 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 4847 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 4848 dk_efi_t dk_efi; 4849 4850 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 4851 if (rv != 0) 4852 return (EFAULT); 4853 4854 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 4855 } else { 4856 len = iop->nbytes; 4857 } 4858 4859 /* 4860 * Deal with the ioctls which the server does not provide. vdc can 4861 * fake these up and return immediately 4862 */ 4863 switch (cmd) { 4864 case CDROMREADOFFSET: 4865 case DKIOCREMOVABLE: 4866 case USCSICMD: 4867 return (ENOTTY); 4868 4869 case DIOCTL_RWCMD: 4870 { 4871 if (vdc->cinfo == NULL) 4872 return (ENXIO); 4873 4874 if (vdc->cinfo->dki_ctype != DKC_DIRECT) 4875 return (ENOTTY); 4876 4877 return (vdc_dioctl_rwcmd(dev, arg, mode)); 4878 } 4879 4880 case DKIOCGAPART: 4881 { 4882 return (vdc_dkio_get_partition(vdc, arg, mode)); 4883 } 4884 4885 case DKIOCINFO: 4886 { 4887 struct dk_cinfo cinfo; 4888 if (vdc->cinfo == NULL) 4889 return (ENXIO); 4890 4891 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 4892 cinfo.dki_partition = VDCPART(dev); 4893 4894 rv = ddi_copyout(&cinfo, (void *)arg, 4895 sizeof (struct dk_cinfo), mode); 4896 if (rv != 0) 4897 return (EFAULT); 4898 4899 return (0); 4900 } 4901 4902 case DKIOCGMEDIAINFO: 4903 { 4904 if (vdc->minfo == NULL) 4905 return (ENXIO); 4906 4907 rv = ddi_copyout(vdc->minfo, (void *)arg, 4908 sizeof (struct dk_minfo), mode); 4909 if (rv != 0) 4910 return (EFAULT); 4911 4912 return (0); 4913 } 4914 4915 case DKIOCFLUSHWRITECACHE: 4916 { 4917 struct dk_callback *dkc = (struct dk_callback *)arg; 4918 vdc_dk_arg_t *dkarg = NULL; 4919 4920 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 4921 instance, mode); 4922 4923 /* 4924 * If the backing device is not a 'real' disk then the 4925 * W$ operation request to the vDisk server will fail 4926 * so we might as well save the cycles and return now. 4927 */ 4928 if (vdc->vdisk_type != VD_DISK_TYPE_DISK) 4929 return (ENOTTY); 4930 4931 /* 4932 * If arg is NULL, then there is no callback function 4933 * registered and the call operates synchronously; we 4934 * break and continue with the rest of the function and 4935 * wait for vds to return (i.e. after the request to 4936 * vds returns successfully, all writes completed prior 4937 * to the ioctl will have been flushed from the disk 4938 * write cache to persistent media. 4939 * 4940 * If a callback function is registered, we dispatch 4941 * the request on a task queue and return immediately. 4942 * The callback will deal with informing the calling 4943 * thread that the flush request is completed. 4944 */ 4945 if (dkc == NULL) 4946 break; 4947 4948 /* 4949 * the asynchronous callback is only supported if 4950 * invoked from within the kernel 4951 */ 4952 if ((mode & FKIOCTL) == 0) 4953 return (ENOTSUP); 4954 4955 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 4956 4957 dkarg->mode = mode; 4958 dkarg->dev = dev; 4959 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 4960 4961 mutex_enter(&vdc->lock); 4962 vdc->dkio_flush_pending++; 4963 dkarg->vdc = vdc; 4964 mutex_exit(&vdc->lock); 4965 4966 /* put the request on a task queue */ 4967 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 4968 (void *)dkarg, DDI_SLEEP); 4969 if (rv == NULL) { 4970 /* clean up if dispatch fails */ 4971 mutex_enter(&vdc->lock); 4972 vdc->dkio_flush_pending--; 4973 mutex_exit(&vdc->lock); 4974 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 4975 } 4976 4977 return (rv == NULL ? ENOMEM : 0); 4978 } 4979 } 4980 4981 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 4982 ASSERT(iop->op != 0); 4983 4984 /* LDC requires that the memory being mapped is 8-byte aligned */ 4985 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 4986 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 4987 instance, len, alloc_len); 4988 4989 ASSERT(alloc_len >= 0); /* sanity check */ 4990 if (alloc_len > 0) 4991 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 4992 4993 /* 4994 * Call the conversion function for this ioctl which, if necessary, 4995 * converts from the Solaris format to the format ARC'ed 4996 * as part of the vDisk protocol (FWARC 2006/195) 4997 */ 4998 ASSERT(iop->convert != NULL); 4999 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 5000 if (rv != 0) { 5001 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 5002 instance, rv, cmd); 5003 if (mem_p != NULL) 5004 kmem_free(mem_p, alloc_len); 5005 return (rv); 5006 } 5007 5008 /* 5009 * send request to vds to service the ioctl. 5010 */ 5011 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 5012 VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 5013 VIO_both_dir); 5014 5015 if (cmd == DKIOCSVTOC || cmd == DKIOCSETEFI) { 5016 /* 5017 * The disk label may have changed. Revalidate the disk 5018 * geometry. This will also update the device nodes and 5019 * properties. 5020 */ 5021 vdc_validate(vdc); 5022 } 5023 5024 if (rv != 0) { 5025 /* 5026 * This is not necessarily an error. The ioctl could 5027 * be returning a value such as ENOTTY to indicate 5028 * that the ioctl is not applicable. 5029 */ 5030 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 5031 instance, rv, cmd); 5032 if (mem_p != NULL) 5033 kmem_free(mem_p, alloc_len); 5034 5035 return (rv); 5036 } 5037 5038 /* 5039 * Call the conversion function (if it exists) for this ioctl 5040 * which converts from the format ARC'ed as part of the vDisk 5041 * protocol (FWARC 2006/195) back to a format understood by 5042 * the rest of Solaris. 5043 */ 5044 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 5045 if (rv != 0) { 5046 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 5047 instance, rv, cmd); 5048 if (mem_p != NULL) 5049 kmem_free(mem_p, alloc_len); 5050 return (rv); 5051 } 5052 5053 if (mem_p != NULL) 5054 kmem_free(mem_p, alloc_len); 5055 5056 return (rv); 5057 } 5058 5059 /* 5060 * Function: 5061 * 5062 * Description: 5063 * This is an empty conversion function used by ioctl calls which 5064 * do not need to convert the data being passed in/out to userland 5065 */ 5066 static int 5067 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 5068 { 5069 _NOTE(ARGUNUSED(vdc)) 5070 _NOTE(ARGUNUSED(from)) 5071 _NOTE(ARGUNUSED(to)) 5072 _NOTE(ARGUNUSED(mode)) 5073 _NOTE(ARGUNUSED(dir)) 5074 5075 return (0); 5076 } 5077 5078 static int 5079 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 5080 int mode, int dir) 5081 { 5082 _NOTE(ARGUNUSED(vdc)) 5083 5084 if (dir == VD_COPYIN) 5085 return (0); /* nothing to do */ 5086 5087 if (ddi_copyout(from, to, sizeof (int), mode) != 0) 5088 return (EFAULT); 5089 5090 return (0); 5091 } 5092 5093 static int 5094 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 5095 int mode, int dir) 5096 { 5097 _NOTE(ARGUNUSED(vdc)) 5098 5099 if (dir == VD_COPYOUT) 5100 return (0); /* nothing to do */ 5101 5102 if (ddi_copyin(from, to, sizeof (int), mode) != 0) 5103 return (EFAULT); 5104 5105 return (0); 5106 } 5107 5108 /* 5109 * Function: 5110 * vdc_get_vtoc_convert() 5111 * 5112 * Description: 5113 * This routine performs the necessary convertions from the DKIOCGVTOC 5114 * Solaris structure to the format defined in FWARC 2006/195. 5115 * 5116 * In the struct vtoc definition, the timestamp field is marked as not 5117 * supported so it is not part of vDisk protocol (FWARC 2006/195). 5118 * However SVM uses that field to check it can write into the VTOC, 5119 * so we fake up the info of that field. 5120 * 5121 * Arguments: 5122 * vdc - the vDisk client 5123 * from - the buffer containing the data to be copied from 5124 * to - the buffer to be copied to 5125 * mode - flags passed to ioctl() call 5126 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 5127 * 5128 * Return Code: 5129 * 0 - Success 5130 * ENXIO - incorrect buffer passed in. 5131 * EFAULT - ddi_copyout routine encountered an error. 5132 */ 5133 static int 5134 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5135 { 5136 int i; 5137 void *tmp_mem = NULL; 5138 void *tmp_memp; 5139 struct vtoc vt; 5140 struct vtoc32 vt32; 5141 int copy_len = 0; 5142 int rv = 0; 5143 5144 if (dir != VD_COPYOUT) 5145 return (0); /* nothing to do */ 5146 5147 if ((from == NULL) || (to == NULL)) 5148 return (ENXIO); 5149 5150 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 5151 copy_len = sizeof (struct vtoc32); 5152 else 5153 copy_len = sizeof (struct vtoc); 5154 5155 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5156 5157 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 5158 5159 /* fake the VTOC timestamp field */ 5160 for (i = 0; i < V_NUMPAR; i++) { 5161 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 5162 } 5163 5164 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5165 vtoctovtoc32(vt, vt32); 5166 tmp_memp = &vt32; 5167 } else { 5168 tmp_memp = &vt; 5169 } 5170 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 5171 if (rv != 0) 5172 rv = EFAULT; 5173 5174 kmem_free(tmp_mem, copy_len); 5175 return (rv); 5176 } 5177 5178 /* 5179 * Function: 5180 * vdc_set_vtoc_convert() 5181 * 5182 * Description: 5183 * This routine performs the necessary convertions from the DKIOCSVTOC 5184 * Solaris structure to the format defined in FWARC 2006/195. 5185 * 5186 * Arguments: 5187 * vdc - the vDisk client 5188 * from - Buffer with data 5189 * to - Buffer where data is to be copied to 5190 * mode - flags passed to ioctl 5191 * dir - direction of copy (in or out) 5192 * 5193 * Return Code: 5194 * 0 - Success 5195 * ENXIO - Invalid buffer passed in 5196 * EFAULT - ddi_copyin of data failed 5197 */ 5198 static int 5199 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5200 { 5201 _NOTE(ARGUNUSED(vdc)) 5202 5203 void *tmp_mem = NULL; 5204 struct vtoc vt; 5205 struct vtoc *vtp = &vt; 5206 vd_vtoc_t vtvd; 5207 int copy_len = 0; 5208 int rv = 0; 5209 5210 if (dir != VD_COPYIN) 5211 return (0); /* nothing to do */ 5212 5213 if ((from == NULL) || (to == NULL)) 5214 return (ENXIO); 5215 5216 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 5217 copy_len = sizeof (struct vtoc32); 5218 else 5219 copy_len = sizeof (struct vtoc); 5220 5221 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5222 5223 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 5224 if (rv != 0) { 5225 kmem_free(tmp_mem, copy_len); 5226 return (EFAULT); 5227 } 5228 5229 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5230 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 5231 } else { 5232 vtp = tmp_mem; 5233 } 5234 5235 VTOC2VD_VTOC(vtp, &vtvd); 5236 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 5237 kmem_free(tmp_mem, copy_len); 5238 5239 return (0); 5240 } 5241 5242 /* 5243 * Function: 5244 * vdc_get_geom_convert() 5245 * 5246 * Description: 5247 * This routine performs the necessary convertions from the DKIOCGGEOM, 5248 * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 5249 * defined in FWARC 2006/195 5250 * 5251 * Arguments: 5252 * vdc - the vDisk client 5253 * from - Buffer with data 5254 * to - Buffer where data is to be copied to 5255 * mode - flags passed to ioctl 5256 * dir - direction of copy (in or out) 5257 * 5258 * Return Code: 5259 * 0 - Success 5260 * ENXIO - Invalid buffer passed in 5261 * EFAULT - ddi_copyout of data failed 5262 */ 5263 static int 5264 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5265 { 5266 _NOTE(ARGUNUSED(vdc)) 5267 5268 struct dk_geom geom; 5269 int copy_len = sizeof (struct dk_geom); 5270 int rv = 0; 5271 5272 if (dir != VD_COPYOUT) 5273 return (0); /* nothing to do */ 5274 5275 if ((from == NULL) || (to == NULL)) 5276 return (ENXIO); 5277 5278 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 5279 rv = ddi_copyout(&geom, to, copy_len, mode); 5280 if (rv != 0) 5281 rv = EFAULT; 5282 5283 return (rv); 5284 } 5285 5286 /* 5287 * Function: 5288 * vdc_set_geom_convert() 5289 * 5290 * Description: 5291 * This routine performs the necessary convertions from the DKIOCSGEOM 5292 * Solaris structure to the format defined in FWARC 2006/195. 5293 * 5294 * Arguments: 5295 * vdc - the vDisk client 5296 * from - Buffer with data 5297 * to - Buffer where data is to be copied to 5298 * mode - flags passed to ioctl 5299 * dir - direction of copy (in or out) 5300 * 5301 * Return Code: 5302 * 0 - Success 5303 * ENXIO - Invalid buffer passed in 5304 * EFAULT - ddi_copyin of data failed 5305 */ 5306 static int 5307 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5308 { 5309 _NOTE(ARGUNUSED(vdc)) 5310 5311 vd_geom_t vdgeom; 5312 void *tmp_mem = NULL; 5313 int copy_len = sizeof (struct dk_geom); 5314 int rv = 0; 5315 5316 if (dir != VD_COPYIN) 5317 return (0); /* nothing to do */ 5318 5319 if ((from == NULL) || (to == NULL)) 5320 return (ENXIO); 5321 5322 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 5323 5324 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 5325 if (rv != 0) { 5326 kmem_free(tmp_mem, copy_len); 5327 return (EFAULT); 5328 } 5329 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 5330 bcopy(&vdgeom, to, sizeof (vdgeom)); 5331 kmem_free(tmp_mem, copy_len); 5332 5333 return (0); 5334 } 5335 5336 static int 5337 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5338 { 5339 _NOTE(ARGUNUSED(vdc)) 5340 5341 vd_efi_t *vd_efi; 5342 dk_efi_t dk_efi; 5343 int rv = 0; 5344 void *uaddr; 5345 5346 if ((from == NULL) || (to == NULL)) 5347 return (ENXIO); 5348 5349 if (dir == VD_COPYIN) { 5350 5351 vd_efi = (vd_efi_t *)to; 5352 5353 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 5354 if (rv != 0) 5355 return (EFAULT); 5356 5357 vd_efi->lba = dk_efi.dki_lba; 5358 vd_efi->length = dk_efi.dki_length; 5359 bzero(vd_efi->data, vd_efi->length); 5360 5361 } else { 5362 5363 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 5364 if (rv != 0) 5365 return (EFAULT); 5366 5367 uaddr = dk_efi.dki_data; 5368 5369 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 5370 5371 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 5372 5373 rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length, 5374 mode); 5375 if (rv != 0) 5376 return (EFAULT); 5377 5378 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5379 } 5380 5381 return (0); 5382 } 5383 5384 static int 5385 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 5386 { 5387 _NOTE(ARGUNUSED(vdc)) 5388 5389 dk_efi_t dk_efi; 5390 void *uaddr; 5391 5392 if (dir == VD_COPYOUT) 5393 return (0); /* nothing to do */ 5394 5395 if ((from == NULL) || (to == NULL)) 5396 return (ENXIO); 5397 5398 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 5399 return (EFAULT); 5400 5401 uaddr = dk_efi.dki_data; 5402 5403 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 5404 5405 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 5406 return (EFAULT); 5407 5408 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 5409 5410 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 5411 5412 return (0); 5413 } 5414 5415 /* 5416 * Function: 5417 * vdc_create_fake_geometry() 5418 * 5419 * Description: 5420 * This routine fakes up the disk info needed for some DKIO ioctls. 5421 * - DKIOCINFO 5422 * - DKIOCGMEDIAINFO 5423 * 5424 * [ just like lofi(7D) and ramdisk(7D) ] 5425 * 5426 * Arguments: 5427 * vdc - soft state pointer for this instance of the device driver. 5428 * 5429 * Return Code: 5430 * none. 5431 */ 5432 static void 5433 vdc_create_fake_geometry(vdc_t *vdc) 5434 { 5435 ASSERT(vdc != NULL); 5436 ASSERT(vdc->vdisk_size != 0); 5437 ASSERT(vdc->max_xfer_sz != 0); 5438 5439 /* 5440 * DKIOCINFO support 5441 */ 5442 if (vdc->cinfo == NULL) 5443 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 5444 5445 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 5446 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 5447 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 5448 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 5449 /* 5450 * We currently set the controller type to DKC_DIRECT for any disk. 5451 * When SCSI support is implemented, we will eventually change this 5452 * type to DKC_SCSI_CCS for disks supporting the SCSI protocol. 5453 */ 5454 vdc->cinfo->dki_ctype = DKC_DIRECT; 5455 vdc->cinfo->dki_flags = DKI_FMTVOL; 5456 vdc->cinfo->dki_cnum = 0; 5457 vdc->cinfo->dki_addr = 0; 5458 vdc->cinfo->dki_space = 0; 5459 vdc->cinfo->dki_prio = 0; 5460 vdc->cinfo->dki_vec = 0; 5461 vdc->cinfo->dki_unit = vdc->instance; 5462 vdc->cinfo->dki_slave = 0; 5463 /* 5464 * The partition number will be created on the fly depending on the 5465 * actual slice (i.e. minor node) that is used to request the data. 5466 */ 5467 vdc->cinfo->dki_partition = 0; 5468 5469 /* 5470 * DKIOCGMEDIAINFO support 5471 */ 5472 if (vdc->minfo == NULL) 5473 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 5474 vdc->minfo->dki_media_type = DK_FIXED_DISK; 5475 vdc->minfo->dki_capacity = vdc->vdisk_size; 5476 vdc->minfo->dki_lbsize = DEV_BSIZE; 5477 } 5478 5479 static ushort_t 5480 vdc_lbl2cksum(struct dk_label *label) 5481 { 5482 int count; 5483 ushort_t sum, *sp; 5484 5485 count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 5486 sp = (ushort_t *)label; 5487 sum = 0; 5488 while (count--) { 5489 sum ^= *sp++; 5490 } 5491 5492 return (sum); 5493 } 5494 5495 /* 5496 * Function: 5497 * vdc_validate_geometry 5498 * 5499 * Description: 5500 * This routine discovers the label and geometry of the disk. It stores 5501 * the disk label and related information in the vdc structure. If it 5502 * fails to validate the geometry or to discover the disk label then 5503 * the label is marked as unknown (VD_DISK_LABEL_UNK). 5504 * 5505 * Arguments: 5506 * vdc - soft state pointer for this instance of the device driver. 5507 * 5508 * Return Code: 5509 * 0 - success. 5510 * EINVAL - unknown disk label. 5511 * ENOTSUP - geometry not applicable (EFI label). 5512 * EIO - error accessing the disk. 5513 */ 5514 static int 5515 vdc_validate_geometry(vdc_t *vdc) 5516 { 5517 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 5518 dev_t dev; 5519 int rv; 5520 struct dk_label label; 5521 struct dk_geom geom; 5522 struct vtoc vtoc; 5523 5524 ASSERT(vdc != NULL); 5525 ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 5526 ASSERT(MUTEX_HELD(&vdc->lock)); 5527 5528 mutex_exit(&vdc->lock); 5529 5530 dev = makedevice(ddi_driver_major(vdc->dip), 5531 VD_MAKE_DEV(vdc->instance, 0)); 5532 5533 rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL); 5534 if (rv == 0) 5535 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, FKIOCTL); 5536 5537 if (rv == ENOTSUP) { 5538 /* 5539 * If the device does not support VTOC then we try 5540 * to read an EFI label. 5541 */ 5542 struct dk_gpt *efi; 5543 size_t efi_len; 5544 5545 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 5546 5547 if (rv) { 5548 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 5549 vdc->instance, rv); 5550 mutex_enter(&vdc->lock); 5551 vdc_store_label_unk(vdc); 5552 return (EIO); 5553 } 5554 5555 mutex_enter(&vdc->lock); 5556 vdc_store_label_efi(vdc, efi); 5557 vd_efi_free(efi, efi_len); 5558 return (ENOTSUP); 5559 } 5560 5561 if (rv != 0) { 5562 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 5563 vdc->instance, rv); 5564 mutex_enter(&vdc->lock); 5565 vdc_store_label_unk(vdc); 5566 if (rv != EINVAL) 5567 rv = EIO; 5568 return (rv); 5569 } 5570 5571 /* check that geometry and vtoc are valid */ 5572 if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 5573 vtoc.v_sanity != VTOC_SANE) { 5574 mutex_enter(&vdc->lock); 5575 vdc_store_label_unk(vdc); 5576 return (EINVAL); 5577 } 5578 5579 /* 5580 * We have a disk and a valid VTOC. However this does not mean 5581 * that the disk currently have a VTOC label. The returned VTOC may 5582 * be a default VTOC to be used for configuring the disk (this is 5583 * what is done for disk image). So we read the label from the 5584 * beginning of the disk to ensure we really have a VTOC label. 5585 * 5586 * FUTURE: This could be the default way for reading the VTOC 5587 * from the disk as opposed to sending the VD_OP_GET_VTOC 5588 * to the server. This will be the default if vdc is implemented 5589 * ontop of cmlb. 5590 */ 5591 5592 /* 5593 * Single slice disk does not support read using an absolute disk 5594 * offset so we just rely on the DKIOCGVTOC ioctl in that case. 5595 */ 5596 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5597 mutex_enter(&vdc->lock); 5598 if (vtoc.v_nparts != 1) { 5599 vdc_store_label_unk(vdc); 5600 return (EINVAL); 5601 } 5602 vdc_store_label_vtoc(vdc, &geom, &vtoc); 5603 return (0); 5604 } 5605 5606 if (vtoc.v_nparts != V_NUMPAR) { 5607 mutex_enter(&vdc->lock); 5608 vdc_store_label_unk(vdc); 5609 return (EINVAL); 5610 } 5611 5612 /* 5613 * Read disk label from start of disk 5614 */ 5615 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 5616 bioinit(buf); 5617 buf->b_un.b_addr = (caddr_t)&label; 5618 buf->b_bcount = DK_LABEL_SIZE; 5619 buf->b_flags = B_BUSY | B_READ; 5620 buf->b_dev = dev; 5621 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label, 5622 DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); 5623 if (rv) { 5624 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 5625 vdc->instance); 5626 } else { 5627 rv = biowait(buf); 5628 biofini(buf); 5629 } 5630 kmem_free(buf, sizeof (buf_t)); 5631 5632 if (rv != 0 || label.dkl_magic != DKL_MAGIC || 5633 label.dkl_cksum != vdc_lbl2cksum(&label)) { 5634 DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 5635 vdc->instance); 5636 mutex_enter(&vdc->lock); 5637 vdc_store_label_unk(vdc); 5638 return (EINVAL); 5639 } 5640 5641 mutex_enter(&vdc->lock); 5642 vdc_store_label_vtoc(vdc, &geom, &vtoc); 5643 return (0); 5644 } 5645 5646 /* 5647 * Function: 5648 * vdc_validate 5649 * 5650 * Description: 5651 * This routine discovers the label of the disk and create the 5652 * appropriate device nodes if the label has changed. 5653 * 5654 * Arguments: 5655 * vdc - soft state pointer for this instance of the device driver. 5656 * 5657 * Return Code: 5658 * none. 5659 */ 5660 static void 5661 vdc_validate(vdc_t *vdc) 5662 { 5663 vd_disk_label_t old_label; 5664 struct vtoc old_vtoc; 5665 int rv; 5666 5667 ASSERT(!MUTEX_HELD(&vdc->lock)); 5668 5669 mutex_enter(&vdc->lock); 5670 5671 /* save the current label and vtoc */ 5672 old_label = vdc->vdisk_label; 5673 bcopy(vdc->vtoc, &old_vtoc, sizeof (struct vtoc)); 5674 5675 /* check the geometry */ 5676 (void) vdc_validate_geometry(vdc); 5677 5678 /* if the disk label has changed, update device nodes */ 5679 if (vdc->vdisk_label != old_label) { 5680 5681 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 5682 rv = vdc_create_device_nodes_efi(vdc); 5683 else 5684 rv = vdc_create_device_nodes_vtoc(vdc); 5685 5686 if (rv != 0) { 5687 DMSG(vdc, 0, "![%d] Failed to update device nodes", 5688 vdc->instance); 5689 } 5690 } 5691 5692 /* if the vtoc has changed, update device nodes properties */ 5693 if (bcmp(vdc->vtoc, &old_vtoc, sizeof (struct vtoc)) != 0) { 5694 5695 if (vdc_create_device_nodes_props(vdc) != 0) { 5696 DMSG(vdc, 0, "![%d] Failed to update device nodes" 5697 " properties", vdc->instance); 5698 } 5699 } 5700 5701 mutex_exit(&vdc->lock); 5702 } 5703 5704 static void 5705 vdc_validate_task(void *arg) 5706 { 5707 vdc_t *vdc = (vdc_t *)arg; 5708 5709 vdc_validate(vdc); 5710 5711 mutex_enter(&vdc->lock); 5712 ASSERT(vdc->validate_pending > 0); 5713 vdc->validate_pending--; 5714 mutex_exit(&vdc->lock); 5715 } 5716 5717 /* 5718 * Function: 5719 * vdc_setup_devid() 5720 * 5721 * Description: 5722 * This routine discovers the devid of a vDisk. It requests the devid of 5723 * the underlying device from the vDisk server, builds an encapsulated 5724 * devid based on the retrieved devid and registers that new devid to 5725 * the vDisk. 5726 * 5727 * Arguments: 5728 * vdc - soft state pointer for this instance of the device driver. 5729 * 5730 * Return Code: 5731 * 0 - A devid was succesfully registered for the vDisk 5732 */ 5733 static int 5734 vdc_setup_devid(vdc_t *vdc) 5735 { 5736 int rv; 5737 vd_devid_t *vd_devid; 5738 size_t bufsize, bufid_len; 5739 5740 /* 5741 * At first sight, we don't know the size of the devid that the 5742 * server will return but this size will be encoded into the 5743 * reply. So we do a first request using a default size then we 5744 * check if this size was large enough. If not then we do a second 5745 * request with the correct size returned by the server. Note that 5746 * ldc requires size to be 8-byte aligned. 5747 */ 5748 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 5749 sizeof (uint64_t)); 5750 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5751 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5752 5753 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 5754 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir); 5755 5756 DMSG(vdc, 2, "sync_op returned %d\n", rv); 5757 5758 if (rv) { 5759 kmem_free(vd_devid, bufsize); 5760 return (rv); 5761 } 5762 5763 if (vd_devid->length > bufid_len) { 5764 /* 5765 * The returned devid is larger than the buffer used. Try again 5766 * with a buffer with the right size. 5767 */ 5768 kmem_free(vd_devid, bufsize); 5769 bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 5770 sizeof (uint64_t)); 5771 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5772 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5773 5774 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 5775 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 5776 VIO_both_dir); 5777 5778 if (rv) { 5779 kmem_free(vd_devid, bufsize); 5780 return (rv); 5781 } 5782 } 5783 5784 /* 5785 * The virtual disk should have the same device id as the one associated 5786 * with the physical disk it is mapped on, otherwise sharing a disk 5787 * between a LDom and a non-LDom may not work (for example for a shared 5788 * SVM disk set). 5789 * 5790 * The DDI framework does not allow creating a device id with any 5791 * type so we first create a device id of type DEVID_ENCAP and then 5792 * we restore the orignal type of the physical device. 5793 */ 5794 5795 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 5796 5797 /* build an encapsulated devid based on the returned devid */ 5798 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 5799 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 5800 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 5801 kmem_free(vd_devid, bufsize); 5802 return (1); 5803 } 5804 5805 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 5806 5807 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 5808 5809 kmem_free(vd_devid, bufsize); 5810 5811 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 5812 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 5813 return (1); 5814 } 5815 5816 return (0); 5817 } 5818 5819 static void 5820 vdc_store_label_efi(vdc_t *vdc, struct dk_gpt *efi) 5821 { 5822 struct vtoc *vtoc = vdc->vtoc; 5823 5824 ASSERT(MUTEX_HELD(&vdc->lock)); 5825 5826 vdc->vdisk_label = VD_DISK_LABEL_EFI; 5827 bzero(vdc->geom, sizeof (struct dk_geom)); 5828 vd_efi_to_vtoc(efi, vtoc); 5829 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5830 /* 5831 * vd_efi_to_vtoc() will store information about the EFI Sun 5832 * reserved partition (representing the entire disk) into 5833 * partition 7. However single-slice device will only have 5834 * that single partition and the vdc driver expects to find 5835 * information about that partition in slice 0. So we need 5836 * to copy information from slice 7 to slice 0. 5837 */ 5838 vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag; 5839 vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag; 5840 vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start; 5841 vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; 5842 } 5843 } 5844 5845 static void 5846 vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct vtoc *vtoc) 5847 { 5848 ASSERT(MUTEX_HELD(&vdc->lock)); 5849 5850 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 5851 bcopy(vtoc, vdc->vtoc, sizeof (struct vtoc)); 5852 bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 5853 } 5854 5855 static void 5856 vdc_store_label_unk(vdc_t *vdc) 5857 { 5858 ASSERT(MUTEX_HELD(&vdc->lock)); 5859 5860 vdc->vdisk_label = VD_DISK_LABEL_UNK; 5861 bzero(vdc->vtoc, sizeof (struct vtoc)); 5862 bzero(vdc->geom, sizeof (struct dk_geom)); 5863 } 5864