1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. 
vdc will copy the data to be written to the descriptor 49 * ring or maps the buffer to store the data read by the vDisk 50 * server into the descriptor ring. It then sends a message to the 51 * vDisk server requesting it to complete the operation. 52 * 53 * 4) Handling responses from vDisk server. 54 * The vDisk server will ACK some or all of the messages vdc sends to it 55 * (this is configured during the handshake). Upon receipt of an ACK 56 * vdc will check the descriptor ring and signal to the upper layer 57 * code waiting on the IO. 58 */ 59 60 #include <sys/atomic.h> 61 #include <sys/conf.h> 62 #include <sys/disp.h> 63 #include <sys/ddi.h> 64 #include <sys/dkio.h> 65 #include <sys/efi_partition.h> 66 #include <sys/fcntl.h> 67 #include <sys/file.h> 68 #include <sys/mach_descrip.h> 69 #include <sys/modctl.h> 70 #include <sys/mdeg.h> 71 #include <sys/note.h> 72 #include <sys/open.h> 73 #include <sys/sdt.h> 74 #include <sys/stat.h> 75 #include <sys/sunddi.h> 76 #include <sys/types.h> 77 #include <sys/promif.h> 78 #include <sys/vtoc.h> 79 #include <sys/archsystm.h> 80 #include <sys/sysmacros.h> 81 82 #include <sys/cdio.h> 83 #include <sys/dktp/cm.h> 84 #include <sys/dktp/fdisk.h> 85 #include <sys/scsi/generic/sense.h> 86 #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ 87 #include <sys/scsi/targets/sddef.h> 88 89 #include <sys/ldoms.h> 90 #include <sys/ldc.h> 91 #include <sys/vio_common.h> 92 #include <sys/vio_mailbox.h> 93 #include <sys/vdsk_common.h> 94 #include <sys/vdsk_mailbox.h> 95 #include <sys/vdc.h> 96 97 /* 98 * function prototypes 99 */ 100 101 /* standard driver functions */ 102 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 103 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 104 static int vdc_strategy(struct buf *buf); 105 static int vdc_print(dev_t dev, char *str); 106 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 107 static int vdc_read(dev_t dev, struct uio 
*uio, cred_t *cred); 108 static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 109 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 110 cred_t *credp, int *rvalp); 111 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 112 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 113 114 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 115 void *arg, void **resultp); 116 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 117 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 118 119 /* setup */ 120 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 121 static int vdc_do_ldc_init(vdc_t *vdc); 122 static int vdc_start_ldc_connection(vdc_t *vdc); 123 static int vdc_create_device_nodes(vdc_t *vdc); 124 static int vdc_create_device_nodes_props(vdc_t *vdc); 125 static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id); 126 static int vdc_do_ldc_up(vdc_t *vdc); 127 static void vdc_terminate_ldc(vdc_t *vdc); 128 static int vdc_init_descriptor_ring(vdc_t *vdc); 129 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 130 131 /* handshake with vds */ 132 static void vdc_init_handshake_negotiation(void *arg); 133 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 134 static int vdc_init_attr_negotiation(vdc_t *vdc); 135 static int vdc_init_dring_negotiate(vdc_t *vdc); 136 static void vdc_reset_connection(vdc_t *vdc, boolean_t resetldc); 137 static boolean_t vdc_is_able_to_tx_data(vdc_t *vdc, int flag); 138 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 139 140 /* processing incoming messages from vDisk server */ 141 static void vdc_process_msg_thread(vdc_t *vdc); 142 static void vdc_process_msg(void *arg); 143 static void vdc_do_process_msg(vdc_t *vdc); 144 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 145 static int vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg); 146 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg); 147 
static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg); 148 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 149 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 150 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 151 static int vdc_get_next_dring_entry_id(vdc_t *vdc, uint_t needed); 152 static int vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, 153 size_t nbytes, int op, uint64_t arg, uint64_t slice); 154 static int vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, 155 vio_dring_msg_t dmsg); 156 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 157 static int vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, 158 caddr_t addr, size_t nbytes, int operation); 159 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 160 161 /* dkio */ 162 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); 163 static int vdc_create_fake_geometry(vdc_t *vdc); 164 static int vdc_setup_disk_layout(vdc_t *vdc); 165 static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 166 int mode, int dir); 167 static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 168 int mode, int dir); 169 static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 170 int mode, int dir); 171 static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 172 int mode, int dir); 173 static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 174 int mode, int dir); 175 static int vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to, 176 int mode, int dir); 177 178 /* 179 * Module variables 180 */ 181 182 /* 183 * Tunable variables to control how long vdc waits before timing out on 184 * various operations 185 */ 186 static int vdc_retries = 10; 187 188 /* calculated from 'vdc_usec_timeout' during attach */ 189 static uint64_t vdc_hz_timeout; /* units: Hz */ 190 static uint64_t vdc_usec_timeout = 30 * MICROSEC; /* 30s units: ns */ 191 192 static uint64_t 
vdc_hz_timeout_ldc; /* units: Hz */ 193 static uint64_t vdc_usec_timeout_ldc = 10 * MILLISEC; /* 0.01s units: ns */ 194 195 /* values for dumping - need to run in a tighter loop */ 196 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 197 static int vdc_dump_retries = 100; 198 199 /* Count of the number of vdc instances attached */ 200 static volatile uint32_t vdc_instance_count = 0; 201 202 /* Soft state pointer */ 203 static void *vdc_state; 204 205 /* variable level controlling the verbosity of the error/debug messages */ 206 int vdc_msglevel = 0; 207 208 /* 209 * Supported vDisk protocol version pairs. 210 * 211 * The first array entry is the latest and preferred version. 212 */ 213 static const vio_ver_t vdc_version[] = {{1, 0}}; 214 215 static struct cb_ops vdc_cb_ops = { 216 vdc_open, /* cb_open */ 217 vdc_close, /* cb_close */ 218 vdc_strategy, /* cb_strategy */ 219 vdc_print, /* cb_print */ 220 vdc_dump, /* cb_dump */ 221 vdc_read, /* cb_read */ 222 vdc_write, /* cb_write */ 223 vdc_ioctl, /* cb_ioctl */ 224 nodev, /* cb_devmap */ 225 nodev, /* cb_mmap */ 226 nodev, /* cb_segmap */ 227 nochpoll, /* cb_chpoll */ 228 ddi_prop_op, /* cb_prop_op */ 229 NULL, /* cb_str */ 230 D_MP | D_64BIT, /* cb_flag */ 231 CB_REV, /* cb_rev */ 232 vdc_aread, /* cb_aread */ 233 vdc_awrite /* cb_awrite */ 234 }; 235 236 static struct dev_ops vdc_ops = { 237 DEVO_REV, /* devo_rev */ 238 0, /* devo_refcnt */ 239 vdc_getinfo, /* devo_getinfo */ 240 nulldev, /* devo_identify */ 241 nulldev, /* devo_probe */ 242 vdc_attach, /* devo_attach */ 243 vdc_detach, /* devo_detach */ 244 nodev, /* devo_reset */ 245 &vdc_cb_ops, /* devo_cb_ops */ 246 NULL, /* devo_bus_ops */ 247 nulldev /* devo_power */ 248 }; 249 250 static struct modldrv modldrv = { 251 &mod_driverops, 252 "virtual disk client %I%", 253 &vdc_ops, 254 }; 255 256 static struct modlinkage modlinkage = { 257 MODREV_1, 258 &modldrv, 259 NULL 260 }; 261 262 /* 
-------------------------------------------------------------------------- */ 263 264 /* 265 * Device Driver housekeeping and setup 266 */ 267 268 int 269 _init(void) 270 { 271 int status; 272 273 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 274 return (status); 275 if ((status = mod_install(&modlinkage)) != 0) 276 ddi_soft_state_fini(&vdc_state); 277 return (status); 278 } 279 280 int 281 _info(struct modinfo *modinfop) 282 { 283 return (mod_info(&modlinkage, modinfop)); 284 } 285 286 int 287 _fini(void) 288 { 289 int status; 290 291 if ((status = mod_remove(&modlinkage)) != 0) 292 return (status); 293 ddi_soft_state_fini(&vdc_state); 294 return (0); 295 } 296 297 static int 298 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 299 { 300 _NOTE(ARGUNUSED(dip)) 301 302 int instance = SDUNIT(getminor((dev_t)arg)); 303 vdc_t *vdc = NULL; 304 305 switch (cmd) { 306 case DDI_INFO_DEVT2DEVINFO: 307 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 308 *resultp = NULL; 309 return (DDI_FAILURE); 310 } 311 *resultp = vdc->dip; 312 return (DDI_SUCCESS); 313 case DDI_INFO_DEVT2INSTANCE: 314 *resultp = (void *)(uintptr_t)instance; 315 return (DDI_SUCCESS); 316 default: 317 *resultp = NULL; 318 return (DDI_FAILURE); 319 } 320 } 321 322 static int 323 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 324 { 325 int instance; 326 int rv; 327 uint_t retries = 0; 328 vdc_t *vdc = NULL; 329 330 switch (cmd) { 331 case DDI_DETACH: 332 /* the real work happens below */ 333 break; 334 case DDI_SUSPEND: 335 /* nothing to do for this non-device */ 336 return (DDI_SUCCESS); 337 default: 338 return (DDI_FAILURE); 339 } 340 341 ASSERT(cmd == DDI_DETACH); 342 instance = ddi_get_instance(dip); 343 DMSG(1, "[%d] Entered\n", instance); 344 345 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 346 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 347 return (DDI_FAILURE); 348 } 349 350 if (vdc->open) { 
351 DMSG(0, "[%d] Cannot detach: device is open", instance); 352 return (DDI_FAILURE); 353 } 354 355 DMSG(0, "[%d] proceeding...\n", instance); 356 357 /* 358 * try and disable callbacks to prevent another handshake 359 */ 360 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 361 DMSG(0, "[%d] callback disabled (rv=%d)\n", instance, rv); 362 363 /* 364 * Prevent any more attempts to start a handshake with the vdisk 365 * server and tear down the existing connection. 366 */ 367 mutex_enter(&vdc->lock); 368 vdc->initialized |= VDC_HANDSHAKE_STOP; 369 vdc_reset_connection(vdc, B_TRUE); 370 mutex_exit(&vdc->lock); 371 372 if (vdc->initialized & VDC_THREAD) { 373 mutex_enter(&vdc->msg_proc_lock); 374 vdc->msg_proc_thr_state = VDC_THR_STOP; 375 vdc->msg_pending = B_TRUE; 376 cv_signal(&vdc->msg_proc_cv); 377 378 while (vdc->msg_proc_thr_state != VDC_THR_DONE) { 379 DMSG(0, "[%d] Waiting for thread to exit\n", instance); 380 rv = cv_timedwait(&vdc->msg_proc_cv, 381 &vdc->msg_proc_lock, 382 VD_GET_TIMEOUT_HZ(vdc_hz_timeout, 1)); 383 if ((rv == -1) && (retries++ > vdc_retries)) 384 break; 385 } 386 mutex_exit(&vdc->msg_proc_lock); 387 } 388 389 mutex_enter(&vdc->lock); 390 391 if (vdc->initialized & VDC_DRING) 392 vdc_destroy_descriptor_ring(vdc); 393 394 if (vdc->initialized & VDC_LDC) 395 vdc_terminate_ldc(vdc); 396 397 mutex_exit(&vdc->lock); 398 399 if (vdc->initialized & VDC_MINOR) { 400 ddi_prop_remove_all(dip); 401 ddi_remove_minor_node(dip, NULL); 402 } 403 404 if (vdc->initialized & VDC_LOCKS) { 405 mutex_destroy(&vdc->lock); 406 mutex_destroy(&vdc->attach_lock); 407 mutex_destroy(&vdc->msg_proc_lock); 408 mutex_destroy(&vdc->dring_lock); 409 cv_destroy(&vdc->cv); 410 cv_destroy(&vdc->attach_cv); 411 cv_destroy(&vdc->msg_proc_cv); 412 } 413 414 if (vdc->minfo) 415 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 416 417 if (vdc->cinfo) 418 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 419 420 if (vdc->vtoc) 421 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 
422 423 if (vdc->label) 424 kmem_free(vdc->label, DK_LABEL_SIZE); 425 426 if (vdc->initialized & VDC_SOFT_STATE) 427 ddi_soft_state_free(vdc_state, instance); 428 429 DMSG(0, "[%d] End %p\n", instance, (void *)vdc); 430 431 return (DDI_SUCCESS); 432 } 433 434 435 static int 436 vdc_do_attach(dev_info_t *dip) 437 { 438 int instance; 439 vdc_t *vdc = NULL; 440 int status; 441 uint_t retries = 0; 442 443 ASSERT(dip != NULL); 444 445 instance = ddi_get_instance(dip); 446 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 447 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 448 instance); 449 return (DDI_FAILURE); 450 } 451 452 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 453 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 454 return (DDI_FAILURE); 455 } 456 457 /* 458 * We assign the value to initialized in this case to zero out the 459 * variable and then set bits in it to indicate what has been done 460 */ 461 vdc->initialized = VDC_SOFT_STATE; 462 463 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 464 vdc_hz_timeout_ldc = drv_usectohz(vdc_usec_timeout_ldc); 465 466 vdc->dip = dip; 467 vdc->instance = instance; 468 vdc->open = 0; 469 vdc->vdisk_type = VD_DISK_TYPE_UNK; 470 vdc->state = VD_STATE_INIT; 471 vdc->ldc_state = 0; 472 vdc->session_id = 0; 473 vdc->block_size = DEV_BSIZE; 474 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 475 476 vdc->vtoc = NULL; 477 vdc->cinfo = NULL; 478 vdc->minfo = NULL; 479 480 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 481 mutex_init(&vdc->attach_lock, NULL, MUTEX_DRIVER, NULL); 482 mutex_init(&vdc->msg_proc_lock, NULL, MUTEX_DRIVER, NULL); 483 mutex_init(&vdc->dring_lock, NULL, MUTEX_DRIVER, NULL); 484 cv_init(&vdc->cv, NULL, CV_DRIVER, NULL); 485 cv_init(&vdc->attach_cv, NULL, CV_DRIVER, NULL); 486 cv_init(&vdc->msg_proc_cv, NULL, CV_DRIVER, NULL); 487 vdc->initialized |= VDC_LOCKS; 488 489 vdc->msg_pending = B_FALSE; 490 vdc->msg_proc_thr_id = thread_create(NULL, 0, 
vdc_process_msg_thread, 491 vdc, 0, &p0, TS_RUN, minclsyspri); 492 if (vdc->msg_proc_thr_id == NULL) { 493 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 494 instance); 495 return (DDI_FAILURE); 496 } 497 vdc->initialized |= VDC_THREAD; 498 499 /* initialise LDC channel which will be used to communicate with vds */ 500 if (vdc_do_ldc_init(vdc) != 0) { 501 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 502 return (DDI_FAILURE); 503 } 504 505 /* Bring up connection with vds via LDC */ 506 status = vdc_start_ldc_connection(vdc); 507 if (status != 0) { 508 cmn_err(CE_NOTE, "[%d] Could not start LDC", instance); 509 return (DDI_FAILURE); 510 } 511 512 /* 513 * We need to wait until the handshake has completed before leaving 514 * the attach(). If this is the first vdc device attached (i.e. the root 515 * filesystem) we will wait much longer in the hope that we can finally 516 * communicate with the vDisk server (the service domain may be 517 * rebooting, etc.). This wait is necessary so that the device node(s) 518 * are created before the attach(9E) return (otherwise the open(9E) will 519 * fail and and the root file system will not boot). 
520 */ 521 atomic_inc_32(&vdc_instance_count); 522 mutex_enter(&vdc->attach_lock); 523 while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) { 524 525 DMSG(0, "[%d] handshake in progress [VD %d (LDC %d)]\n", 526 instance, vdc->state, vdc->ldc_state); 527 528 status = cv_timedwait(&vdc->attach_cv, &vdc->attach_lock, 529 VD_GET_TIMEOUT_HZ(vdc_hz_timeout, retries)); 530 if (status == -1) { 531 /* 532 * If this is not the first instance attached or we 533 * have exceeeded the max number of retries we give 534 * up waiting and do not delay the attach any longer 535 */ 536 if ((vdc_instance_count != 1) || 537 (retries >= vdc_retries)) { 538 DMSG(0, "[%d] Giving up wait for handshake\n", 539 instance); 540 mutex_exit(&vdc->attach_lock); 541 return (DDI_FAILURE); 542 } else { 543 DMSG(0, "[%d] Retry #%d for handshake.\n", 544 instance, retries); 545 vdc_init_handshake_negotiation(vdc); 546 retries++; 547 } 548 } 549 } 550 mutex_exit(&vdc->attach_lock); 551 552 /* 553 * Once the handshake is complete, we can use the DRing to send 554 * requests to the vDisk server to calculate the geometry and 555 * VTOC of the "disk" 556 */ 557 status = vdc_setup_disk_layout(vdc); 558 if (status != 0) { 559 cmn_err(CE_NOTE, "[%d] Failed to discover disk layout (err%d)", 560 vdc->instance, status); 561 } 562 563 /* 564 * Now that we have the device info we can create the 565 * device nodes and properties 566 */ 567 status = vdc_create_device_nodes(vdc); 568 if (status) { 569 cmn_err(CE_NOTE, "[%d] Failed to create device nodes", 570 instance); 571 return (status); 572 } 573 status = vdc_create_device_nodes_props(vdc); 574 if (status) { 575 cmn_err(CE_NOTE, "[%d] Failed to create device nodes" 576 " properties (%d)", instance, status); 577 return (status); 578 } 579 580 ddi_report_dev(dip); 581 582 DMSG(0, "[%d] Attach completed\n", instance); 583 return (status); 584 } 585 586 static int 587 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 588 { 589 int status; 590 591 switch 
(cmd) { 592 case DDI_ATTACH: 593 if ((status = vdc_do_attach(dip)) != 0) 594 (void) vdc_detach(dip, DDI_DETACH); 595 return (status); 596 case DDI_RESUME: 597 /* nothing to do for this non-device */ 598 return (DDI_SUCCESS); 599 default: 600 return (DDI_FAILURE); 601 } 602 } 603 604 static int 605 vdc_do_ldc_init(vdc_t *vdc) 606 { 607 int status = 0; 608 ldc_status_t ldc_state; 609 ldc_attr_t ldc_attr; 610 uint64_t ldc_id = 0; 611 dev_info_t *dip = NULL; 612 613 ASSERT(vdc != NULL); 614 615 dip = vdc->dip; 616 vdc->initialized |= VDC_LDC; 617 618 if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) { 619 cmn_err(CE_NOTE, "[%d] Failed to get LDC channel ID property", 620 vdc->instance); 621 return (EIO); 622 } 623 vdc->ldc_id = ldc_id; 624 625 ldc_attr.devclass = LDC_DEV_BLK; 626 ldc_attr.instance = vdc->instance; 627 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 628 ldc_attr.mtu = VD_LDC_MTU; 629 630 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 631 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 632 if (status != 0) { 633 cmn_err(CE_NOTE, "[%d] ldc_init(chan %ld) returned %d", 634 vdc->instance, ldc_id, status); 635 return (status); 636 } 637 vdc->initialized |= VDC_LDC_INIT; 638 } 639 status = ldc_status(vdc->ldc_handle, &ldc_state); 640 if (status != 0) { 641 cmn_err(CE_NOTE, "[%d] Cannot discover LDC status [err=%d]", 642 vdc->instance, status); 643 return (status); 644 } 645 vdc->ldc_state = ldc_state; 646 647 if ((vdc->initialized & VDC_LDC_CB) == 0) { 648 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 649 (caddr_t)vdc); 650 if (status != 0) { 651 cmn_err(CE_NOTE, "[%d] LDC callback reg. failed (%d)", 652 vdc->instance, status); 653 return (status); 654 } 655 vdc->initialized |= VDC_LDC_CB; 656 } 657 658 vdc->initialized |= VDC_LDC; 659 660 /* 661 * At this stage we have initialised LDC, we will now try and open 662 * the connection. 
663 */ 664 if (vdc->ldc_state == LDC_INIT) { 665 status = ldc_open(vdc->ldc_handle); 666 if (status != 0) { 667 cmn_err(CE_NOTE, "[%d] ldc_open(chan %ld) returned %d", 668 vdc->instance, vdc->ldc_id, status); 669 return (status); 670 } 671 vdc->initialized |= VDC_LDC_OPEN; 672 } 673 674 return (status); 675 } 676 677 static int 678 vdc_start_ldc_connection(vdc_t *vdc) 679 { 680 int status = 0; 681 682 ASSERT(vdc != NULL); 683 684 mutex_enter(&vdc->lock); 685 686 if (vdc->ldc_state == LDC_UP) { 687 DMSG(0, "[%d] LDC is already UP ..\n", vdc->instance); 688 mutex_exit(&vdc->lock); 689 return (0); 690 } 691 692 status = vdc_do_ldc_up(vdc); 693 694 DMSG(0, "[%d] Finished bringing up LDC\n", vdc->instance); 695 696 mutex_exit(&vdc->lock); 697 698 return (status); 699 } 700 701 702 /* 703 * Function: 704 * vdc_create_device_nodes 705 * 706 * Description: 707 * This function creates the block and character device nodes under 708 * /devices along with the node properties. It is called as part of 709 * the attach(9E) of the instance during the handshake with vds after 710 * vds has sent the attributes to vdc. 711 * 712 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 713 * of 2 is used in keeping with the Solaris convention that slice 2 714 * refers to a whole disk. 
Slices start at 'a'
 *
 * Parameters:
 *	vdc		- soft state pointer
 *
 * Return Values
 *	0		- Success
 *	EIO		- Failed to create node
 *	EINVAL		- Unknown type of disk exported
 */
static int
vdc_create_device_nodes(vdc_t *vdc)
{
	/* uses NNNN which is OK as long as # of disks <= 10000 */
	char		name[sizeof ("disk@NNNN:s,raw")];
	dev_info_t	*dip = NULL;
	int		instance;
	int		num_slices = 1;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	/* a full disk exports every slice, a single-slice disk only one */
	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	for (i = 0; i < num_slices; i++) {
		/* block node, named after the slice letter ('a', 'b', ...) */
		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
		if (ddi_create_minor_node(dip, name, S_IFBLK,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'",
			    instance, name);
			return (EIO);
		}

		/* if any device node is created we set this flag */
		vdc->initialized |= VDC_MINOR;

		/* character node for the same minor: "<letter>,raw" */
		(void) snprintf(name, sizeof (name), "%c%s",
		    'a' + i, ",raw");
		if (ddi_create_minor_node(dip, name, S_IFCHR,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'",
			    instance, name);
			return (EIO);
		}
	}

	return (0);
}

/*
 * Function:
 *	vdc_create_device_nodes_props
 *
 * Description:
 *	This function creates the size and number-of-blocks properties
 *	(VDC_SIZE_PROP_NAME and VDC_NBLOCKS_PROP_NAME) for each slice's
 *	device, using the partition sizes held in the cached VTOC. It is
 *	called from vdc_do_attach() once the device nodes have been created.
785 * 786 * Parameters: 787 * vdc - soft state pointer 788 * 789 * Return Values 790 * 0 - Success 791 * EIO - Failed to create device node property 792 * EINVAL - Unknown type of disk exported 793 */ 794 static int 795 vdc_create_device_nodes_props(vdc_t *vdc) 796 { 797 dev_info_t *dip = NULL; 798 int instance; 799 int num_slices = 1; 800 int64_t size = 0; 801 dev_t dev; 802 int rv; 803 int i; 804 805 ASSERT(vdc != NULL); 806 807 instance = vdc->instance; 808 dip = vdc->dip; 809 810 if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { 811 cmn_err(CE_NOTE, "![%d] Could not create device node property." 812 " No VTOC available", instance); 813 return (ENXIO); 814 } 815 816 switch (vdc->vdisk_type) { 817 case VD_DISK_TYPE_DISK: 818 num_slices = V_NUMPAR; 819 break; 820 case VD_DISK_TYPE_SLICE: 821 num_slices = 1; 822 break; 823 case VD_DISK_TYPE_UNK: 824 default: 825 return (EINVAL); 826 } 827 828 for (i = 0; i < num_slices; i++) { 829 dev = makedevice(ddi_driver_major(dip), 830 VD_MAKE_DEV(instance, i)); 831 832 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 833 DMSG(0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 834 instance, size, size / (1024 * 1024), 835 vdc->vtoc->v_part[i].p_size); 836 837 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 838 if (rv != DDI_PROP_SUCCESS) { 839 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 840 instance, VDC_SIZE_PROP_NAME, size); 841 return (EIO); 842 } 843 844 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 845 lbtodb(size)); 846 if (rv != DDI_PROP_SUCCESS) { 847 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 848 instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 849 return (EIO); 850 } 851 } 852 853 return (0); 854 } 855 856 static int 857 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 858 { 859 _NOTE(ARGUNUSED(cred)) 860 861 int instance; 862 vdc_t *vdc; 863 864 ASSERT(dev != NULL); 865 instance = SDUNIT(getminor(*dev)); 866 867 DMSG(0, "[%d] minor = %d flag 
= %x, otyp = %x\n", 868 instance, getminor(*dev), flag, otyp); 869 870 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 871 return (EINVAL); 872 873 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 874 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 875 return (ENXIO); 876 } 877 878 /* 879 * Check to see if we can communicate with vds 880 */ 881 if (!vdc_is_able_to_tx_data(vdc, flag)) { 882 DMSG(0, "[%d] Not ready to transmit data (flag=%x)\n", 883 instance, flag); 884 return (ENOLINK); 885 } 886 887 mutex_enter(&vdc->lock); 888 vdc->open++; 889 mutex_exit(&vdc->lock); 890 891 return (0); 892 } 893 894 static int 895 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 896 { 897 _NOTE(ARGUNUSED(cred)) 898 899 int instance; 900 vdc_t *vdc; 901 902 instance = SDUNIT(getminor(dev)); 903 904 DMSG(0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 905 906 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 907 return (EINVAL); 908 909 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 910 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 911 return (ENXIO); 912 } 913 914 /* 915 * Check to see if we can communicate with vds 916 */ 917 if (!vdc_is_able_to_tx_data(vdc, 0)) { 918 DMSG(0, "[%d] Not ready to transmit data (flag=%x)\n", 919 instance, flag); 920 return (ETIMEDOUT); 921 } 922 923 if (vdc->dkio_flush_pending) { 924 DMSG(0, "[%d] Cannot detach: %d outstanding DKIO flushes\n", 925 instance, vdc->dkio_flush_pending); 926 return (EBUSY); 927 } 928 929 /* 930 * Should not need the mutex here, since the framework should protect 931 * against more opens on this device, but just in case. 
932 */ 933 mutex_enter(&vdc->lock); 934 vdc->open--; 935 mutex_exit(&vdc->lock); 936 937 return (0); 938 } 939 940 static int 941 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 942 { 943 _NOTE(ARGUNUSED(credp)) 944 _NOTE(ARGUNUSED(rvalp)) 945 946 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode)); 947 } 948 949 static int 950 vdc_print(dev_t dev, char *str) 951 { 952 cmn_err(CE_NOTE, "vdc%d: %s", SDUNIT(getminor(dev)), str); 953 return (0); 954 } 955 956 static int 957 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 958 { 959 buf_t *buf; /* BWRITE requests need to be in a buf_t structure */ 960 int rv; 961 size_t nbytes = nblk * DEV_BSIZE; 962 int instance = SDUNIT(getminor(dev)); 963 vdc_t *vdc = NULL; 964 965 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 966 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 967 return (ENXIO); 968 } 969 970 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 971 bioinit(buf); 972 buf->b_un.b_addr = addr; 973 buf->b_bcount = nbytes; 974 buf->b_flags = B_BUSY | B_WRITE; 975 buf->b_dev = dev; 976 rv = vdc_populate_descriptor(vdc, (caddr_t)buf, nbytes, 977 VD_OP_BWRITE, blkno, SDPART(getminor(dev))); 978 979 /* 980 * If the OS instance is panicking, the call above will ensure that 981 * the descriptor is done before returning. This should always be 982 * case when coming through this function but we check just in case 983 * and wait if necessary for the vDisk server to ACK and trigger 984 * the biodone. 
985 */ 986 if (!ddi_in_panic()) 987 rv = biowait(buf); 988 989 biofini(buf); 990 kmem_free(buf, sizeof (buf_t)); 991 992 DMSG(1, "[%d] status=%d\n", instance, rv); 993 994 return (rv); 995 } 996 997 /* -------------------------------------------------------------------------- */ 998 999 /* 1000 * Disk access routines 1001 * 1002 */ 1003 1004 /* 1005 * vdc_strategy() 1006 * 1007 * Return Value: 1008 * 0: As per strategy(9E), the strategy() function must return 0 1009 * [ bioerror(9f) sets b_flags to the proper error code ] 1010 */ 1011 static int 1012 vdc_strategy(struct buf *buf) 1013 { 1014 int rv = -1; 1015 vdc_t *vdc = NULL; 1016 int instance = SDUNIT(getminor(buf->b_edev)); 1017 int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; 1018 1019 DMSG(2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p", 1020 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 1021 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1022 1023 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1024 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1025 bioerror(buf, ENXIO); 1026 biodone(buf); 1027 return (0); 1028 } 1029 1030 DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc); 1031 1032 if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) { 1033 DMSG(0, "[%d] Not ready to transmit data\n", instance); 1034 bioerror(buf, ENXIO); 1035 biodone(buf); 1036 return (0); 1037 } 1038 bp_mapin(buf); 1039 1040 rv = vdc_populate_descriptor(vdc, (caddr_t)buf, buf->b_bcount, op, 1041 buf->b_lblkno, SDPART(getminor(buf->b_edev))); 1042 1043 /* 1044 * If the request was successfully sent, the strategy call returns and 1045 * the ACK handler calls the bioxxx functions when the vDisk server is 1046 * done. 
1047 */ 1048 if (rv) { 1049 DMSG(0, "[%d] Failed to read/write (err=%d)\n", instance, rv); 1050 bioerror(buf, rv); 1051 biodone(buf); 1052 } 1053 1054 return (0); 1055 } 1056 1057 1058 static int 1059 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1060 { 1061 _NOTE(ARGUNUSED(cred)) 1062 1063 DMSG(1, "[%d] Entered", SDUNIT(getminor(dev))); 1064 return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio)); 1065 } 1066 1067 static int 1068 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1069 { 1070 _NOTE(ARGUNUSED(cred)) 1071 1072 DMSG(1, "[%d] Entered", SDUNIT(getminor(dev))); 1073 return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio)); 1074 } 1075 1076 static int 1077 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1078 { 1079 _NOTE(ARGUNUSED(cred)) 1080 1081 DMSG(1, "[%d] Entered", SDUNIT(getminor(dev))); 1082 return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio)); 1083 } 1084 1085 static int 1086 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1087 { 1088 _NOTE(ARGUNUSED(cred)) 1089 1090 DMSG(1, "[%d] Entered", SDUNIT(getminor(dev))); 1091 return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio)); 1092 } 1093 1094 1095 /* -------------------------------------------------------------------------- */ 1096 1097 /* 1098 * Handshake support 1099 */ 1100 1101 /* 1102 * vdc_init_handshake_negotiation 1103 * 1104 * Description: 1105 * This function is called to trigger the handshake negotiations between 1106 * the client (vdc) and the server (vds). It may be called multiple times. 
1107 * 1108 * Parameters: 1109 * vdc - soft state pointer 1110 */ 1111 static void 1112 vdc_init_handshake_negotiation(void *arg) 1113 { 1114 vdc_t *vdc = (vdc_t *)(void *)arg; 1115 ldc_status_t ldc_state; 1116 vd_state_t state; 1117 int status; 1118 1119 ASSERT(vdc != NULL); 1120 1121 DMSG(0, "[%d] Initializing vdc<->vds handshake\n", vdc->instance); 1122 1123 /* get LDC state */ 1124 status = ldc_status(vdc->ldc_handle, &ldc_state); 1125 if (status != 0) { 1126 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status (err=%d)", 1127 vdc->instance, status); 1128 return; 1129 } 1130 1131 /* 1132 * If the LDC connection is not UP we bring it up now and return. 1133 * The handshake will be started again when the callback is 1134 * triggered due to the UP event. 1135 */ 1136 if (ldc_state != LDC_UP) { 1137 DMSG(0, "[%d] Triggering LDC_UP & returning\n", vdc->instance); 1138 (void) vdc_do_ldc_up(vdc); 1139 return; 1140 } 1141 1142 mutex_enter(&vdc->lock); 1143 /* 1144 * Do not continue if another thread has triggered a handshake which 1145 * has not been reset or detach() has stopped further handshakes. 1146 */ 1147 if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) { 1148 DMSG(0, "[%d] Negotiation not triggered. [init=%x]\n", 1149 vdc->instance, vdc->initialized); 1150 mutex_exit(&vdc->lock); 1151 return; 1152 } 1153 1154 if (vdc->hshake_cnt++ > vdc_retries) { 1155 cmn_err(CE_NOTE, "[%d] Failed repeatedly to complete handshake" 1156 "with vDisk server", vdc->instance); 1157 mutex_exit(&vdc->lock); 1158 return; 1159 } 1160 1161 vdc->initialized |= VDC_HANDSHAKE; 1162 vdc->ldc_state = ldc_state; 1163 1164 state = vdc->state; 1165 1166 if (state == VD_STATE_INIT) { 1167 /* 1168 * Set the desired version parameter to the first entry in the 1169 * version array. If this specific version is not supported, 1170 * the response handling code will step down the version number 1171 * to the next array entry and deal with it accordingly. 
1172 */ 1173 (void) vdc_init_ver_negotiation(vdc, vdc_version[0]); 1174 } else if (state == VD_STATE_VER) { 1175 (void) vdc_init_attr_negotiation(vdc); 1176 } else if (state == VD_STATE_ATTR) { 1177 (void) vdc_init_dring_negotiate(vdc); 1178 } else if (state == VD_STATE_DATA) { 1179 /* 1180 * nothing to do - we have already completed the negotiation 1181 * and we can transmit data when ready. 1182 */ 1183 DMSG(0, "[%d] Negotiation triggered after handshake completed", 1184 vdc->instance); 1185 } 1186 1187 mutex_exit(&vdc->lock); 1188 } 1189 1190 /* 1191 * Function: 1192 * vdc_init_ver_negotiation() 1193 * 1194 * Description: 1195 * 1196 * Arguments: 1197 * vdc - soft state pointer for this instance of the device driver. 1198 * 1199 * Return Code: 1200 * 0 - Success 1201 */ 1202 static int 1203 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 1204 { 1205 vio_ver_msg_t pkt; 1206 size_t msglen = sizeof (pkt); 1207 int status = -1; 1208 1209 ASSERT(vdc != NULL); 1210 ASSERT(mutex_owned(&vdc->lock)); 1211 1212 DMSG(0, "[%d] Entered.\n", vdc->instance); 1213 1214 /* 1215 * set the Session ID to a unique value 1216 * (the lower 32 bits of the clock tick) 1217 */ 1218 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1219 DMSG(0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 1220 1221 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1222 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1223 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1224 pkt.tag.vio_sid = vdc->session_id; 1225 pkt.dev_class = VDEV_DISK; 1226 pkt.ver_major = ver.major; 1227 pkt.ver_minor = ver.minor; 1228 1229 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1230 DMSG(0, "[%d] Ver info sent (status = %d)\n", vdc->instance, status); 1231 1232 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1233 cmn_err(CE_NOTE, "[%d] Failed to send Ver negotiation info: " 1234 "id(%lx) rv(%d) size(%ld)", 1235 vdc->instance, vdc->ldc_handle, 1236 status, msglen); 1237 if (msglen != sizeof (vio_ver_msg_t)) 1238 status = 
ENOMSG; 1239 } 1240 1241 return (status); 1242 } 1243 1244 /* 1245 * Function: 1246 * vdc_init_attr_negotiation() 1247 * 1248 * Description: 1249 * 1250 * Arguments: 1251 * vdc - soft state pointer for this instance of the device driver. 1252 * 1253 * Return Code: 1254 * 0 - Success 1255 */ 1256 static int 1257 vdc_init_attr_negotiation(vdc_t *vdc) 1258 { 1259 vd_attr_msg_t pkt; 1260 size_t msglen = sizeof (pkt); 1261 int status; 1262 1263 ASSERT(vdc != NULL); 1264 ASSERT(mutex_owned(&vdc->lock)); 1265 1266 DMSG(0, "[%d] entered\n", vdc->instance); 1267 1268 /* fill in tag */ 1269 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1270 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1271 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1272 pkt.tag.vio_sid = vdc->session_id; 1273 /* fill in payload */ 1274 pkt.max_xfer_sz = vdc->max_xfer_sz; 1275 pkt.vdisk_block_size = vdc->block_size; 1276 pkt.xfer_mode = VIO_DRING_MODE; 1277 pkt.operations = 0; /* server will set bits of valid operations */ 1278 pkt.vdisk_type = 0; /* server will set to valid device type */ 1279 pkt.vdisk_size = 0; /* server will set to valid size */ 1280 1281 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1282 DMSG(0, "[%d] Attr info sent (status = %d)\n", vdc->instance, status); 1283 1284 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1285 cmn_err(CE_NOTE, "[%d] Failed to send Attr negotiation info: " 1286 "id(%lx) rv(%d) size(%ld)", 1287 vdc->instance, vdc->ldc_handle, 1288 status, msglen); 1289 if (msglen != sizeof (vio_ver_msg_t)) 1290 status = ENOMSG; 1291 } 1292 1293 return (status); 1294 } 1295 1296 /* 1297 * Function: 1298 * vdc_init_dring_negotiate() 1299 * 1300 * Description: 1301 * 1302 * Arguments: 1303 * vdc - soft state pointer for this instance of the device driver. 
1304 * 1305 * Return Code: 1306 * 0 - Success 1307 */ 1308 static int 1309 vdc_init_dring_negotiate(vdc_t *vdc) 1310 { 1311 vio_dring_reg_msg_t pkt; 1312 size_t msglen = sizeof (pkt); 1313 int status = -1; 1314 1315 ASSERT(vdc != NULL); 1316 ASSERT(mutex_owned(&vdc->lock)); 1317 1318 status = vdc_init_descriptor_ring(vdc); 1319 if (status != 0) { 1320 cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n", 1321 vdc->instance, status); 1322 vdc_destroy_descriptor_ring(vdc); 1323 vdc_reset_connection(vdc, B_TRUE); 1324 return (status); 1325 } 1326 DMSG(0, "[%d] Init of descriptor ring completed (status = %d)\n", 1327 vdc->instance, status); 1328 1329 /* fill in tag */ 1330 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1331 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1332 pkt.tag.vio_subtype_env = VIO_DRING_REG; 1333 pkt.tag.vio_sid = vdc->session_id; 1334 /* fill in payload */ 1335 pkt.dring_ident = 0; 1336 pkt.num_descriptors = vdc->dring_len; 1337 pkt.descriptor_size = vdc->dring_entry_size; 1338 pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 1339 pkt.ncookies = vdc->dring_cookie_count; 1340 pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 1341 1342 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1343 if (status != 0) { 1344 cmn_err(CE_NOTE, "[%d] Failed to register DRing (err = %d)", 1345 vdc->instance, status); 1346 vdc_reset_connection(vdc, B_TRUE); 1347 } 1348 1349 return (status); 1350 } 1351 1352 1353 /* -------------------------------------------------------------------------- */ 1354 1355 /* 1356 * LDC helper routines 1357 */ 1358 1359 /* 1360 * Function: 1361 * vdc_send() 1362 * 1363 * Description: 1364 * The function encapsulates the call to write a message using LDC. 1365 * If LDC indicates that the call failed due to the queue being full, 1366 * we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise 1367 * we return the error returned by LDC. 
1368 * 1369 * Arguments: 1370 * ldc_handle - LDC handle for the channel this instance of vdc uses 1371 * pkt - address of LDC message to be sent 1372 * msglen - the size of the message being sent. When the function 1373 * returns, this contains the number of bytes written. 1374 * 1375 * Return Code: 1376 * 0 - Success. 1377 * EINVAL - pkt or msglen were NULL 1378 * ECONNRESET - The connection was not up. 1379 * EWOULDBLOCK - LDC queue is full 1380 * xxx - other error codes returned by ldc_write 1381 */ 1382 static int 1383 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 1384 { 1385 size_t size = 0; 1386 int retries = 0; 1387 int status = 0; 1388 1389 ASSERT(vdc != NULL); 1390 ASSERT(mutex_owned(&vdc->lock)); 1391 ASSERT(msglen != NULL); 1392 ASSERT(*msglen != 0); 1393 1394 do { 1395 size = *msglen; 1396 status = ldc_write(vdc->ldc_handle, pkt, &size); 1397 if (status == EWOULDBLOCK) 1398 delay(vdc_hz_timeout_ldc); 1399 } while (status == EWOULDBLOCK && retries++ < vdc_retries); 1400 1401 /* if LDC had serious issues --- reset vdc state */ 1402 if (status == EIO || status == ECONNRESET) { 1403 vdc_reset_connection(vdc, B_TRUE); 1404 } 1405 1406 /* return the last size written */ 1407 *msglen = size; 1408 1409 return (status); 1410 } 1411 1412 /* 1413 * Function: 1414 * vdc_get_ldc_id() 1415 * 1416 * Description: 1417 * This function gets the 'ldc-id' for this particular instance of vdc. 1418 * The id returned is the guest domain channel endpoint LDC uses for 1419 * communication with vds. 1420 * 1421 * Arguments: 1422 * dip - dev info pointer for this instance of the device driver. 1423 * ldc_id - pointer to variable used to return the 'ldc-id' found. 1424 * 1425 * Return Code: 1426 * 0 - Success. 1427 * ENOENT - Expected node or property did not exist. 
1428 * ENXIO - Unexpected error communicating with MD framework 1429 */ 1430 static int 1431 vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id) 1432 { 1433 int status = ENOENT; 1434 char *node_name = NULL; 1435 md_t *mdp = NULL; 1436 int num_nodes; 1437 int num_vdevs; 1438 int num_chans; 1439 mde_cookie_t rootnode; 1440 mde_cookie_t *listp = NULL; 1441 mde_cookie_t *chanp = NULL; 1442 boolean_t found_inst = B_FALSE; 1443 int listsz; 1444 int idx; 1445 uint64_t md_inst; 1446 int obp_inst; 1447 int instance = ddi_get_instance(dip); 1448 1449 ASSERT(ldc_id != NULL); 1450 *ldc_id = 0; 1451 1452 /* 1453 * Get the OBP instance number for comparison with the MD instance 1454 * 1455 * The "cfg-handle" property of a vdc node in an MD contains the MD's 1456 * notion of "instance", or unique identifier, for that node; OBP 1457 * stores the value of the "cfg-handle" MD property as the value of 1458 * the "reg" property on the node in the device tree it builds from 1459 * the MD and passes to Solaris. Thus, we look up the devinfo node's 1460 * "reg" property value to uniquely identify this device instance. 1461 * If the "reg" property cannot be found, the device tree state is 1462 * presumably so broken that there is no point in continuing. 1463 */ 1464 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 1465 cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 1466 return (ENOENT); 1467 } 1468 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1469 OBP_REG, -1); 1470 DMSG(1, "[%d] OBP inst=%d\n", instance, obp_inst); 1471 1472 /* 1473 * We now walk the MD nodes and if an instance of a vdc node matches 1474 * the instance got from OBP we get the ldc-id property. 
1475 */ 1476 if ((mdp = md_get_handle()) == NULL) { 1477 cmn_err(CE_WARN, "unable to init machine description"); 1478 return (ENXIO); 1479 } 1480 1481 num_nodes = md_node_count(mdp); 1482 ASSERT(num_nodes > 0); 1483 1484 listsz = num_nodes * sizeof (mde_cookie_t); 1485 1486 /* allocate memory for nodes */ 1487 listp = kmem_zalloc(listsz, KM_SLEEP); 1488 chanp = kmem_zalloc(listsz, KM_SLEEP); 1489 1490 rootnode = md_root_node(mdp); 1491 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 1492 1493 /* 1494 * Search for all the virtual devices, we will then check to see which 1495 * ones are disk nodes. 1496 */ 1497 num_vdevs = md_scan_dag(mdp, rootnode, 1498 md_find_name(mdp, VDC_MD_VDEV_NAME), 1499 md_find_name(mdp, "fwd"), listp); 1500 1501 if (num_vdevs <= 0) { 1502 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 1503 status = ENOENT; 1504 goto done; 1505 } 1506 1507 DMSG(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 1508 for (idx = 0; idx < num_vdevs; idx++) { 1509 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 1510 if ((status != 0) || (node_name == NULL)) { 1511 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 1512 ": err %d", VDC_MD_VDEV_NAME, status); 1513 continue; 1514 } 1515 1516 DMSG(1, "[%d] Found node '%s'\n", instance, node_name); 1517 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 1518 status = md_get_prop_val(mdp, listp[idx], 1519 VDC_MD_CFG_HDL, &md_inst); 1520 DMSG(1, "[%d] vdc inst in MD=%lx\n", instance, md_inst); 1521 if ((status == 0) && (md_inst == obp_inst)) { 1522 found_inst = B_TRUE; 1523 break; 1524 } 1525 } 1526 } 1527 1528 if (!found_inst) { 1529 cmn_err(CE_NOTE, "Unable to find correct '%s' node", 1530 VDC_MD_DISK_NAME); 1531 status = ENOENT; 1532 goto done; 1533 } 1534 DMSG(0, "[%d] MD inst=%lx\n", instance, md_inst); 1535 1536 /* get the channels for this node */ 1537 num_chans = md_scan_dag(mdp, listp[idx], 1538 md_find_name(mdp, VDC_MD_CHAN_NAME), 1539 md_find_name(mdp, "fwd"), chanp); 1540 1541 /* 
expecting at least one channel */ 1542 if (num_chans <= 0) { 1543 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 1544 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 1545 status = ENOENT; 1546 goto done; 1547 1548 } else if (num_chans != 1) { 1549 DMSG(0, "[%d] Expected 1 '%s' node for '%s' port, found %d\n", 1550 instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 1551 num_chans); 1552 } 1553 1554 /* 1555 * We use the first channel found (index 0), irrespective of how 1556 * many are there in total. 1557 */ 1558 if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) { 1559 cmn_err(CE_NOTE, "Channel '%s' property not found", 1560 VDC_ID_PROP); 1561 status = ENOENT; 1562 } 1563 1564 DMSG(0, "[%d] LDC id is 0x%lx\n", instance, *ldc_id); 1565 1566 done: 1567 if (chanp) 1568 kmem_free(chanp, listsz); 1569 if (listp) 1570 kmem_free(listp, listsz); 1571 1572 (void) md_fini_handle(mdp); 1573 1574 return (status); 1575 } 1576 1577 static int 1578 vdc_do_ldc_up(vdc_t *vdc) 1579 { 1580 int status; 1581 1582 DMSG(0, "[%d] Bringing up channel %lx\n", vdc->instance, vdc->ldc_id); 1583 1584 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 1585 switch (status) { 1586 case ECONNREFUSED: /* listener not ready at other end */ 1587 DMSG(0, "[%d] ldc_up(%lx,...) return %d\n", 1588 vdc->instance, vdc->ldc_id, status); 1589 status = 0; 1590 break; 1591 default: 1592 cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: " 1593 "channel=%ld, err=%d", 1594 vdc->instance, vdc->ldc_id, status); 1595 } 1596 } 1597 1598 return (status); 1599 } 1600 1601 1602 /* 1603 * vdc_is_able_to_tx_data() 1604 * 1605 * Description: 1606 * This function checks if we are able to send data to the 1607 * vDisk server (vds). The LDC connection needs to be up and 1608 * vdc & vds need to have completed the handshake negotiation. 
1609 * 1610 * Parameters: 1611 * vdc - soft state pointer 1612 * flag - flag to indicate if we can block or not 1613 * [ If O_NONBLOCK or O_NDELAY (which are defined in 1614 * open(2)) are set then do not block) 1615 * 1616 * Return Values 1617 * B_TRUE - can talk to vds 1618 * B_FALSE - unable to talk to vds 1619 */ 1620 static boolean_t 1621 vdc_is_able_to_tx_data(vdc_t *vdc, int flag) 1622 { 1623 vd_state_t state; 1624 uint32_t ldc_state; 1625 uint_t retries = 0; 1626 int rv = -1; 1627 1628 ASSERT(vdc != NULL); 1629 1630 mutex_enter(&vdc->lock); 1631 state = vdc->state; 1632 ldc_state = vdc->ldc_state; 1633 mutex_exit(&vdc->lock); 1634 1635 if ((state == VD_STATE_DATA) && (ldc_state == LDC_UP)) 1636 return (B_TRUE); 1637 1638 if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) { 1639 DMSG(0, "[%d] Not ready to tx - state %d LDC state %d\n", 1640 vdc->instance, state, ldc_state); 1641 return (B_FALSE); 1642 } 1643 1644 /* 1645 * We want to check and see if any negotiations triggered earlier 1646 * have succeeded. We are prepared to wait a little while in case 1647 * they are still in progress. 1648 */ 1649 mutex_enter(&vdc->lock); 1650 while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) { 1651 DMSG(0, "[%d] Waiting for connection. (state %d : LDC %d)\n", 1652 vdc->instance, vdc->state, vdc->ldc_state); 1653 1654 rv = cv_timedwait(&vdc->cv, &vdc->lock, 1655 VD_GET_TIMEOUT_HZ(vdc_hz_timeout, retries)); 1656 1657 /* 1658 * An rv of -1 indicates that we timed out without the LDC 1659 * state changing so it looks like the other side (vdc) is 1660 * not yet ready/responding. 1661 * 1662 * Any other value of rv indicates that the LDC triggered an 1663 * interrupt so we just loop again, check the handshake state 1664 * and keep waiting if necessary. 
1665 */ 1666 if (rv == -1) { 1667 if (retries >= vdc_retries) { 1668 DMSG(0, "[%d] handshake wait timed out\n", 1669 vdc->instance); 1670 mutex_exit(&vdc->lock); 1671 return (B_FALSE); 1672 } else { 1673 DMSG(1, "[%d] Handshake retry #%d timed out\n", 1674 vdc->instance, retries); 1675 retries++; 1676 } 1677 } 1678 } 1679 1680 ASSERT(vdc->ldc_state == LDC_UP); 1681 ASSERT(vdc->state == VD_STATE_DATA); 1682 1683 mutex_exit(&vdc->lock); 1684 1685 return (B_TRUE); 1686 } 1687 1688 1689 /* 1690 * Function: 1691 * vdc_terminate_ldc() 1692 * 1693 * Description: 1694 * 1695 * Arguments: 1696 * vdc - soft state pointer for this instance of the device driver. 1697 * 1698 * Return Code: 1699 * None 1700 */ 1701 static void 1702 vdc_terminate_ldc(vdc_t *vdc) 1703 { 1704 int instance = ddi_get_instance(vdc->dip); 1705 1706 ASSERT(vdc != NULL); 1707 ASSERT(mutex_owned(&vdc->lock)); 1708 1709 DMSG(0, "[%d] initialized=%x\n", instance, vdc->initialized); 1710 1711 if (vdc->initialized & VDC_LDC_OPEN) { 1712 DMSG(0, "[%d] ldc_close()\n", instance); 1713 (void) ldc_close(vdc->ldc_handle); 1714 } 1715 if (vdc->initialized & VDC_LDC_CB) { 1716 DMSG(0, "[%d] ldc_unreg_callback()\n", instance); 1717 (void) ldc_unreg_callback(vdc->ldc_handle); 1718 } 1719 if (vdc->initialized & VDC_LDC) { 1720 DMSG(0, "[%d] ldc_fini()\n", instance); 1721 (void) ldc_fini(vdc->ldc_handle); 1722 vdc->ldc_handle = NULL; 1723 } 1724 1725 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 1726 } 1727 1728 /* 1729 * Function: 1730 * vdc_reset_connection() 1731 * 1732 * Description: 1733 * 1734 * Arguments: 1735 * vdc - soft state pointer for this instance of the device driver. 1736 * reset_ldc - Flag whether or not to reset the LDC connection also. 
1737 * 1738 * Return Code: 1739 * None 1740 */ 1741 static void 1742 vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc) 1743 { 1744 int status; 1745 1746 ASSERT(vdc != NULL); 1747 ASSERT(mutex_owned(&vdc->lock)); 1748 1749 cmn_err(CE_CONT, "?[%d] Resetting connection to vDisk server\n", 1750 vdc->instance); 1751 1752 vdc->state = VD_STATE_INIT; 1753 1754 if (reset_ldc) { 1755 status = ldc_down(vdc->ldc_handle); 1756 DMSG(0, "[%d] ldc_down() = %d\n", vdc->instance, status); 1757 } 1758 1759 vdc->initialized &= ~VDC_HANDSHAKE; 1760 DMSG(0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 1761 } 1762 1763 /* -------------------------------------------------------------------------- */ 1764 1765 /* 1766 * Descriptor Ring helper routines 1767 */ 1768 1769 /* 1770 * Function: 1771 * vdc_init_descriptor_ring() 1772 * 1773 * Description: 1774 * 1775 * Arguments: 1776 * vdc - soft state pointer for this instance of the device driver. 1777 * 1778 * Return Code: 1779 * 0 - Success 1780 */ 1781 static int 1782 vdc_init_descriptor_ring(vdc_t *vdc) 1783 { 1784 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 1785 int status = 0; 1786 int i; 1787 1788 DMSG(0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 1789 1790 ASSERT(vdc != NULL); 1791 ASSERT(mutex_owned(&vdc->lock)); 1792 ASSERT(vdc->ldc_handle != NULL); 1793 1794 /* ensure we have enough room to store max sized block */ 1795 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 1796 1797 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 1798 DMSG(0, "[%d] ldc_mem_dring_create\n", vdc->instance); 1799 /* 1800 * Calculate the maximum block size we can transmit using one 1801 * Descriptor Ring entry from the attributes returned by the 1802 * vDisk server. This is subject to a minimum of 'maxphys' 1803 * as we do not have the capability to split requests over 1804 * multiple DRing entries. 
1805 */ 1806 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 1807 DMSG(0, "[%d] using minimum DRing size\n", 1808 vdc->instance); 1809 vdc->dring_max_cookies = maxphys / PAGESIZE; 1810 } else { 1811 vdc->dring_max_cookies = 1812 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 1813 } 1814 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 1815 (sizeof (ldc_mem_cookie_t) * 1816 (vdc->dring_max_cookies - 1))); 1817 vdc->dring_len = VD_DRING_LEN; 1818 1819 status = ldc_mem_dring_create(vdc->dring_len, 1820 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 1821 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 1822 cmn_err(CE_NOTE, "[%d] Descriptor ring creation failed", 1823 vdc->instance); 1824 return (status); 1825 } 1826 vdc->initialized |= VDC_DRING_INIT; 1827 } 1828 1829 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 1830 DMSG(0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 1831 vdc->dring_cookie = 1832 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 1833 1834 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 1835 LDC_SHADOW_MAP, LDC_MEM_RW, 1836 &vdc->dring_cookie[0], 1837 &vdc->dring_cookie_count); 1838 if (status != 0) { 1839 cmn_err(CE_NOTE, "[%d] Failed to bind descriptor ring " 1840 "(%lx) to channel (%lx)\n", vdc->instance, 1841 vdc->ldc_dring_hdl, vdc->ldc_handle); 1842 return (status); 1843 } 1844 ASSERT(vdc->dring_cookie_count == 1); 1845 vdc->initialized |= VDC_DRING_BOUND; 1846 } 1847 1848 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 1849 if (status != 0) { 1850 DMSG(0, "[%d] Failed to get info for descriptor ring (%lx)\n", 1851 vdc->instance, vdc->ldc_dring_hdl); 1852 return (status); 1853 } 1854 1855 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 1856 DMSG(0, "[%d] local dring\n", vdc->instance); 1857 1858 /* Allocate the local copy of this dring */ 1859 vdc->local_dring = 1860 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 1861 KM_SLEEP); 1862 vdc->initialized |= VDC_DRING_LOCAL; 1863 } 
1864 1865 /* 1866 * Mark all DRing entries as free and initialize the private 1867 * descriptor's memory handles. If any entry is initialized, 1868 * we need to free it later so we set the bit in 'initialized' 1869 * at the start. 1870 */ 1871 vdc->initialized |= VDC_DRING_ENTRY; 1872 for (i = 0; i < vdc->dring_len; i++) { 1873 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 1874 dep->hdr.dstate = VIO_DESC_FREE; 1875 1876 status = ldc_mem_alloc_handle(vdc->ldc_handle, 1877 &vdc->local_dring[i].desc_mhdl); 1878 if (status != 0) { 1879 cmn_err(CE_NOTE, "![%d] Failed to alloc mem handle for" 1880 " descriptor %d", vdc->instance, i); 1881 return (status); 1882 } 1883 vdc->local_dring[i].flags = VIO_DESC_FREE; 1884 vdc->local_dring[i].dep = dep; 1885 1886 mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL); 1887 cv_init(&vdc->local_dring[i].cv, NULL, CV_DRIVER, NULL); 1888 } 1889 1890 /* 1891 * We init the index of the last DRing entry used. Since the code to 1892 * get the next available entry increments it before selecting one, 1893 * we set it to the last DRing entry so that it wraps around to zero 1894 * for the 1st entry to be used. 1895 */ 1896 vdc->dring_curr_idx = vdc->dring_len - 1; 1897 1898 vdc->dring_notify_server = B_TRUE; 1899 1900 return (status); 1901 } 1902 1903 /* 1904 * Function: 1905 * vdc_destroy_descriptor_ring() 1906 * 1907 * Description: 1908 * 1909 * Arguments: 1910 * vdc - soft state pointer for this instance of the device driver. 
1911 * 1912 * Return Code: 1913 * None 1914 */ 1915 static void 1916 vdc_destroy_descriptor_ring(vdc_t *vdc) 1917 { 1918 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 1919 ldc_mem_handle_t mhdl = NULL; 1920 int status = -1; 1921 int i; /* loop */ 1922 1923 ASSERT(vdc != NULL); 1924 ASSERT(mutex_owned(&vdc->lock)); 1925 ASSERT(vdc->state == VD_STATE_INIT); 1926 1927 DMSG(0, "[%d] Entered\n", vdc->instance); 1928 1929 if (vdc->initialized & VDC_DRING_ENTRY) { 1930 DMSG(0, "[%d] Removing Local DRing entries\n", vdc->instance); 1931 for (i = 0; i < vdc->dring_len; i++) { 1932 ldep = &vdc->local_dring[i]; 1933 mhdl = ldep->desc_mhdl; 1934 1935 if (mhdl == NULL) 1936 continue; 1937 1938 (void) ldc_mem_free_handle(mhdl); 1939 mutex_destroy(&ldep->lock); 1940 cv_destroy(&ldep->cv); 1941 } 1942 vdc->initialized &= ~VDC_DRING_ENTRY; 1943 } 1944 1945 if (vdc->initialized & VDC_DRING_LOCAL) { 1946 DMSG(0, "[%d] Freeing Local DRing\n", vdc->instance); 1947 kmem_free(vdc->local_dring, 1948 vdc->dring_len * sizeof (vdc_local_desc_t)); 1949 vdc->initialized &= ~VDC_DRING_LOCAL; 1950 } 1951 1952 if (vdc->initialized & VDC_DRING_BOUND) { 1953 DMSG(0, "[%d] Unbinding DRing\n", vdc->instance); 1954 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 1955 if (status == 0) { 1956 vdc->initialized &= ~VDC_DRING_BOUND; 1957 } else { 1958 cmn_err(CE_NOTE, "[%d] Error %d unbinding DRing %lx", 1959 vdc->instance, status, vdc->ldc_dring_hdl); 1960 } 1961 } 1962 1963 if (vdc->initialized & VDC_DRING_INIT) { 1964 DMSG(0, "[%d] Destroying DRing\n", vdc->instance); 1965 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 1966 if (status == 0) { 1967 vdc->ldc_dring_hdl = NULL; 1968 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 1969 vdc->initialized &= ~VDC_DRING_INIT; 1970 } else { 1971 cmn_err(CE_NOTE, "[%d] Error %d destroying DRing (%lx)", 1972 vdc->instance, status, vdc->ldc_dring_hdl); 1973 } 1974 } 1975 } 1976 1977 /* 1978 * vdc_get_next_dring_entry_idx() 1979 * 1980 
* Description: 1981 * This function gets the index of the next Descriptor Ring entry available 1982 * If the ring is full, it will back off and wait for the next entry to be 1983 * freed (the ACK handler will signal). 1984 * 1985 * Return Value: 1986 * 0 <= rv < vdc->dring_len Next available slot 1987 * -1 DRing is full 1988 */ 1989 static int 1990 vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t num_slots_needed) 1991 { 1992 _NOTE(ARGUNUSED(num_slots_needed)) 1993 1994 vd_dring_entry_t *dep = NULL; /* DRing Entry Pointer */ 1995 vdc_local_desc_t *ldep = NULL; /* Local DRing Entry Pointer */ 1996 int idx = -1; 1997 1998 ASSERT(vdc != NULL); 1999 ASSERT(vdc->dring_len == vdc->dring_len); 2000 ASSERT(vdc->dring_curr_idx >= 0); 2001 ASSERT(vdc->dring_curr_idx < vdc->dring_len); 2002 ASSERT(mutex_owned(&vdc->dring_lock)); 2003 2004 /* pick the next descriptor after the last one used */ 2005 idx = (vdc->dring_curr_idx + 1) % vdc->dring_len; 2006 ldep = &vdc->local_dring[idx]; 2007 ASSERT(ldep != NULL); 2008 dep = ldep->dep; 2009 ASSERT(dep != NULL); 2010 2011 mutex_enter(&ldep->lock); 2012 if (dep->hdr.dstate == VIO_DESC_FREE) { 2013 vdc->dring_curr_idx = idx; 2014 } else { 2015 DTRACE_PROBE(full); 2016 (void) cv_timedwait(&ldep->cv, &ldep->lock, 2017 VD_GET_TIMEOUT_HZ(vdc_hz_timeout, 1)); 2018 if (dep->hdr.dstate == VIO_DESC_FREE) { 2019 vdc->dring_curr_idx = idx; 2020 } else { 2021 DMSG(0, "[%d] Entry %d unavailable still in state %d\n", 2022 vdc->instance, idx, dep->hdr.dstate); 2023 idx = -1; /* indicate that the ring is full */ 2024 } 2025 } 2026 mutex_exit(&ldep->lock); 2027 2028 return (idx); 2029 } 2030 2031 /* 2032 * Function: 2033 * vdc_populate_descriptor 2034 * 2035 * Description: 2036 * This routine writes the data to be transmitted to vds into the 2037 * descriptor, notifies vds that the ring has been updated and 2038 * then waits for the request to be processed. 
2039 * 2040 * Arguments: 2041 * vdc - the soft state pointer 2042 * addr - address of structure to be written. In the case of block 2043 * reads and writes this structure will be a buf_t and the 2044 * address of the data to be written will be in the b_un.b_addr 2045 * field. Otherwise the value of addr will be the address 2046 * to be written. 2047 * nbytes - number of bytes to read/write 2048 * operation - operation we want vds to perform (VD_OP_XXX) 2049 * arg - parameter to be sent to server (depends on VD_OP_XXX type) 2050 * . mode for ioctl(9e) 2051 * . LP64 diskaddr_t (block I/O) 2052 * slice - the disk slice this request is for 2053 * 2054 * Return Codes: 2055 * 0 2056 * EAGAIN 2057 * EFAULT 2058 * ENXIO 2059 * EIO 2060 */ 2061 static int 2062 vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int operation, 2063 uint64_t arg, uint64_t slice) 2064 { 2065 vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ 2066 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2067 int idx = 0; /* Index of DRing entry used */ 2068 vio_dring_msg_t dmsg; 2069 size_t msglen = sizeof (dmsg); 2070 int retries = 0; 2071 int rv; 2072 2073 ASSERT(vdc != NULL); 2074 ASSERT(slice < V_NUMPAR); 2075 2076 /* 2077 * Get next available DRing entry. 2078 */ 2079 mutex_enter(&vdc->dring_lock); 2080 idx = vdc_get_next_dring_entry_idx(vdc, 1); 2081 if (idx == -1) { 2082 mutex_exit(&vdc->dring_lock); 2083 DMSG(0, "[%d] no descriptor ring entry avail, last seq=%ld\n", 2084 vdc->instance, vdc->seq_num - 1); 2085 2086 /* 2087 * Since strategy should not block we don't wait for the DRing 2088 * to empty and instead return 2089 */ 2090 return (EAGAIN); 2091 } 2092 2093 ASSERT(idx < vdc->dring_len); 2094 local_dep = &vdc->local_dring[idx]; 2095 dep = local_dep->dep; 2096 ASSERT(dep != NULL); 2097 2098 /* 2099 * We now get the lock for this descriptor before dropping the overall 2100 * DRing lock. 
This prevents a race condition where another vdc thread 2101 * could grab the descriptor we selected. 2102 */ 2103 ASSERT(MUTEX_NOT_HELD(&local_dep->lock)); 2104 mutex_enter(&local_dep->lock); 2105 mutex_exit(&vdc->dring_lock); 2106 2107 switch (operation) { 2108 case VD_OP_BREAD: 2109 case VD_OP_BWRITE: 2110 local_dep->buf = (struct buf *)addr; 2111 local_dep->addr = local_dep->buf->b_un.b_addr; 2112 DMSG(2, "[%d] buf=%p, block=%lx, nbytes=%lu\n", 2113 vdc->instance, (void *)addr, arg, nbytes); 2114 dep->payload.addr = (diskaddr_t)arg; 2115 rv = vdc_populate_mem_hdl(vdc, idx, local_dep->addr, 2116 nbytes, operation); 2117 break; 2118 2119 case VD_OP_GET_VTOC: 2120 case VD_OP_SET_VTOC: 2121 case VD_OP_GET_DISKGEOM: 2122 case VD_OP_SET_DISKGEOM: 2123 case VD_OP_SCSICMD: 2124 local_dep->addr = addr; 2125 if (nbytes > 0) { 2126 rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, 2127 operation); 2128 } 2129 break; 2130 2131 case VD_OP_FLUSH: 2132 case VD_OP_GET_WCE: 2133 case VD_OP_SET_WCE: 2134 rv = 0; /* nothing to bind */ 2135 break; 2136 2137 default: 2138 cmn_err(CE_CONT, "?[%d] Unsupported vDisk operation [%d]\n", 2139 vdc->instance, operation); 2140 rv = EINVAL; 2141 } 2142 2143 if (rv != 0) { 2144 mutex_exit(&local_dep->lock); 2145 return (rv); 2146 } 2147 2148 /* 2149 * fill in the data details into the DRing 2150 */ 2151 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdc); 2152 dep->payload.operation = operation; 2153 dep->payload.nbytes = nbytes; 2154 dep->payload.status = -1; /* vds will set valid value */ 2155 dep->payload.slice = slice; 2156 dep->hdr.dstate = VIO_DESC_READY; 2157 dep->hdr.ack = 1; /* request an ACK for every message */ 2158 2159 local_dep->flags = VIO_DESC_READY; 2160 2161 /* 2162 * Send a msg with the DRing details to vds 2163 */ 2164 mutex_enter(&vdc->lock); 2165 VIO_INIT_DRING_DATA_TAG(dmsg); 2166 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2167 dmsg.dring_ident = vdc->dring_ident; 2168 dmsg.start_idx = idx; 2169 dmsg.end_idx = idx; 2170 2171 
DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdc); 2172 2173 DMSG(2, "[%d] ident=0x%lx, st=%u, end=%u, seq=%ld req=%ld dep=%p\n", 2174 vdc->instance, vdc->dring_ident, 2175 dmsg.start_idx, dmsg.end_idx, 2176 dmsg.seq_num, dep->payload.req_id, (void *)dep); 2177 2178 rv = vdc_send(vdc, (caddr_t)&dmsg, &msglen); 2179 DMSG(1, "[%d] send via LDC: rv=%d\n", vdc->instance, rv); 2180 if (rv != 0) { 2181 cmn_err(CE_NOTE, "[%d] err (%d) sending DRing data msg via LDC", 2182 vdc->instance, rv); 2183 2184 /* Clear the DRing entry */ 2185 rv = vdc_depopulate_descriptor(vdc, idx); 2186 2187 mutex_exit(&vdc->lock); 2188 mutex_exit(&local_dep->lock); 2189 2190 return (rv ? rv : EAGAIN); 2191 } 2192 2193 /* 2194 * If the message was successfully sent, we increment the sequence 2195 * number to be used by the next message 2196 */ 2197 vdc->seq_num++; 2198 mutex_exit(&vdc->lock); 2199 2200 /* 2201 * When a guest is panicking, the completion of requests needs to be 2202 * handled differently because interrupts are disabled and vdc 2203 * will not get messages. We have to poll for the messages instead. 2204 */ 2205 if (ddi_in_panic()) { 2206 int start = 0; 2207 retries = 0; 2208 for (;;) { 2209 msglen = sizeof (dmsg); 2210 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, 2211 &msglen); 2212 if (rv) { 2213 rv = EINVAL; 2214 break; 2215 } 2216 2217 /* 2218 * if there are no packets wait and check again 2219 */ 2220 if ((rv == 0) && (msglen == 0)) { 2221 if (retries++ > vdc_dump_retries) { 2222 DMSG(0, "[%d] Stopping wait, idx %d\n", 2223 vdc->instance, idx); 2224 rv = EAGAIN; 2225 break; 2226 } 2227 2228 DMSG(1, "Waiting for next packet @ %d\n", idx); 2229 drv_usecwait(vdc_usec_timeout_dump); 2230 continue; 2231 } 2232 2233 /* 2234 * Ignore all messages that are not ACKs/NACKs to 2235 * DRing requests. 
2236 */ 2237 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2238 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2239 DMSG(0, "discard pkt: type=%d sub=%d env=%d\n", 2240 dmsg.tag.vio_msgtype, 2241 dmsg.tag.vio_subtype, 2242 dmsg.tag.vio_subtype_env); 2243 continue; 2244 } 2245 2246 /* 2247 * set the appropriate return value for the 2248 * current request. 2249 */ 2250 switch (dmsg.tag.vio_subtype) { 2251 case VIO_SUBTYPE_ACK: 2252 rv = 0; 2253 break; 2254 case VIO_SUBTYPE_NACK: 2255 rv = EAGAIN; 2256 break; 2257 default: 2258 continue; 2259 } 2260 2261 start = dmsg.start_idx; 2262 if (start >= vdc->dring_len) { 2263 DMSG(0, "[%d] Bogus ack data : start %d\n", 2264 vdc->instance, start); 2265 continue; 2266 } 2267 2268 dep = VDC_GET_DRING_ENTRY_PTR(vdc, start); 2269 2270 DMSG(1, "[%d] Dumping start=%d idx=%d state=%d\n", 2271 vdc->instance, start, idx, dep->hdr.dstate); 2272 2273 if (dep->hdr.dstate != VIO_DESC_DONE) { 2274 DMSG(0, "[%d] Entry @ %d - state !DONE %d\n", 2275 vdc->instance, start, dep->hdr.dstate); 2276 continue; 2277 } 2278 2279 (void) vdc_depopulate_descriptor(vdc, start); 2280 2281 /* 2282 * We want to process all Dring entries up to 2283 * the current one so that we can return an 2284 * error with the correct request. 2285 */ 2286 if (idx > start) { 2287 DMSG(0, "[%d] Looping: start %d, idx %d\n", 2288 vdc->instance, idx, start); 2289 continue; 2290 } 2291 2292 /* exit - all outstanding requests are completed */ 2293 break; 2294 } 2295 2296 mutex_exit(&local_dep->lock); 2297 2298 return (rv); 2299 } 2300 2301 /* 2302 * In the case of calls from strategy and dump (in the non-panic case), 2303 * instead of waiting for a response from the vDisk server return now. 
2304 * They will be processed asynchronously and the vdc ACK handling code 2305 * will trigger the biodone(9F) 2306 */ 2307 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2308 mutex_exit(&local_dep->lock); 2309 return (rv); 2310 } 2311 2312 /* 2313 * In the case of synchronous calls we watch the DRing entries we 2314 * modified and await the response from vds. 2315 */ 2316 rv = vdc_wait_for_descriptor_update(vdc, idx, dmsg); 2317 if (rv == ETIMEDOUT) { 2318 /* debug info when dumping state on vds side */ 2319 dep->payload.status = ECANCELED; 2320 } 2321 2322 rv = vdc_depopulate_descriptor(vdc, idx); 2323 DMSG(0, "[%d] Exiting: status=%d\n", vdc->instance, rv); 2324 2325 mutex_exit(&local_dep->lock); 2326 2327 return (rv); 2328 } 2329 2330 /* 2331 * Function: 2332 * vdc_wait_for_descriptor_update() 2333 * 2334 * Description: 2335 * 2336 * Arguments: 2337 * vdc - soft state pointer for this instance of the device driver. 2338 * idx - Index of the Descriptor Ring entry being modified 2339 * dmsg - LDC message sent by vDisk server 2340 * 2341 * Return Code: 2342 * 0 - Success 2343 */ 2344 static int 2345 vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg) 2346 { 2347 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2348 vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ 2349 size_t msglen = sizeof (dmsg); 2350 int retries = 0; 2351 int status = 0; 2352 int rv = 0; 2353 2354 ASSERT(vdc != NULL); 2355 ASSERT(idx < vdc->dring_len); 2356 local_dep = &vdc->local_dring[idx]; 2357 ASSERT(local_dep != NULL); 2358 ASSERT(MUTEX_HELD(&local_dep->lock)); 2359 dep = local_dep->dep; 2360 ASSERT(dep != NULL); 2361 2362 while (dep->hdr.dstate != VIO_DESC_DONE) { 2363 rv = cv_timedwait(&local_dep->cv, &local_dep->lock, 2364 VD_GET_TIMEOUT_HZ(vdc_hz_timeout, retries)); 2365 if (rv == -1) { 2366 /* 2367 * If they persist in ignoring us we'll storm off in a 2368 * huff and return ETIMEDOUT to the upper layers. 
2369 */ 2370 if (retries >= vdc_retries) { 2371 DMSG(0, "[%d] Finished waiting on entry %d\n", 2372 vdc->instance, idx); 2373 status = ETIMEDOUT; 2374 break; 2375 } else { 2376 retries++; 2377 DMSG(0, "[%d] Timeout #%d on entry %d " 2378 "[seq %lu][req %lu]\n", vdc->instance, 2379 retries, idx, dmsg.seq_num, 2380 dep->payload.req_id); 2381 } 2382 2383 if (dep->hdr.dstate & VIO_DESC_ACCEPTED) { 2384 DMSG(0, "[%d] entry %d ACCEPTED [seq %lu]" 2385 "[req %lu] but not ACK'ed by vds yet\n", 2386 vdc->instance, idx, dmsg.seq_num, 2387 dep->payload.req_id); 2388 continue; 2389 } 2390 2391 /* 2392 * we resend the message as it may have been dropped 2393 * and have never made it to the other side (vds). 2394 * (We reuse the original message but update seq ID) 2395 */ 2396 mutex_enter(&vdc->lock); 2397 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); 2398 retries = 0; 2399 status = vdc_send(vdc, (caddr_t)&dmsg, &msglen); 2400 if (status != 0) { 2401 mutex_exit(&vdc->lock); 2402 cmn_err(CE_NOTE, "[%d] Error (%d) while sending" 2403 " after timeout", 2404 vdc->instance, status); 2405 status = ETIMEDOUT; 2406 break; 2407 } 2408 /* 2409 * If the message was successfully sent, we increment 2410 * the sequence number to be used by the next message. 2411 */ 2412 vdc->seq_num++; 2413 mutex_exit(&vdc->lock); 2414 } 2415 } 2416 2417 return (status); 2418 } 2419 2420 2421 /* 2422 * Function: 2423 * vdc_depopulate_descriptor() 2424 * 2425 * Description: 2426 * 2427 * Arguments: 2428 * vdc - soft state pointer for this instance of the device driver. 
2429 * idx - Index of the Descriptor Ring entry being modified 2430 * 2431 * Return Code: 2432 * 0 - Success 2433 */ 2434 static int 2435 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2436 { 2437 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2438 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2439 int status = ENXIO; 2440 int operation; 2441 int rv = 0; 2442 2443 ASSERT(vdc != NULL); 2444 ASSERT(idx < vdc->dring_len); 2445 ldep = &vdc->local_dring[idx]; 2446 ASSERT(ldep != NULL); 2447 ASSERT(MUTEX_HELD(&ldep->lock)); 2448 dep = ldep->dep; 2449 ASSERT(dep != NULL); 2450 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 2451 (dep->payload.status == ECANCELED)); 2452 2453 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2454 VIO_SET_DESC_STATE(ldep->flags, VIO_DESC_FREE); 2455 status = dep->payload.status; 2456 operation = dep->payload.operation; 2457 2458 /* the DKIO W$ operations never bind handles so we can return now */ 2459 if ((operation == VD_OP_FLUSH) || 2460 (operation == VD_OP_GET_WCE) || 2461 (operation == VD_OP_SET_WCE)) 2462 return (status); 2463 2464 /* 2465 * If the upper layer passed in a misaligned address we copied the 2466 * data into an aligned buffer before sending it to LDC - we now 2467 * copy it back to the original buffer. 2468 */ 2469 if (ldep->align_addr) { 2470 ASSERT(ldep->addr != NULL); 2471 ASSERT(dep->payload.nbytes > 0); 2472 2473 bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes); 2474 kmem_free(ldep->align_addr, 2475 sizeof (caddr_t) * P2ROUNDUP(dep->payload.nbytes, 8)); 2476 ldep->align_addr = NULL; 2477 } 2478 2479 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 2480 if (rv != 0) { 2481 cmn_err(CE_CONT, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 2482 vdc->instance, ldep->desc_mhdl, idx, rv); 2483 /* 2484 * The error returned by the vDisk server is more informative 2485 * and thus has a higher priority but if it isn't set we ensure 2486 * that this function returns an error. 
2487 */ 2488 if (status == 0) 2489 status = EINVAL; 2490 } 2491 2492 return (status); 2493 } 2494 2495 /* 2496 * Function: 2497 * vdc_populate_mem_hdl() 2498 * 2499 * Description: 2500 * 2501 * Arguments: 2502 * vdc - soft state pointer for this instance of the device driver. 2503 * idx - Index of the Descriptor Ring entry being modified 2504 * addr - virtual address being mapped in 2505 * nybtes - number of bytes in 'addr' 2506 * operation - the vDisk operation being performed (VD_OP_xxx) 2507 * 2508 * Return Code: 2509 * 0 - Success 2510 */ 2511 static int 2512 vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes, 2513 int operation) 2514 { 2515 vd_dring_entry_t *dep = NULL; 2516 vdc_local_desc_t *ldep = NULL; 2517 ldc_mem_handle_t mhdl; 2518 caddr_t vaddr; 2519 int perm = LDC_MEM_RW; 2520 int rv = 0; 2521 int i; 2522 2523 ASSERT(vdc != NULL); 2524 ASSERT(idx < vdc->dring_len); 2525 2526 dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx); 2527 ldep = &vdc->local_dring[idx]; 2528 mhdl = ldep->desc_mhdl; 2529 2530 switch (operation) { 2531 case VD_OP_BREAD: 2532 perm = LDC_MEM_W; 2533 break; 2534 2535 case VD_OP_BWRITE: 2536 perm = LDC_MEM_R; 2537 break; 2538 2539 case VD_OP_GET_VTOC: 2540 case VD_OP_SET_VTOC: 2541 case VD_OP_GET_DISKGEOM: 2542 case VD_OP_SET_DISKGEOM: 2543 case VD_OP_SCSICMD: 2544 perm = LDC_MEM_RW; 2545 break; 2546 2547 default: 2548 ASSERT(0); /* catch bad programming in vdc */ 2549 } 2550 2551 /* 2552 * LDC expects any addresses passed in to be 8-byte aligned. 
We need 2553 * to copy the contents of any misaligned buffers to a newly allocated 2554 * buffer and bind it instead (and copy the the contents back to the 2555 * original buffer passed in when depopulating the descriptor) 2556 */ 2557 vaddr = addr; 2558 if (((uint64_t)addr & 0x7) != 0) { 2559 ASSERT(ldep->align_addr == NULL); 2560 ldep->align_addr = 2561 kmem_zalloc(sizeof (caddr_t) * P2ROUNDUP(nbytes, 8), 2562 KM_SLEEP); 2563 DMSG(0, "[%d] Misaligned address %p reallocating " 2564 "(buf=%p nb=%ld op=%d entry=%d)\n", 2565 vdc->instance, (void *)addr, (void *)ldep->align_addr, 2566 nbytes, operation, idx); 2567 bcopy(addr, ldep->align_addr, nbytes); 2568 vaddr = ldep->align_addr; 2569 } 2570 2571 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2572 LDC_SHADOW_MAP, perm, &dep->payload.cookie[0], 2573 &dep->payload.ncookies); 2574 DMSG(2, "[%d] bound mem handle; ncookies=%d\n", 2575 vdc->instance, dep->payload.ncookies); 2576 if (rv != 0) { 2577 cmn_err(CE_CONT, "?[%d] Failed to bind LDC memory handle " 2578 "(mhdl=%p, buf=%p entry=%u err=%d)\n", 2579 vdc->instance, (void *)mhdl, (void *)addr, idx, rv); 2580 if (ldep->align_addr) { 2581 kmem_free(ldep->align_addr, 2582 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 2583 ldep->align_addr = NULL; 2584 } 2585 return (EAGAIN); 2586 } 2587 2588 /* 2589 * Get the other cookies (if any). 
2590 */ 2591 for (i = 1; i < dep->payload.ncookies; i++) { 2592 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2593 if (rv != 0) { 2594 (void) ldc_mem_unbind_handle(mhdl); 2595 cmn_err(CE_CONT, "?[%d] Failed to get next cookie " 2596 "(mhdl=%lx cnum=%d), err=%d", 2597 vdc->instance, mhdl, i, rv); 2598 if (ldep->align_addr) { 2599 kmem_free(ldep->align_addr, 2600 sizeof (caddr_t) * dep->payload.nbytes); 2601 ldep->align_addr = NULL; 2602 } 2603 return (EAGAIN); 2604 } 2605 } 2606 2607 return (rv); 2608 } 2609 2610 /* 2611 * Interrupt handlers for messages from LDC 2612 */ 2613 2614 /* 2615 * Function: 2616 * vdc_handle_cb() 2617 * 2618 * Description: 2619 * 2620 * Arguments: 2621 * event - Type of event (LDC_EVT_xxx) that triggered the callback 2622 * arg - soft state pointer for this instance of the device driver. 2623 * 2624 * Return Code: 2625 * 0 - Success 2626 */ 2627 static uint_t 2628 vdc_handle_cb(uint64_t event, caddr_t arg) 2629 { 2630 ldc_status_t ldc_state; 2631 int rv = 0; 2632 2633 vdc_t *vdc = (vdc_t *)(void *)arg; 2634 2635 ASSERT(vdc != NULL); 2636 2637 DMSG(1, "[%d] evt=%lx seqID=%ld\n", vdc->instance, event, vdc->seq_num); 2638 2639 /* 2640 * Depending on the type of event that triggered this callback, 2641 * we modify the handhske state or read the data. 2642 * 2643 * NOTE: not done as a switch() as event could be triggered by 2644 * a state change and a read request. Also the ordering of the 2645 * check for the event types is deliberate. 2646 */ 2647 if (event & LDC_EVT_UP) { 2648 DMSG(0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 2649 2650 /* get LDC state */ 2651 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2652 if (rv != 0) { 2653 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2654 vdc->instance, rv); 2655 mutex_enter(&vdc->lock); 2656 vdc_reset_connection(vdc, B_TRUE); 2657 mutex_exit(&vdc->lock); 2658 return (LDC_SUCCESS); 2659 } 2660 2661 /* 2662 * Reset the transaction sequence numbers when LDC comes up. 
2663 * We then kick off the handshake negotiation with the vDisk 2664 * server. 2665 */ 2666 mutex_enter(&vdc->lock); 2667 vdc->seq_num = 1; 2668 vdc->seq_num_reply = 0; 2669 vdc->ldc_state = ldc_state; 2670 ASSERT(ldc_state == LDC_UP); 2671 mutex_exit(&vdc->lock); 2672 2673 vdc_init_handshake_negotiation(vdc); 2674 2675 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2676 } 2677 2678 if (event & LDC_EVT_READ) { 2679 /* 2680 * Wake up the worker thread to process the message 2681 */ 2682 mutex_enter(&vdc->msg_proc_lock); 2683 vdc->msg_pending = B_TRUE; 2684 cv_signal(&vdc->msg_proc_cv); 2685 mutex_exit(&vdc->msg_proc_lock); 2686 2687 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2688 2689 /* that's all we have to do - no need to handle DOWN/RESET */ 2690 return (LDC_SUCCESS); 2691 } 2692 2693 if (event & LDC_EVT_RESET) { 2694 DMSG(0, "[%d] Received LDC RESET event\n", vdc->instance); 2695 2696 /* get LDC state */ 2697 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2698 if (rv != 0) { 2699 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2700 vdc->instance, rv); 2701 ldc_state = LDC_OPEN; 2702 } 2703 mutex_enter(&vdc->lock); 2704 vdc->ldc_state = ldc_state; 2705 vdc_reset_connection(vdc, B_TRUE); 2706 mutex_exit(&vdc->lock); 2707 2708 vdc_init_handshake_negotiation(vdc); 2709 } 2710 2711 if (event & LDC_EVT_DOWN) { 2712 DMSG(0, "[%d] Received LDC DOWN event\n", vdc->instance); 2713 2714 /* get LDC state */ 2715 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2716 if (rv != 0) { 2717 cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", 2718 vdc->instance, rv); 2719 ldc_state = LDC_OPEN; 2720 } 2721 mutex_enter(&vdc->lock); 2722 vdc->ldc_state = ldc_state; 2723 vdc_reset_connection(vdc, B_TRUE); 2724 mutex_exit(&vdc->lock); 2725 } 2726 2727 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 2728 cmn_err(CE_NOTE, "![%d] Unexpected LDC event (%lx) received", 2729 vdc->instance, event); 2730 2731 return (LDC_SUCCESS); 2732 } 2733 2734 
/* -------------------------------------------------------------------------- */ 2735 2736 /* 2737 * The following functions process the incoming messages from vds 2738 */ 2739 2740 2741 /* 2742 * Function: 2743 * vdc_process_msg_thread() 2744 * 2745 * Description: 2746 * 2747 * Arguments: 2748 * vdc - soft state pointer for this instance of the device driver. 2749 * 2750 * Return Code: 2751 * None 2752 */ 2753 static void 2754 vdc_process_msg_thread(vdc_t *vdc) 2755 { 2756 int status = 0; 2757 boolean_t q_has_pkts = B_FALSE; 2758 2759 ASSERT(vdc != NULL); 2760 2761 mutex_enter(&vdc->msg_proc_lock); 2762 DMSG(0, "[%d] Starting\n", vdc->instance); 2763 2764 vdc->msg_proc_thr_state = VDC_THR_RUNNING; 2765 2766 while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) { 2767 2768 DMSG(2, "[%d] Waiting\n", vdc->instance); 2769 while (!vdc->msg_pending) 2770 cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock); 2771 2772 DMSG(2, "[%d] Message Received\n", vdc->instance); 2773 2774 /* check if there is data */ 2775 status = ldc_chkq(vdc->ldc_handle, &q_has_pkts); 2776 if ((status != 0) && 2777 (vdc->msg_proc_thr_state == VDC_THR_RUNNING)) { 2778 cmn_err(CE_NOTE, "[%d] Unable to communicate with vDisk" 2779 " server. 
Cannot check LDC queue: %d", 2780 vdc->instance, status); 2781 mutex_enter(&vdc->lock); 2782 vdc_reset_connection(vdc, B_TRUE); 2783 mutex_exit(&vdc->lock); 2784 vdc->msg_proc_thr_state = VDC_THR_STOP; 2785 continue; 2786 } 2787 2788 if (q_has_pkts) { 2789 DMSG(2, "[%d] new pkt(s) available\n", vdc->instance); 2790 vdc_process_msg(vdc); 2791 } 2792 2793 vdc->msg_pending = B_FALSE; 2794 } 2795 2796 DMSG(0, "[%d] Message processing thread stopped\n", vdc->instance); 2797 vdc->msg_pending = B_FALSE; 2798 vdc->msg_proc_thr_state = VDC_THR_DONE; 2799 cv_signal(&vdc->msg_proc_cv); 2800 mutex_exit(&vdc->msg_proc_lock); 2801 thread_exit(); 2802 } 2803 2804 2805 /* 2806 * Function: 2807 * vdc_process_msg() 2808 * 2809 * Description: 2810 * This function is called by the message processing thread each time it 2811 * is triggered when LDC sends an interrupt to indicate that there are 2812 * more packets on the queue. When it is called it will continue to loop 2813 * and read the messages until there are no more left of the queue. If it 2814 * encounters an invalid sized message it will drop it and check the next 2815 * message. 2816 * 2817 * Arguments: 2818 * arg - soft state pointer for this instance of the device driver. 2819 * 2820 * Return Code: 2821 * None. 
2822 */ 2823 static void 2824 vdc_process_msg(void *arg) 2825 { 2826 vdc_t *vdc = (vdc_t *)(void *)arg; 2827 vio_msg_t vio_msg; 2828 size_t nbytes = sizeof (vio_msg); 2829 int status; 2830 2831 ASSERT(vdc != NULL); 2832 2833 mutex_enter(&vdc->lock); 2834 2835 DMSG(1, "[%d]\n", vdc->instance); 2836 2837 for (;;) { 2838 2839 /* read all messages - until no more left */ 2840 status = ldc_read(vdc->ldc_handle, (caddr_t)&vio_msg, &nbytes); 2841 2842 if (status) { 2843 cmn_err(CE_CONT, "?[%d] Error %d reading LDC msg\n", 2844 vdc->instance, status); 2845 2846 /* if status is ECONNRESET --- reset vdc state */ 2847 if (status == EIO || status == ECONNRESET) { 2848 vdc_reset_connection(vdc, B_TRUE); 2849 } 2850 2851 mutex_exit(&vdc->lock); 2852 return; 2853 } 2854 2855 if ((nbytes > 0) && (nbytes < sizeof (vio_msg_tag_t))) { 2856 cmn_err(CE_CONT, "?[%d] Expect %lu bytes; recv'd %lu\n", 2857 vdc->instance, sizeof (vio_msg_tag_t), nbytes); 2858 mutex_exit(&vdc->lock); 2859 return; 2860 } 2861 2862 if (nbytes == 0) { 2863 DMSG(3, "[%d] ldc_read() done..\n", vdc->instance); 2864 mutex_exit(&vdc->lock); 2865 return; 2866 } 2867 2868 DMSG(2, "[%d] (%x/%x/%x)\n", vdc->instance, 2869 vio_msg.tag.vio_msgtype, 2870 vio_msg.tag.vio_subtype, 2871 vio_msg.tag.vio_subtype_env); 2872 2873 /* 2874 * Verify the Session ID of the message 2875 * 2876 * Every message after the Version has been negotiated should 2877 * have the correct session ID set. 
2878 */ 2879 if ((vio_msg.tag.vio_sid != vdc->session_id) && 2880 (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) { 2881 cmn_err(CE_NOTE, "[%d] Invalid SID: received 0x%x, " 2882 "expected 0x%lx [seq num %lx @ %d]", 2883 vdc->instance, vio_msg.tag.vio_sid, 2884 vdc->session_id, 2885 ((vio_dring_msg_t *)&vio_msg)->seq_num, 2886 ((vio_dring_msg_t *)&vio_msg)->start_idx); 2887 vdc_reset_connection(vdc, B_TRUE); 2888 mutex_exit(&vdc->lock); 2889 return; 2890 } 2891 2892 switch (vio_msg.tag.vio_msgtype) { 2893 case VIO_TYPE_CTRL: 2894 status = vdc_process_ctrl_msg(vdc, vio_msg); 2895 break; 2896 case VIO_TYPE_DATA: 2897 status = vdc_process_data_msg(vdc, vio_msg); 2898 break; 2899 case VIO_TYPE_ERR: 2900 status = vdc_process_err_msg(vdc, vio_msg); 2901 break; 2902 default: 2903 cmn_err(CE_NOTE, "[%d] Unknown VIO message type", 2904 vdc->instance); 2905 status = EINVAL; 2906 break; 2907 } 2908 2909 if (status != 0) { 2910 DMSG(0, "[%d] Error (%d) occurred processing req %lu\n", 2911 vdc->instance, status, 2912 vdc->req_id_proc); 2913 vdc_reset_connection(vdc, B_TRUE); 2914 2915 /* we need to drop the lock to trigger the handshake */ 2916 mutex_exit(&vdc->lock); 2917 vdc_init_handshake_negotiation(vdc); 2918 mutex_enter(&vdc->lock); 2919 } 2920 } 2921 _NOTE(NOTREACHED) 2922 } 2923 2924 /* 2925 * Function: 2926 * vdc_process_ctrl_msg() 2927 * 2928 * Description: 2929 * This function is called by the message processing thread each time 2930 * an LDC message with a msgtype of VIO_TYPE_CTRL is received. 2931 * 2932 * Arguments: 2933 * vdc - soft state pointer for this instance of the device driver. 2934 * msg - the LDC message sent by vds 2935 * 2936 * Return Codes: 2937 * 0 - Success. 2938 * EPROTO - A message was received which shouldn't have happened according 2939 * to the protocol 2940 * ENOTSUP - An action which is allowed according to the protocol but which 2941 * isn't (or doesn't need to be) implemented yet. 
2942 * EINVAL - An invalid value was returned as part of a message. 2943 */ 2944 static int 2945 vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg) 2946 { 2947 int status = -1; 2948 2949 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL); 2950 ASSERT(vdc != NULL); 2951 ASSERT(mutex_owned(&vdc->lock)); 2952 2953 /* Depending on which state we are in; process the message */ 2954 switch (vdc->state) { 2955 case VD_STATE_INIT: 2956 status = vdc_handle_ver_msg(vdc, (vio_ver_msg_t *)&msg); 2957 break; 2958 2959 case VD_STATE_VER: 2960 status = vdc_handle_attr_msg(vdc, (vd_attr_msg_t *)&msg); 2961 break; 2962 2963 case VD_STATE_ATTR: 2964 status = vdc_handle_dring_reg_msg(vdc, 2965 (vio_dring_reg_msg_t *)&msg); 2966 break; 2967 2968 case VD_STATE_RDX: 2969 if (msg.tag.vio_subtype_env != VIO_RDX) { 2970 status = EPROTO; 2971 break; 2972 } 2973 2974 DMSG(0, "[%d] Received RDX: handshake done\n", vdc->instance); 2975 2976 vdc->hshake_cnt = 0; /* reset failed handshake count */ 2977 status = 0; 2978 vdc->state = VD_STATE_DATA; 2979 2980 cv_broadcast(&vdc->attach_cv); 2981 break; 2982 2983 case VD_STATE_DATA: 2984 default: 2985 cmn_err(CE_NOTE, "[%d] Unexpected handshake state %d", 2986 vdc->instance, vdc->state); 2987 status = EPROTO; 2988 break; 2989 } 2990 2991 return (status); 2992 } 2993 2994 2995 /* 2996 * Function: 2997 * vdc_process_data_msg() 2998 * 2999 * Description: 3000 * This function is called by the message processing thread each time 3001 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 3002 * be an ACK or NACK from vds[1] which vdc handles as follows. 3003 * ACK - wake up the waiting thread 3004 * NACK - resend any messages necessary 3005 * 3006 * [1] Although the message format allows it, vds should not send a 3007 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 3008 * some bizarre reason it does, vdc will reset the connection. 3009 * 3010 * Arguments: 3011 * vdc - soft state pointer for this instance of the device driver. 
3012 * msg - the LDC message sent by vds 3013 * 3014 * Return Code: 3015 * 0 - Success. 3016 * > 0 - error value returned by LDC 3017 */ 3018 static int 3019 vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg) 3020 { 3021 int status = 0; 3022 vdc_local_desc_t *ldep = NULL; 3023 vio_dring_msg_t *dring_msg = NULL; 3024 uint_t start; 3025 int end; 3026 uint_t count = 0; 3027 uint_t operation; 3028 uint_t idx; 3029 3030 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_DATA); 3031 ASSERT(vdc != NULL); 3032 ASSERT(mutex_owned(&vdc->lock)); 3033 3034 dring_msg = (vio_dring_msg_t *)&msg; 3035 3036 /* 3037 * Check to see if the message has bogus data 3038 */ 3039 idx = start = dring_msg->start_idx; 3040 end = dring_msg->end_idx; 3041 if ((start >= vdc->dring_len) || 3042 (end >= vdc->dring_len) || (end < -1)) { 3043 cmn_err(CE_CONT, "?[%d] Bogus ACK data : start %d, end %d\n", 3044 vdc->instance, start, end); 3045 return (EINVAL); 3046 } 3047 3048 DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdc); 3049 3050 /* 3051 * Verify that the sequence number is what vdc expects. 
3052 */ 3053 switch (vdc_verify_seq_num(vdc, dring_msg)) { 3054 case VDC_SEQ_NUM_TODO: 3055 break; /* keep processing this message */ 3056 case VDC_SEQ_NUM_SKIP: 3057 return (0); 3058 case VDC_SEQ_NUM_INVALID: 3059 return (ENXIO); 3060 } 3061 3062 if (msg.tag.vio_subtype == VIO_SUBTYPE_NACK) { 3063 DMSG(0, "[%d] DATA NACK\n", vdc->instance); 3064 VDC_DUMP_DRING_MSG(dring_msg); 3065 return (EIO); 3066 3067 } else if (msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 3068 return (EPROTO); 3069 } 3070 3071 ldep = &vdc->local_dring[start]; 3072 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 3073 mutex_enter(&ldep->lock); 3074 operation = ldep->dep->payload.operation; 3075 vdc->req_id_proc = ldep->dep->payload.req_id; 3076 vdc->dring_proc_idx = idx; 3077 ASSERT(ldep->dep->hdr.dstate == VIO_DESC_DONE); 3078 3079 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 3080 bioerror(ldep->buf, ldep->dep->payload.status); 3081 biodone(ldep->buf); 3082 3083 DTRACE_IO2(vdone, buf_t *, ldep->buf, vdc_t *, vdc); 3084 3085 /* Clear the DRing entry */ 3086 status = vdc_depopulate_descriptor(vdc, idx); 3087 } 3088 cv_signal(&ldep->cv); 3089 mutex_exit(&ldep->lock); 3090 } 3091 3092 /* probe gives the count of how many entries were processed */ 3093 DTRACE_IO2(processed, int, count, vdc_t *, vdc); 3094 3095 return (status); 3096 } 3097 3098 /* 3099 * Function: 3100 * vdc_process_err_msg() 3101 * 3102 * NOTE: No error messages are used as part of the vDisk protocol 3103 */ 3104 static int 3105 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg) 3106 { 3107 _NOTE(ARGUNUSED(vdc)) 3108 _NOTE(ARGUNUSED(msg)) 3109 3110 ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); 3111 cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance); 3112 3113 return (ENOTSUP); 3114 } 3115 3116 /* 3117 * Function: 3118 * vdc_handle_ver_msg() 3119 * 3120 * Description: 3121 * 3122 * Arguments: 3123 * vdc - soft state pointer for this instance of the device driver. 
3124 * ver_msg - LDC message sent by vDisk server 3125 * 3126 * Return Code: 3127 * 0 - Success 3128 */ 3129 static int 3130 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 3131 { 3132 int status = 0; 3133 3134 ASSERT(vdc != NULL); 3135 ASSERT(mutex_owned(&vdc->lock)); 3136 3137 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 3138 return (EPROTO); 3139 } 3140 3141 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 3142 return (EINVAL); 3143 } 3144 3145 switch (ver_msg->tag.vio_subtype) { 3146 case VIO_SUBTYPE_ACK: 3147 /* 3148 * We check to see if the version returned is indeed supported 3149 * (The server may have also adjusted the minor number downwards 3150 * and if so 'ver_msg' will contain the actual version agreed) 3151 */ 3152 if (vdc_is_supported_version(ver_msg)) { 3153 vdc->ver.major = ver_msg->ver_major; 3154 vdc->ver.minor = ver_msg->ver_minor; 3155 ASSERT(vdc->ver.major > 0); 3156 3157 vdc->state = VD_STATE_VER; 3158 status = vdc_init_attr_negotiation(vdc); 3159 } else { 3160 status = EPROTO; 3161 } 3162 break; 3163 3164 case VIO_SUBTYPE_NACK: 3165 /* 3166 * call vdc_is_supported_version() which will return the next 3167 * supported version (if any) in 'ver_msg' 3168 */ 3169 (void) vdc_is_supported_version(ver_msg); 3170 if (ver_msg->ver_major > 0) { 3171 size_t len = sizeof (*ver_msg); 3172 3173 ASSERT(vdc->ver.major > 0); 3174 3175 /* reset the necessary fields and resend */ 3176 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 3177 ver_msg->dev_class = VDEV_DISK; 3178 3179 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 3180 DMSG(0, "[%d] Resend VER info (LDC status = %d)\n", 3181 vdc->instance, status); 3182 if (len != sizeof (*ver_msg)) 3183 status = EBADMSG; 3184 } else { 3185 cmn_err(CE_NOTE, "[%d] No common version with " 3186 "vDisk server", vdc->instance); 3187 status = ENOTSUP; 3188 } 3189 3190 break; 3191 case VIO_SUBTYPE_INFO: 3192 /* 3193 * Handle the case where vds starts handshake 3194 * (for now only vdc is the instigatior) 3195 */ 
3196 status = ENOTSUP; 3197 break; 3198 3199 default: 3200 status = EINVAL; 3201 break; 3202 } 3203 3204 return (status); 3205 } 3206 3207 /* 3208 * Function: 3209 * vdc_handle_attr_msg() 3210 * 3211 * Description: 3212 * 3213 * Arguments: 3214 * vdc - soft state pointer for this instance of the device driver. 3215 * attr_msg - LDC message sent by vDisk server 3216 * 3217 * Return Code: 3218 * 0 - Success 3219 */ 3220 static int 3221 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 3222 { 3223 int status = 0; 3224 3225 ASSERT(vdc != NULL); 3226 ASSERT(mutex_owned(&vdc->lock)); 3227 3228 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 3229 return (EPROTO); 3230 } 3231 3232 switch (attr_msg->tag.vio_subtype) { 3233 case VIO_SUBTYPE_ACK: 3234 /* 3235 * We now verify the attributes sent by vds. 3236 */ 3237 vdc->vdisk_size = attr_msg->vdisk_size; 3238 vdc->vdisk_type = attr_msg->vdisk_type; 3239 3240 DMSG(0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 3241 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 3242 DMSG(0, "[%d] vdisk_block_size: sent %lx acked %x\n", 3243 vdc->instance, vdc->block_size, 3244 attr_msg->vdisk_block_size); 3245 3246 /* 3247 * We don't know at compile time what the vDisk server will 3248 * think are good values but we apply an large (arbitrary) 3249 * upper bound to prevent memory exhaustion in vdc if it was 3250 * allocating a DRing based of huge values sent by the server. 3251 * We probably will never exceed this except if the message 3252 * was garbage. 
3253 */ 3254 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 3255 (PAGESIZE * DEV_BSIZE)) { 3256 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 3257 vdc->block_size = attr_msg->vdisk_block_size; 3258 } else { 3259 cmn_err(CE_NOTE, "[%d] vds block transfer size too big;" 3260 " using max supported by vdc", vdc->instance); 3261 } 3262 3263 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 3264 (attr_msg->vdisk_size > INT64_MAX) || 3265 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 3266 cmn_err(CE_NOTE, "[%d] Invalid attributes from vds", 3267 vdc->instance); 3268 status = EINVAL; 3269 break; 3270 } 3271 3272 vdc->state = VD_STATE_ATTR; 3273 status = vdc_init_dring_negotiate(vdc); 3274 break; 3275 3276 case VIO_SUBTYPE_NACK: 3277 /* 3278 * vds could not handle the attributes we sent so we 3279 * stop negotiating. 3280 */ 3281 status = EPROTO; 3282 break; 3283 3284 case VIO_SUBTYPE_INFO: 3285 /* 3286 * Handle the case where vds starts the handshake 3287 * (for now; vdc is the only supported instigatior) 3288 */ 3289 status = ENOTSUP; 3290 break; 3291 3292 default: 3293 status = ENOTSUP; 3294 break; 3295 } 3296 3297 return (status); 3298 } 3299 3300 /* 3301 * Function: 3302 * vdc_handle_dring_reg_msg() 3303 * 3304 * Description: 3305 * 3306 * Arguments: 3307 * vdc - soft state pointer for this instance of the driver. 
3308 * dring_msg - LDC message sent by vDisk server 3309 * 3310 * Return Code: 3311 * 0 - Success 3312 */ 3313 static int 3314 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 3315 { 3316 int status = 0; 3317 vio_rdx_msg_t msg = {0}; 3318 size_t msglen = sizeof (msg); 3319 3320 ASSERT(vdc != NULL); 3321 ASSERT(mutex_owned(&vdc->lock)); 3322 3323 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 3324 return (EPROTO); 3325 } 3326 3327 switch (dring_msg->tag.vio_subtype) { 3328 case VIO_SUBTYPE_ACK: 3329 /* save the received dring_ident */ 3330 vdc->dring_ident = dring_msg->dring_ident; 3331 DMSG(0, "[%d] Received dring ident=0x%lx\n", 3332 vdc->instance, vdc->dring_ident); 3333 3334 /* 3335 * Send an RDX message to vds to indicate we are ready 3336 * to send data 3337 */ 3338 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 3339 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 3340 msg.tag.vio_subtype_env = VIO_RDX; 3341 msg.tag.vio_sid = vdc->session_id; 3342 status = vdc_send(vdc, (caddr_t)&msg, &msglen); 3343 if (status != 0) { 3344 cmn_err(CE_NOTE, "[%d] Failed to send RDX" 3345 " message (%d)", vdc->instance, status); 3346 break; 3347 } 3348 3349 vdc->state = VD_STATE_RDX; 3350 break; 3351 3352 case VIO_SUBTYPE_NACK: 3353 /* 3354 * vds could not handle the DRing info we sent so we 3355 * stop negotiating. 3356 */ 3357 cmn_err(CE_CONT, "server could not register DRing\n"); 3358 vdc_reset_connection(vdc, B_TRUE); 3359 vdc_destroy_descriptor_ring(vdc); 3360 status = EPROTO; 3361 break; 3362 3363 case VIO_SUBTYPE_INFO: 3364 /* 3365 * Handle the case where vds starts handshake 3366 * (for now only vdc is the instigatior) 3367 */ 3368 status = ENOTSUP; 3369 break; 3370 default: 3371 status = ENOTSUP; 3372 } 3373 3374 return (status); 3375 } 3376 3377 /* 3378 * Function: 3379 * vdc_verify_seq_num() 3380 * 3381 * Description: 3382 * This functions verifies that the sequence number sent back by the vDisk 3383 * server with the latest message is what is expected (i.e. 
it is greater 3384 * than the last seq num sent by the vDisk server and less than or equal 3385 * to the last seq num generated by vdc). 3386 * 3387 * It then checks the request ID to see if any requests need processing 3388 * in the DRing. 3389 * 3390 * Arguments: 3391 * vdc - soft state pointer for this instance of the driver. 3392 * dring_msg - pointer to the LDC message sent by vds 3393 * 3394 * Return Code: 3395 * VDC_SEQ_NUM_TODO - Message needs to be processed 3396 * VDC_SEQ_NUM_SKIP - Message has already been processed 3397 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 3398 * vdc cannot deal with them 3399 */ 3400 static int 3401 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 3402 { 3403 ASSERT(vdc != NULL); 3404 ASSERT(dring_msg != NULL); 3405 ASSERT(mutex_owned(&vdc->lock)); 3406 3407 /* 3408 * Check to see if the messages were responded to in the correct 3409 * order by vds. 3410 */ 3411 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 3412 (dring_msg->seq_num > vdc->seq_num)) { 3413 cmn_err(CE_CONT, "?[%d] Bogus sequence_number %lu: " 3414 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 3415 vdc->instance, dring_msg->seq_num, 3416 vdc->seq_num_reply, vdc->seq_num, 3417 vdc->req_id_proc, vdc->req_id); 3418 return (VDC_SEQ_NUM_INVALID); 3419 } 3420 vdc->seq_num_reply = dring_msg->seq_num; 3421 3422 if (vdc->req_id_proc < vdc->req_id) 3423 return (VDC_SEQ_NUM_TODO); 3424 else 3425 return (VDC_SEQ_NUM_SKIP); 3426 } 3427 3428 3429 /* 3430 * Function: 3431 * vdc_is_supported_version() 3432 * 3433 * Description: 3434 * This routine checks if the major/minor version numbers specified in 3435 * 'ver_msg' are supported. 
If not it finds the next version that is 3436 * in the supported version list 'vdc_version[]' and sets the fields in 3437 * 'ver_msg' to those values 3438 * 3439 * Arguments: 3440 * ver_msg - LDC message sent by vDisk server 3441 * 3442 * Return Code: 3443 * B_TRUE - Success 3444 * B_FALSE - Version not supported 3445 */ 3446 static boolean_t 3447 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 3448 { 3449 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 3450 3451 for (int i = 0; i < vdc_num_versions; i++) { 3452 ASSERT(vdc_version[i].major > 0); 3453 ASSERT((i == 0) || 3454 (vdc_version[i].major < vdc_version[i-1].major)); 3455 3456 /* 3457 * If the major versions match, adjust the minor version, if 3458 * necessary, down to the highest value supported by this 3459 * client. The server should support all minor versions lower 3460 * than the value it sent 3461 */ 3462 if (ver_msg->ver_major == vdc_version[i].major) { 3463 if (ver_msg->ver_minor > vdc_version[i].minor) { 3464 DMSG(0, "Adjusting minor version from %u to %u", 3465 ver_msg->ver_minor, vdc_version[i].minor); 3466 ver_msg->ver_minor = vdc_version[i].minor; 3467 } 3468 return (B_TRUE); 3469 } 3470 3471 /* 3472 * If the message contains a higher major version number, set 3473 * the message's major/minor versions to the current values 3474 * and return false, so this message will get resent with 3475 * these values, and the server will potentially try again 3476 * with the same or a lower version 3477 */ 3478 if (ver_msg->ver_major > vdc_version[i].major) { 3479 ver_msg->ver_major = vdc_version[i].major; 3480 ver_msg->ver_minor = vdc_version[i].minor; 3481 DMSG(0, "Suggesting major/minor (0x%x/0x%x)\n", 3482 ver_msg->ver_major, ver_msg->ver_minor); 3483 3484 return (B_FALSE); 3485 } 3486 3487 /* 3488 * Otherwise, the message's major version is less than the 3489 * current major version, so continue the loop to the next 3490 * (lower) supported version 3491 */ 3492 } 3493 3494 /* 3495 
* No common version was found; "ground" the version pair in the 3496 * message to terminate negotiation 3497 */ 3498 ver_msg->ver_major = 0; 3499 ver_msg->ver_minor = 0; 3500 3501 return (B_FALSE); 3502 } 3503 /* -------------------------------------------------------------------------- */ 3504 3505 /* 3506 * DKIO(7) support 3507 */ 3508 3509 typedef struct vdc_dk_arg { 3510 struct dk_callback dkc; 3511 int mode; 3512 dev_t dev; 3513 vdc_t *vdc; 3514 } vdc_dk_arg_t; 3515 3516 /* 3517 * Function: 3518 * vdc_dkio_flush_cb() 3519 * 3520 * Description: 3521 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 3522 * by kernel code. 3523 * 3524 * Arguments: 3525 * arg - a pointer to a vdc_dk_arg_t structure. 3526 */ 3527 void 3528 vdc_dkio_flush_cb(void *arg) 3529 { 3530 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 3531 struct dk_callback *dkc = NULL; 3532 vdc_t *vdc = NULL; 3533 int rv; 3534 3535 if (dk_arg == NULL) { 3536 cmn_err(CE_CONT, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 3537 return; 3538 } 3539 dkc = &dk_arg->dkc; 3540 vdc = dk_arg->vdc; 3541 ASSERT(vdc != NULL); 3542 3543 rv = vdc_populate_descriptor(vdc, NULL, 0, VD_OP_FLUSH, 3544 dk_arg->mode, SDPART(getminor(dk_arg->dev))); 3545 if (rv != 0) { 3546 DMSG(0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 3547 vdc->instance, rv, 3548 ddi_model_convert_from(dk_arg->mode & FMODELS)); 3549 } 3550 3551 /* 3552 * Trigger the call back to notify the caller the the ioctl call has 3553 * been completed. 
3554 */ 3555 if ((dk_arg->mode & FKIOCTL) && 3556 (dkc != NULL) && 3557 (dkc->dkc_callback != NULL)) { 3558 ASSERT(dkc->dkc_cookie != NULL); 3559 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 3560 } 3561 3562 /* Indicate that one less DKIO write flush is outstanding */ 3563 mutex_enter(&vdc->lock); 3564 vdc->dkio_flush_pending--; 3565 ASSERT(vdc->dkio_flush_pending >= 0); 3566 mutex_exit(&vdc->lock); 3567 3568 /* free the mem that was allocated when the callback was dispatched */ 3569 kmem_free(arg, sizeof (vdc_dk_arg_t)); 3570 } 3571 3572 /* 3573 * This structure is used in the DKIO(7I) array below. 3574 */ 3575 typedef struct vdc_dk_ioctl { 3576 uint8_t op; /* VD_OP_XXX value */ 3577 int cmd; /* Solaris ioctl operation number */ 3578 size_t nbytes; /* size of structure to be copied */ 3579 3580 /* function to convert between vDisk and Solaris structure formats */ 3581 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 3582 int mode, int dir); 3583 } vdc_dk_ioctl_t; 3584 3585 /* 3586 * Subset of DKIO(7I) operations currently supported 3587 */ 3588 static vdc_dk_ioctl_t dk_ioctl[] = { 3589 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, sizeof (int), 3590 vdc_null_copy_func}, 3591 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 3592 vdc_null_copy_func}, 3593 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 3594 vdc_null_copy_func}, 3595 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 3596 vdc_get_vtoc_convert}, 3597 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 3598 vdc_set_vtoc_convert}, 3599 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 3600 vdc_get_geom_convert}, 3601 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 3602 vdc_get_geom_convert}, 3603 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 3604 vdc_get_geom_convert}, 3605 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 3606 vdc_set_geom_convert}, 3607 3608 /* 3609 * These particular ioctls are not sent to the server - vdc fakes up 3610 * the necessary info. 
3611 */ 3612 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 3613 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 3614 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 3615 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 3616 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 3617 }; 3618 3619 /* 3620 * Function: 3621 * vd_process_ioctl() 3622 * 3623 * Description: 3624 * This routine processes disk specific ioctl calls 3625 * 3626 * Arguments: 3627 * dev - the device number 3628 * cmd - the operation [dkio(7I)] to be processed 3629 * arg - pointer to user provided structure 3630 * (contains data to be set or reference parameter for get) 3631 * mode - bit flag, indicating open settings, 32/64 bit type, etc 3632 * 3633 * Return Code: 3634 * 0 3635 * EFAULT 3636 * ENXIO 3637 * EIO 3638 * ENOTSUP 3639 */ 3640 static int 3641 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode) 3642 { 3643 int instance = SDUNIT(getminor(dev)); 3644 vdc_t *vdc = NULL; 3645 int rv = -1; 3646 int idx = 0; /* index into dk_ioctl[] */ 3647 size_t len = 0; /* #bytes to send to vds */ 3648 size_t alloc_len = 0; /* #bytes to allocate mem for */ 3649 caddr_t mem_p = NULL; 3650 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 3651 struct vtoc vtoc_saved; 3652 3653 DMSG(0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 3654 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 3655 3656 vdc = ddi_get_soft_state(vdc_state, instance); 3657 if (vdc == NULL) { 3658 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 3659 instance); 3660 return (ENXIO); 3661 } 3662 3663 /* 3664 * Check to see if we can communicate with the vDisk server 3665 */ 3666 if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) { 3667 DMSG(0, "[%d] Not ready to transmit data\n", instance); 3668 return (ENOLINK); 3669 } 3670 3671 /* 3672 * Validate the ioctl operation to be performed. 
3673 * 3674 * If we have looped through the array without finding a match then we 3675 * don't support this ioctl. 3676 */ 3677 for (idx = 0; idx < nioctls; idx++) { 3678 if (cmd == dk_ioctl[idx].cmd) 3679 break; 3680 } 3681 3682 if (idx >= nioctls) { 3683 cmn_err(CE_CONT, "?[%d] Unsupported ioctl (0x%x)\n", 3684 vdc->instance, cmd); 3685 return (ENOTSUP); 3686 } 3687 3688 len = dk_ioctl[idx].nbytes; 3689 3690 /* 3691 * Deal with the ioctls which the server does not provide. vdc can 3692 * fake these up and return immediately 3693 */ 3694 switch (cmd) { 3695 case CDROMREADOFFSET: 3696 case DKIOCREMOVABLE: 3697 case USCSICMD: 3698 return (ENOTTY); 3699 3700 case DKIOCINFO: 3701 { 3702 struct dk_cinfo cinfo; 3703 if (vdc->cinfo == NULL) 3704 return (ENXIO); 3705 3706 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 3707 cinfo.dki_partition = SDPART(getminor(dev)); 3708 3709 rv = ddi_copyout(&cinfo, (void *)arg, 3710 sizeof (struct dk_cinfo), mode); 3711 if (rv != 0) 3712 return (EFAULT); 3713 3714 return (0); 3715 } 3716 3717 case DKIOCGMEDIAINFO: 3718 { 3719 if (vdc->minfo == NULL) 3720 return (ENXIO); 3721 3722 rv = ddi_copyout(vdc->minfo, (void *)arg, 3723 sizeof (struct dk_minfo), mode); 3724 if (rv != 0) 3725 return (EFAULT); 3726 3727 return (0); 3728 } 3729 3730 case DKIOCFLUSHWRITECACHE: 3731 { 3732 struct dk_callback *dkc = (struct dk_callback *)arg; 3733 vdc_dk_arg_t *dkarg = NULL; 3734 3735 DMSG(1, "[%d] Flush W$: mode %x\n", instance, mode); 3736 3737 /* 3738 * If the backing device is not a 'real' disk then the 3739 * W$ operation request to the vDisk server will fail 3740 * so we might as well save the cycles and return now. 3741 */ 3742 if (vdc->vdisk_type != VD_DISK_TYPE_DISK) 3743 return (ENOTTY); 3744 3745 /* 3746 * If arg is NULL, then there is no callback function 3747 * registered and the call operates synchronously; we 3748 * break and continue with the rest of the function and 3749 * wait for vds to return (i.e. 
after the request to 3750 * vds returns successfully, all writes completed prior 3751 * to the ioctl will have been flushed from the disk 3752 * write cache to persistent media. 3753 * 3754 * If a callback function is registered, we dispatch 3755 * the request on a task queue and return immediately. 3756 * The callback will deal with informing the calling 3757 * thread that the flush request is completed. 3758 */ 3759 if (dkc == NULL) 3760 break; 3761 3762 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 3763 3764 dkarg->mode = mode; 3765 dkarg->dev = dev; 3766 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 3767 3768 mutex_enter(&vdc->lock); 3769 vdc->dkio_flush_pending++; 3770 dkarg->vdc = vdc; 3771 mutex_exit(&vdc->lock); 3772 3773 /* put the request on a task queue */ 3774 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 3775 (void *)dkarg, DDI_SLEEP); 3776 3777 return (rv == NULL ? ENOMEM : 0); 3778 } 3779 } 3780 3781 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 3782 ASSERT(dk_ioctl[idx].op != 0); 3783 3784 /* LDC requires that the memory being mapped is 8-byte aligned */ 3785 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 3786 DMSG(1, "[%d] struct size %ld alloc %ld\n", instance, len, alloc_len); 3787 3788 ASSERT(alloc_len != 0); /* sanity check */ 3789 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 3790 3791 if (cmd == DKIOCSVTOC) { 3792 /* 3793 * Save a copy of the current VTOC so that we can roll back 3794 * if the setting of the new VTOC fails. 
3795 */ 3796 bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc)); 3797 } 3798 3799 /* 3800 * Call the conversion function for this ioctl whhich if necessary 3801 * converts from the Solaris format to the format ARC'ed 3802 * as part of the vDisk protocol (FWARC 2006/195) 3803 */ 3804 ASSERT(dk_ioctl[idx].convert != NULL); 3805 rv = (dk_ioctl[idx].convert)(vdc, arg, mem_p, mode, VD_COPYIN); 3806 if (rv != 0) { 3807 DMSG(0, "[%d] convert func returned %d for ioctl 0x%x\n", 3808 instance, rv, cmd); 3809 if (mem_p != NULL) 3810 kmem_free(mem_p, alloc_len); 3811 return (rv); 3812 } 3813 3814 /* 3815 * send request to vds to service the ioctl. 3816 */ 3817 rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, dk_ioctl[idx].op, 3818 mode, SDPART((getminor(dev)))); 3819 if (rv != 0) { 3820 /* 3821 * This is not necessarily an error. The ioctl could 3822 * be returning a value such as ENOTTY to indicate 3823 * that the ioctl is not applicable. 3824 */ 3825 DMSG(0, "[%d] vds returned %d for ioctl 0x%x\n", 3826 instance, rv, cmd); 3827 if (mem_p != NULL) 3828 kmem_free(mem_p, alloc_len); 3829 3830 if (cmd == DKIOCSVTOC) { 3831 /* update of the VTOC has failed, roll back */ 3832 bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc)); 3833 } 3834 3835 return (rv); 3836 } 3837 3838 if (cmd == DKIOCSVTOC) { 3839 /* 3840 * The VTOC has been changed, try and update the device 3841 * node properties. Failing to set the properties should 3842 * not cause an error to be return the caller though. 3843 */ 3844 if (vdc_create_device_nodes_props(vdc)) { 3845 cmn_err(CE_NOTE, "![%d] Failed to update device nodes" 3846 " properties", vdc->instance); 3847 } 3848 } 3849 3850 /* 3851 * Call the conversion function (if it exists) for this ioctl 3852 * which converts from the format ARC'ed as part of the vDisk 3853 * protocol (FWARC 2006/195) back to a format understood by 3854 * the rest of Solaris. 
3855 */ 3856 rv = (dk_ioctl[idx].convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 3857 if (rv != 0) { 3858 DMSG(0, "[%d] convert func returned %d for ioctl 0x%x\n", 3859 instance, rv, cmd); 3860 if (mem_p != NULL) 3861 kmem_free(mem_p, alloc_len); 3862 return (rv); 3863 } 3864 3865 if (mem_p != NULL) 3866 kmem_free(mem_p, alloc_len); 3867 3868 return (rv); 3869 } 3870 3871 /* 3872 * Function: 3873 * 3874 * Description: 3875 * This is an empty conversion function used by ioctl calls which 3876 * do not need to convert the data being passed in/out to userland 3877 */ 3878 static int 3879 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 3880 { 3881 _NOTE(ARGUNUSED(vdc)) 3882 _NOTE(ARGUNUSED(from)) 3883 _NOTE(ARGUNUSED(to)) 3884 _NOTE(ARGUNUSED(mode)) 3885 _NOTE(ARGUNUSED(dir)) 3886 3887 return (0); 3888 } 3889 3890 /* 3891 * Function: 3892 * vdc_get_vtoc_convert() 3893 * 3894 * Description: 3895 * This routine performs the necessary convertions from the DKIOCGVTOC 3896 * Solaris structure to the format defined in FWARC 2006/195. 3897 * 3898 * In the struct vtoc definition, the timestamp field is marked as not 3899 * supported so it is not part of vDisk protocol (FWARC 2006/195). 3900 * However SVM uses that field to check it can write into the VTOC, 3901 * so we fake up the info of that field. 3902 * 3903 * Arguments: 3904 * vdc - the vDisk client 3905 * from - the buffer containing the data to be copied from 3906 * to - the buffer to be copied to 3907 * mode - flags passed to ioctl() call 3908 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 3909 * 3910 * Return Code: 3911 * 0 - Success 3912 * ENXIO - incorrect buffer passed in. 3913 * EFAULT - ddi_copyout routine encountered an error. 
3914 */ 3915 static int 3916 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 3917 { 3918 int i; 3919 void *tmp_mem = NULL; 3920 void *tmp_memp; 3921 struct vtoc vt; 3922 struct vtoc32 vt32; 3923 int copy_len = 0; 3924 int rv = 0; 3925 3926 if (dir != VD_COPYOUT) 3927 return (0); /* nothing to do */ 3928 3929 if ((from == NULL) || (to == NULL)) 3930 return (ENXIO); 3931 3932 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 3933 copy_len = sizeof (struct vtoc32); 3934 else 3935 copy_len = sizeof (struct vtoc); 3936 3937 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 3938 3939 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 3940 3941 /* fake the VTOC timestamp field */ 3942 for (i = 0; i < V_NUMPAR; i++) { 3943 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 3944 } 3945 3946 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 3947 vtoctovtoc32(vt, vt32); 3948 tmp_memp = &vt32; 3949 } else { 3950 tmp_memp = &vt; 3951 } 3952 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 3953 if (rv != 0) 3954 rv = EFAULT; 3955 3956 kmem_free(tmp_mem, copy_len); 3957 return (rv); 3958 } 3959 3960 /* 3961 * Function: 3962 * vdc_set_vtoc_convert() 3963 * 3964 * Description: 3965 * This routine performs the necessary convertions from the DKIOCSVTOC 3966 * Solaris structure to the format defined in FWARC 2006/195. 
3967 * 3968 * Arguments: 3969 * vdc - the vDisk client 3970 * from - Buffer with data 3971 * to - Buffer where data is to be copied to 3972 * mode - flags passed to ioctl 3973 * dir - direction of copy (in or out) 3974 * 3975 * Return Code: 3976 * 0 - Success 3977 * ENXIO - Invalid buffer passed in 3978 * EFAULT - ddi_copyin of data failed 3979 */ 3980 static int 3981 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 3982 { 3983 void *tmp_mem = NULL; 3984 struct vtoc vt; 3985 struct vtoc *vtp = &vt; 3986 vd_vtoc_t vtvd; 3987 int copy_len = 0; 3988 int rv = 0; 3989 3990 if (dir != VD_COPYIN) 3991 return (0); /* nothing to do */ 3992 3993 if ((from == NULL) || (to == NULL)) 3994 return (ENXIO); 3995 3996 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 3997 copy_len = sizeof (struct vtoc32); 3998 else 3999 copy_len = sizeof (struct vtoc); 4000 4001 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4002 4003 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4004 if (rv != 0) { 4005 kmem_free(tmp_mem, copy_len); 4006 return (EFAULT); 4007 } 4008 4009 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4010 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 4011 } else { 4012 vtp = tmp_mem; 4013 } 4014 4015 /* 4016 * The VTOC is being changed, then vdc needs to update the copy 4017 * it saved in the soft state structure. 
4018 */ 4019 bcopy(vtp, vdc->vtoc, sizeof (struct vtoc)); 4020 4021 VTOC2VD_VTOC(vtp, &vtvd); 4022 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 4023 kmem_free(tmp_mem, copy_len); 4024 4025 return (0); 4026 } 4027 4028 /* 4029 * Function: 4030 * vdc_get_geom_convert() 4031 * 4032 * Description: 4033 * This routine performs the necessary convertions from the DKIOCGGEOM, 4034 * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 4035 * defined in FWARC 2006/195 4036 * 4037 * Arguments: 4038 * vdc - the vDisk client 4039 * from - Buffer with data 4040 * to - Buffer where data is to be copied to 4041 * mode - flags passed to ioctl 4042 * dir - direction of copy (in or out) 4043 * 4044 * Return Code: 4045 * 0 - Success 4046 * ENXIO - Invalid buffer passed in 4047 * EFAULT - ddi_copyout of data failed 4048 */ 4049 static int 4050 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4051 { 4052 _NOTE(ARGUNUSED(vdc)) 4053 4054 struct dk_geom geom; 4055 int copy_len = sizeof (struct dk_geom); 4056 int rv = 0; 4057 4058 if (dir != VD_COPYOUT) 4059 return (0); /* nothing to do */ 4060 4061 if ((from == NULL) || (to == NULL)) 4062 return (ENXIO); 4063 4064 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 4065 rv = ddi_copyout(&geom, to, copy_len, mode); 4066 if (rv != 0) 4067 rv = EFAULT; 4068 4069 return (rv); 4070 } 4071 4072 /* 4073 * Function: 4074 * vdc_set_geom_convert() 4075 * 4076 * Description: 4077 * This routine performs the necessary convertions from the DKIOCSGEOM 4078 * Solaris structure to the format defined in FWARC 2006/195. 
4079 * 4080 * Arguments: 4081 * vdc - the vDisk client 4082 * from - Buffer with data 4083 * to - Buffer where data is to be copied to 4084 * mode - flags passed to ioctl 4085 * dir - direction of copy (in or out) 4086 * 4087 * Return Code: 4088 * 0 - Success 4089 * ENXIO - Invalid buffer passed in 4090 * EFAULT - ddi_copyin of data failed 4091 */ 4092 static int 4093 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4094 { 4095 _NOTE(ARGUNUSED(vdc)) 4096 4097 vd_geom_t vdgeom; 4098 void *tmp_mem = NULL; 4099 int copy_len = sizeof (struct dk_geom); 4100 int rv = 0; 4101 4102 if (dir != VD_COPYIN) 4103 return (0); /* nothing to do */ 4104 4105 if ((from == NULL) || (to == NULL)) 4106 return (ENXIO); 4107 4108 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4109 4110 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4111 if (rv != 0) { 4112 kmem_free(tmp_mem, copy_len); 4113 return (EFAULT); 4114 } 4115 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 4116 bcopy(&vdgeom, to, sizeof (vdgeom)); 4117 kmem_free(tmp_mem, copy_len); 4118 4119 return (0); 4120 } 4121 4122 /* 4123 * Function: 4124 * vdc_create_fake_geometry() 4125 * 4126 * Description: 4127 * This routine fakes up the disk info needed for some DKIO ioctls. 4128 * - DKIOCINFO 4129 * - DKIOCGMEDIAINFO 4130 * 4131 * [ just like lofi(7D) and ramdisk(7D) ] 4132 * 4133 * Arguments: 4134 * vdc - soft state pointer for this instance of the device driver. 
4135 * 4136 * Return Code: 4137 * 0 - Success 4138 */ 4139 static int 4140 vdc_create_fake_geometry(vdc_t *vdc) 4141 { 4142 int rv = 0; 4143 4144 ASSERT(vdc != NULL); 4145 4146 /* 4147 * DKIOCINFO support 4148 */ 4149 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 4150 4151 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 4152 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 4153 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 4154 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 4155 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 4156 vdc->cinfo->dki_flags = DKI_FMTVOL; 4157 vdc->cinfo->dki_cnum = 0; 4158 vdc->cinfo->dki_addr = 0; 4159 vdc->cinfo->dki_space = 0; 4160 vdc->cinfo->dki_prio = 0; 4161 vdc->cinfo->dki_vec = 0; 4162 vdc->cinfo->dki_unit = vdc->instance; 4163 vdc->cinfo->dki_slave = 0; 4164 /* 4165 * The partition number will be created on the fly depending on the 4166 * actual slice (i.e. minor node) that is used to request the data. 4167 */ 4168 vdc->cinfo->dki_partition = 0; 4169 4170 /* 4171 * DKIOCGMEDIAINFO support 4172 */ 4173 if (vdc->minfo == NULL) 4174 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 4175 vdc->minfo->dki_media_type = DK_FIXED_DISK; 4176 vdc->minfo->dki_capacity = 1; 4177 vdc->minfo->dki_lbsize = DEV_BSIZE; 4178 4179 return (rv); 4180 } 4181 4182 /* 4183 * Function: 4184 * vdc_setup_disk_layout() 4185 * 4186 * Description: 4187 * This routine discovers all the necessary details about the "disk" 4188 * by requesting the data that is available from the vDisk server and by 4189 * faking up the rest of the data. 4190 * 4191 * Arguments: 4192 * vdc - soft state pointer for this instance of the device driver. 
4193 * 4194 * Return Code: 4195 * 0 - Success 4196 */ 4197 static int 4198 vdc_setup_disk_layout(vdc_t *vdc) 4199 { 4200 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 4201 dev_t dev; 4202 int slice = 0; 4203 int rv; 4204 4205 ASSERT(vdc != NULL); 4206 4207 rv = vdc_create_fake_geometry(vdc); 4208 if (rv != 0) { 4209 cmn_err(CE_NOTE, "[%d] Failed to create disk geometry (err%d)", 4210 vdc->instance, rv); 4211 } 4212 4213 if (vdc->vtoc == NULL) 4214 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 4215 4216 dev = makedevice(ddi_driver_major(vdc->dip), 4217 VD_MAKE_DEV(vdc->instance, 0)); 4218 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); 4219 if (rv) { 4220 cmn_err(CE_NOTE, "[%d] Failed to get VTOC (err=%d)", 4221 vdc->instance, rv); 4222 return (rv); 4223 } 4224 4225 /* 4226 * find the slice that represents the entire "disk" and use that to 4227 * read the disk label. The convention in Solaris is that slice 2 4228 * represents the whole disk so we check that it is, otherwise we 4229 * default to slice 0 4230 */ 4231 if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && 4232 (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { 4233 slice = 2; 4234 } else { 4235 slice = 0; 4236 } 4237 4238 /* 4239 * Read disk label from start of disk 4240 */ 4241 vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); 4242 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4243 bioinit(buf); 4244 buf->b_un.b_addr = (caddr_t)vdc->label; 4245 buf->b_bcount = DK_LABEL_SIZE; 4246 buf->b_flags = B_BUSY | B_READ; 4247 buf->b_dev = dev; 4248 rv = vdc_populate_descriptor(vdc, (caddr_t)buf, DK_LABEL_SIZE, 4249 VD_OP_BREAD, 0, slice); 4250 rv = biowait(buf); 4251 biofini(buf); 4252 kmem_free(buf, sizeof (buf_t)); 4253 4254 return (rv); 4255 } 4256