/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * sun4v LDC Link Layer
 */
#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/ksynch.h>
#include <sys/modctl.h>
#include <sys/stat.h>	/* needed for S_IFBLK and S_IFCHR */
#include <sys/debug.h>
#include <sys/promif.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cyclic.h>
#include <sys/machsystm.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/intreg.h>
#include <sys/machcpuvar.h>
#include <sys/mmu.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/hat_sfmmu.h>
#include <sys/vm_machparam.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <sys/note.h>
#include <sys/ivintr.h>
#include <sys/hypervisor_api.h>
#include <sys/ldc.h>
#include <sys/ldc_impl.h>
#include <sys/cnex.h>
#include <sys/hsvc.h>

/* Core internal functions */
static int i_ldc_h2v_error(int h_error);
static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
static void i_ldc_reset_state(ldc_chan_t *ldcp);
static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);

static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg);

/* Interrupt handling functions */
static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);

/* Read method functions */
static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);

/* Write method functions */
static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);

/* Pkt processing internal functions */
static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);

/* Memory synchronization internal functions */
static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
    uint8_t direction, uint64_t offset, size_t size);
static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
    uint8_t direction, uint64_t start, uint64_t end);

/* LDC Version */
static ldc_ver_t ldc_versions[] = { {1, 0} };

/* number of supported versions */
#define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))

/* Module State Pointer */
static ldc_soft_state_t *ldcssp;

static struct modldrv md = {
	&mod_miscops,			/* This is a misc module */
	"sun4v LDC module v%I%",	/* Name of the module */
};

static struct modlinkage ml = {
	MODREV_1,
	&md,
	NULL
};

static uint64_t ldc_sup_minor;		/* Supported minor number */
static hsvc_info_t ldc_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
};

/*
 * LDC framework supports mapping remote domain's memory
 * either directly or via shadow memory pages. Default
 * support is currently implemented via shadow copy.
 * Direct map can be enabled by setting 'ldc_shmem_enabled'.
 */
int ldc_shmem_enabled = 0;

/*
 * The number of MTU-sized messages that can be stored in
 * the LDC Tx queue. The number of Tx queue entries is
 * then computed as (mtu * mtu_msgs)/sizeof (queue_entry).
 */
uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;

/*
 * The minimum queue length. This is the size of the smallest
 * LDC queue. If the computed value is less than this default,
 * the queue length is rounded up to 'ldc_queue_entries'.
 */
uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;

/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor. The
 * default number of entries in the table is set to
 * 'ldc_maptable_entries'.
 */
uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;

/*
 * LDC retry count and delay - when the HV returns EWOULDBLOCK
 * the operation is retried 'ldc_max_retries' times with a
 * wait of 'ldc_delay' usecs between each retry.
 */
int ldc_max_retries = LDC_MAX_RETRIES;
clock_t ldc_delay = LDC_DELAY;

/*
 * Delay between each retry of channel unregistration in
 * ldc_close(), to wait for pending interrupts to complete.
 */
clock_t ldc_close_delay = LDC_CLOSE_DELAY;
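/*
 * Worked example of the queue sizing done in ldc_init() below, with
 * purely illustrative values (the real ones come from ldc_impl.h and
 * the client's attributes): for mtu = 4096, ldc_mtu_msgs = 64 and an
 * unreliable-mode payload of 56 bytes per 64-byte packet,
 * qlen = (4096 * 64) / 56 = 4681 entries; the computed value is used
 * only if it exceeds ldc_queue_entries, otherwise the minimum wins.
 */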
#ifdef DEBUG

/*
 * Print debug messages
 *
 * set ldcdbg to 0x7 to enable all msgs
 * 0x4 - Warnings
 * 0x2 - All debug messages
 * 0x1 - Minimal debug messages
 *
 * set ldcdbgchan to the channel number you want to debug
 * setting it to -1 prints debug messages for all channels
 * NOTE: ldcdbgchan has no effect on error messages
 */

#define	DBG_ALL_LDCS -1

int ldcdbg = 0x0;
int64_t ldcdbgchan = DBG_ALL_LDCS;
uint64_t ldc_inject_err_flag = 0;

static void
ldcdebug(int64_t id, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	/*
	 * Do not return if,
	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
	 * debug channel = caller specified channel
	 */
	if ((id != DBG_ALL_LDCS) &&
	    (ldcdbgchan != DBG_ALL_LDCS) &&
	    (ldcdbgchan != id)) {
		return;
	}

	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);

	cmn_err(CE_CONT, "?%s", buf);
}

#define	LDC_ERR_RESET	0x1
#define	LDC_ERR_PKTLOSS	0x2

static boolean_t
ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
{
	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
		return (B_FALSE);

	if ((ldc_inject_err_flag & error) == 0)
		return (B_FALSE);

	/* clear the injection state */
	ldc_inject_err_flag &= ~error;

	return (B_TRUE);
}

#define	D1		\
if (ldcdbg & 0x01)	\
	ldcdebug

#define	D2		\
if (ldcdbg & 0x02)	\
	ldcdebug

#define	DWARN		\
if (ldcdbg & 0x04)	\
	ldcdebug

#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char buf[65*3];							\
	int i;								\
	uint8_t *src = (uint8_t *)addr;					\
	for (i = 0; i < 64; i++, src++)					\
		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
	(void) sprintf(&buf[i * 3], "|\n");				\
	D2((id), "payload: %s", buf);					\
}

#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
	if (msg->type == LDC_DATA) {					\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
		    (s), mid, msg->type, msg->stype, msg->ctrl,		\
		    (msg->env & LDC_FRAG_START) ? 'B' : ' ',		\
		    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',		\
		    (msg->env & LDC_LEN_MASK));				\
	} else {							\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),	\
		    mid, msg->type, msg->stype, msg->ctrl, msg->env);	\
	}								\
}

#define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
#define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)

#else

#define	DBG_ALL_LDCS -1

#define	D1
#define	D2
#define	DWARN

#define	DUMP_PAYLOAD(id, addr)
#define	DUMP_LDC_PKT(c, s, addr)

#define	LDC_INJECT_RESET(_ldcp)		(B_FALSE)
#define	LDC_INJECT_PKTLOSS(_ldcp)	(B_FALSE)

#endif
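/*
 * Example (illustrative, DEBUG builds only, since the knobs above are
 * compiled out otherwise): being plain kernel variables, they can be
 * poked at runtime with mdb -kw, e.g. "ldcdbg/W 7" to enable all
 * message levels, or set persistently with a line such as
 * "set ldc:ldcdbg = 0x7" in /etc/system.
 */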
#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t));

#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
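/*
 * Illustrative only: IDX2COOKIE packs a map-table index and a page-size
 * class into an exported-memory cookie. Assuming an 8K page (pg_szc = 0,
 * pg_shift = 13), table index 5 yields (0 << LDC_COOKIE_PGSZC_SHIFT) |
 * (5 << 13) = 0xa000; the importing side recovers the index by masking
 * off the page-size class and shifting back down.
 */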
int
_init(void)
{
	int status;

	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
	if (status != 0) {
		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
		    " group: 0x%lx major: %ld minor: %ld errno: %d",
		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
		return (-1);
	}

	/* allocate soft state structure */
	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);

	/* Link the module into the system */
	status = mod_install(&ml);
	if (status != 0) {
		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
		return (status);
	}

	/* Initialize the LDC state structure */
	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);

	mutex_enter(&ldcssp->lock);

	/* Create a cache for memory handles */
	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (ldcssp->memhdl_cache == NULL) {
		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
		mutex_exit(&ldcssp->lock);
		return (-1);
	}

	/* Create cache for memory segment structures */
	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (ldcssp->memseg_cache == NULL) {
		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
		mutex_exit(&ldcssp->lock);
		return (-1);
	}


	ldcssp->channel_count = 0;
	ldcssp->channels_open = 0;
	ldcssp->chan_list = NULL;
	ldcssp->dring_list = NULL;

	mutex_exit(&ldcssp->lock);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	/* Report status of the dynamically loadable driver module */
	return (mod_info(&ml, modinfop));
}

int
_fini(void)
{
	int rv, status;
	ldc_chan_t *ldcp;
	ldc_dring_t *dringp;
	ldc_mem_info_t minfo;

	/* Unlink the driver module from the system */
	status = mod_remove(&ml);
	if (status) {
		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
		return (EIO);
	}

	/* close and finalize channels */
	ldcp = ldcssp->chan_list;
	while (ldcp != NULL) {
		(void) ldc_close((ldc_handle_t)ldcp);
		(void) ldc_fini((ldc_handle_t)ldcp);

		ldcp = ldcp->next;
	}

	/* Free descriptor rings */
	dringp = ldcssp->dring_list;
	while (dringp != NULL) {
		dringp = dringp->next;

		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
		if (rv == 0 && minfo.status != LDC_UNBOUND) {
			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_dring_unbind(
				    (ldc_dring_handle_t)dringp);
			}
			if (minfo.status == LDC_MAPPED) {
				(void) ldc_mem_dring_unmap(
				    (ldc_dring_handle_t)dringp);
			}
		}

		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
	}
	ldcssp->dring_list = NULL;

	/* Destroy kmem caches */
	kmem_cache_destroy(ldcssp->memhdl_cache);
	kmem_cache_destroy(ldcssp->memseg_cache);

	/*
	 * We have successfully "removed" the driver.
	 * Destroying soft states
	 */
	mutex_destroy(&ldcssp->lock);
	kmem_free(ldcssp, sizeof (ldc_soft_state_t));

	(void) hsvc_unregister(&ldc_hsvc);

	return (status);
}

/* -------------------------------------------------------------------------- */

/*
 * LDC Link Layer Internal Functions
 */

/*
 * Translate HV Errors to sun4v error codes
 */
static int
i_ldc_h2v_error(int h_error)
{
	switch (h_error) {

	case H_EOK:
		return (0);

	case H_ENORADDR:
		return (EFAULT);

	case H_EBADPGSZ:
	case H_EINVAL:
		return (EINVAL);

	case H_EWOULDBLOCK:
		return (EWOULDBLOCK);

	case H_ENOACCESS:
	case H_ENOMAP:
		return (EACCES);

	case H_EIO:
	case H_ECPUERROR:
		return (EIO);

	case H_ENOTSUPPORTED:
		return (ENOTSUP);

	case H_ETOOMANY:
		return (ENOSPC);

	case H_ECHANNEL:
		return (ECHRNG);
	default:
		break;
	}

	return (EIO);
}
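/*
 * Typical use, as a sketch (hv_ldc_copy() is just an illustrative
 * hypervisor call here; exact callers and arguments vary):
 *
 *	rv = hv_ldc_copy(ldcp->id, LDC_COPY_IN, cookie, paddr, size, &len);
 *	if (rv != 0)
 *		return (i_ldc_h2v_error(rv));
 */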
/*
 * Reconfigure the transmit queue
 */
static int
i_ldc_txq_reconf(ldc_chan_t *ldcp)
{
	int rv;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
		return (EIO);
	}
	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
	    &(ldcp->tx_tail), &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
		return (EIO);
	}
	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
	    ldcp->link_state);

	return (0);
}

/*
 * Reconfigure the receive queue
 */
static int
i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
		    ldcp->rx_q_entries);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
			    ldcp->id);
			return (EIO);
		}
		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
		    ldcp->id);
	}

	return (0);
}


/*
 * Drain the contents of the receive queue
 */
static int
i_ldc_rxq_drain(ldc_chan_t *ldcp)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	/* flush contents by setting the head = tail */
	return (i_ldc_set_rx_head(ldcp, rx_tail));
}


/*
 * Reset LDC state structure and its contents
 */
static void
i_ldc_reset_state(ldc_chan_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->lock));
	ldcp->last_msg_snt = LDC_INIT_SEQID;
	ldcp->last_ack_rcd = 0;
	ldcp->last_msg_rcd = 0;
	ldcp->tx_ackd_head = ldcp->tx_head;
	ldcp->next_vidx = 0;
	ldcp->hstate = 0;
	ldcp->tstate = TS_OPEN;
	ldcp->status = LDC_OPEN;

	if (ldcp->link_state == LDC_CHANNEL_UP ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {

		if (ldcp->mode == LDC_MODE_RAW) {
			ldcp->status = LDC_UP;
			ldcp->tstate = TS_UP;
		} else {
			ldcp->status = LDC_READY;
			ldcp->tstate |= TS_LINK_READY;
		}
	}
}

/*
 * Reset a LDC channel
 */
static void
i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
{
	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* reconfig Tx and Rx queues */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_reconf(ldcp, force_reset);

	/* Clear Tx and Rx interrupts */
	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);

	/* Reset channel state */
	i_ldc_reset_state(ldcp);

	/* Mark channel in reset */
	ldcp->tstate |= TS_IN_RESET;
}


/*
 * Clear pending interrupts
 */
static void
i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
{
	ldc_cnex_t *cinfo = &ldcssp->cinfo;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(cinfo->dip != NULL);

	switch (itype) {
	case CNEX_TX_INTR:
		/* check Tx interrupt */
		if (ldcp->tx_intr_state)
			ldcp->tx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;

	case CNEX_RX_INTR:
		/* check Rx interrupt */
		if (ldcp->rx_intr_state)
			ldcp->rx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;
	}

	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
	D2(ldcp->id,
	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
	    ldcp->id, itype);
}

/*
 * Set the receive queue head
 * Resets connection and returns an error if it fails.
 */
static int
i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
{
	int rv;
	int retries;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
			return (0);

		if (rv != H_EWOULDBLOCK)
			break;

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}

	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
	    ldcp->id, head);
	mutex_enter(&ldcp->tx_lock);
	i_ldc_reset(ldcp, B_TRUE);
	mutex_exit(&ldcp->tx_lock);

	return (ECONNRESET);
}
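/*
 * Note on the retry pattern above (and in i_ldc_set_tx_tail() below):
 * each H_EWOULDBLOCK from the HV is followed by a drv_usecwait() of
 * ldc_delay microseconds, so a caller can busy-wait for at most
 * ldc_max_retries * ldc_delay usecs before the channel is reset.
 */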
/*
 * Returns the tx_tail to be used for transfer
 * Re-reads the TX queue ptrs if and only if the
 * cached head and tail are equal (queue is full)
 */
static int
i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
{
	int rv;
	uint64_t current_head, new_tail;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	/* Read the head and tail ptrs from HV */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
		    ldcp->id);
		return (EIO);
	}
	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
		    ldcp->id);
		return (ECONNRESET);
	}

	/* In reliable mode, check against last ACKd msg */
	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
	    ldcp->mode == LDC_MODE_STREAM)
	    ? ldcp->tx_ackd_head : ldcp->tx_head;

	/* increment the tail */
	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	if (new_tail == current_head) {
		DWARN(ldcp->id,
		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
		    ldcp->id);
		return (EWOULDBLOCK);
	}

	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);

	*tail = ldcp->tx_tail;
	return (0);
}
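/*
 * Ring arithmetic sketch (illustrative values): with 512 queue entries
 * and 64-byte packets, the queue spans 512 << LDC_PACKET_SHIFT = 32768
 * bytes, so bumping the tail is (tail + 64) % 32768. One slot is
 * deliberately kept empty: the queue is declared full when the bumped
 * tail would land on the (ACK'd) head.
 */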
/*
 * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
 * and retry ldc_max_retries times before returning an error.
 * Returns 0, EWOULDBLOCK or EIO
 */
static int
i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
{
	int rv, retval = EWOULDBLOCK;
	int retries;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
			retval = 0;
			break;
		}
		if (rv != H_EWOULDBLOCK) {
			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
			retval = EIO;
			break;
		}

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}
	return (retval);
}

/*
 * Send a LDC message
 */
static int
i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg)
{
	int rv;
	ldc_msg_t *pkt;
	uint64_t tx_tail;
	uint32_t curr_seqid = ldcp->last_msg_snt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the message */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (rv);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* Initialize the packet */
	pkt->type = pkttype;
	pkt->stype = subtype;
	pkt->ctrl = ctrlmsg;

	/* Store ackid/seqid iff it is not RAW mode & not a RTS/RTR message */
	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
		curr_seqid++;
		if (ldcp->mode != LDC_MODE_RAW) {
			pkt->seqid = curr_seqid;
			pkt->ackid = ldcp->last_msg_rcd;
		}
	}
	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (EIO);
	}

	ldcp->last_msg_snt = curr_seqid;
	ldcp->tx_tail = tx_tail;

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
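/*
 * Usage sketch: the handshake and Rx paths below send single control
 * packets through this helper, e.g. i_ldc_process_RTR() completes the
 * handshake with i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX),
 * and i_ldc_rx_hdlr() NACKs a bad seqid with
 * i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, msg->ctrl & LDC_CTRL_MASK).
 */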
/*
 * Checks if packet was received in right order
 * in the case of a reliable link.
 * Returns 0 if in order, else EIO
 */
static int
i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	/* No seqid checking for RAW mode */
	if (ldcp->mode == LDC_MODE_RAW)
		return (0);

	/* No seqid checking for version, RTS, RTR message */
	if (msg->ctrl == LDC_VER ||
	    msg->ctrl == LDC_RTS ||
	    msg->ctrl == LDC_RTR)
		return (0);

	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
		    "expecting 0x%x\n", ldcp->id, msg->seqid,
		    (ldcp->last_msg_rcd + 1));
		return (EIO);
	}

#ifdef DEBUG
	if (LDC_INJECT_PKTLOSS(ldcp)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
		return (EIO);
	}
#endif

	return (0);
}
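/*
 * Version negotiation walk-down, by example (hypothetical peer values;
 * this endpoint ships only ldc_versions[] = { {1, 0} }): a peer INFO
 * of v1.5 is ACKed with the minor lowered to v1.0; a peer INFO of v3.0
 * is NACKed with v1.0, inviting the peer to retry lower; a NACK of
 * v0.0 from the peer means there is no overlap and the channel is
 * reset.
 */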
/*
 * Process an incoming version ctrl message
 */
static int
i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0, idx = ldcp->next_vidx;
	ldc_msg_t *pkt;
	uint64_t tx_tail;
	ldc_ver_t *rcvd_ver;

	/* get the received version */
	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);

	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	switch (msg->stype) {
	case LDC_INFO:

		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			return (EAGAIN);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;

		for (;;) {

			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
			    rcvd_ver->major, rcvd_ver->minor,
			    ldc_versions[idx].major, ldc_versions[idx].minor);

			if (rcvd_ver->major == ldc_versions[idx].major) {
				/* major version match - ACK version */
				pkt->stype = LDC_ACK;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));

				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* nack with next lower version */
				pkt->stype = LDC_NACK;
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next major version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - send NACK */
				pkt->stype = LDC_NACK;
				bzero(pkt->udata, sizeof (ldc_ver_t));
				ldcp->next_vidx = 0;
				break;
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			ldcp->tx_tail = tx_tail;
			if (pkt->stype == LDC_ACK) {
				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
				    " version ACK\n", ldcp->id);
				/* Save the ACK'd version */
				ldcp->version.major = rcvd_ver->major;
				ldcp->version.minor = rcvd_ver->minor;
				ldcp->hstate |= TS_RCVD_VER;
				ldcp->tstate |= TS_VER_DONE;
				D1(DBG_ALL_LDCS,
				    "(0x%llx) Sent ACK, "
				    "Agreed on version v%u.%u\n",
				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
			}
		} else {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) error sending "
			    "ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;

	case LDC_ACK:
		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			if (ldcp->version.major != rcvd_ver->major ||
			    ldcp->version.minor != rcvd_ver->minor) {

				/* mismatched version - reset connection */
				DWARN(ldcp->id,
				    "i_ldc_process_VER: (0x%llx) recvd"
				    " ACK ver != sent ACK ver\n", ldcp->id);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		} else {
			/* SUCCESS - we have agreed on a version */
			ldcp->version.major = rcvd_ver->major;
			ldcp->version.minor = rcvd_ver->minor;
			ldcp->tstate |= TS_VER_DONE;
		}

		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

		/* initiate RTS-RTR-RDX handshake */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		pkt->type = LDC_CTRL;
		pkt->stype = LDC_INFO;
		pkt->ctrl = LDC_RTS;
		pkt->env = ldcp->mode;
		if (ldcp->mode != LDC_MODE_RAW)
			pkt->seqid = LDC_INIT_SEQID;

		ldcp->last_msg_rcd = LDC_INIT_SEQID;

		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv) {
			D2(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) no listener\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTS;

		break;

	case LDC_NACK:
		/* check if version in NACK is zero */
		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
			/* version handshake failure */
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_VER: (0x%llx) no version match\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;
		pkt->stype = LDC_INFO;

		/* check ver in NACK msg has a match */
		for (;;) {
			if (rcvd_ver->major == ldc_versions[idx].major) {
				/*
				 * major version match - resubmit request,
				 * lowering the minor version to the one
				 * this endpt supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* send next lower version */
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - terminate */
				ldcp->next_vidx = 0;
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
			    " INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
			    ldc_versions[idx].minor);
			ldcp->tx_tail = tx_tail;
		} else {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) error sending version"
			    " INFO\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;
	}

	mutex_exit(&ldcp->tx_lock);
	return (rv);
}
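/*
 * Handshake summary, as implemented by i_ldc_process_VER() above and
 * the RTS/RTR/RDX handlers below: after version negotiation the
 * initiator sends RTS, the peer answers RTS with RTR, and the
 * initiator answers RTR with RDX. The hstate bits (TS_SENT_RTS,
 * TS_RCVD_RTS, TS_SENT_RTR, TS_RCVD_RTR, TS_SENT_RDX, TS_RCVD_RDX)
 * track progress until TS_HSHAKE_DONE is set and the channel status
 * becomes LDC_UP.
 */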
/*
 * Process an incoming RTS ctrl message
 */
static int
i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0;
	ldc_msg_t *pkt;
	uint64_t tx_tail;
	boolean_t sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		DWARN(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
			    ldcp->id);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;
	default:
		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we don't want to consume the packet that came in but
	 * not record that we received the RTS
	 */
	if (rv || sent_NACK)
		return (rv);

	/* record RTS received */
	ldcp->hstate |= TS_RCVD_RTS;

	/* store initial SEQID info */
	ldcp->last_msg_snt = msg->seqid;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the response */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* initialize the packet */
	pkt->type = LDC_CTRL;
	pkt->stype = LDC_INFO;
	pkt->ctrl = LDC_RTR;
	pkt->env = ldcp->mode;
	if (ldcp->mode != LDC_MODE_RAW)
		pkt->seqid = LDC_INIT_SEQID;

	ldcp->last_msg_rcd = msg->seqid;

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv == 0) {
		D2(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTR;

	} else {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
/*
 * Process an incoming RTR ctrl message
 */
static int
i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0;
	boolean_t sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RTR NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			DWARN(ldcp->id,
			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
			    "expecting 0x%x, got 0x%x\n",
			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;

	default:
		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we don't want to consume the packet that came in but
	 * not record that we received the RTR
	 */
	if (rv || sent_NACK)
		return (rv);

	ldcp->last_msg_snt = msg->seqid;
	ldcp->hstate |= TS_RCVD_RTR;

	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
	if (rv) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D2(ldcp->id,
	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);

	ldcp->hstate |= TS_SENT_RDX;
	ldcp->tstate |= TS_HSHAKE_DONE;
	if ((ldcp->tstate & TS_IN_RESET) == 0)
		ldcp->status = LDC_UP;

	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);

	return (0);
}
/*
 * Process an incoming RDX ctrl message
 */
static int
i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0;

	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RDX NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/*
		 * if channel is UP and a RDX received after data transmission
		 * has commenced it is an error
		 */
		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
			    " - LDC reset\n", ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->hstate |= TS_RCVD_RDX;
		ldcp->tstate |= TS_HSHAKE_DONE;
		if ((ldcp->tstate & TS_IN_RESET) == 0)
			ldcp->status = LDC_UP;

		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
		break;

	default:
		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	return (rv);
}

/*
 * Process an incoming ACK for a data packet
 */
static int
i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv;
	uint64_t tx_head;
	ldc_msg_t *pkt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Read the current Tx head and tail
	 */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	/*
	 * loop from where the previous ACK location was to the
	 * current head location. This is how far the HV has
	 * actually sent pkts. Pkts between head and tail are
	 * yet to be sent by HV.
	 */
	tx_head = ldcp->tx_ackd_head;
	for (;;) {
		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
		tx_head = (tx_head + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		if (pkt->seqid == msg->ackid) {
			D2(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
			    ldcp->id);
			ldcp->last_ack_rcd = msg->ackid;
			ldcp->tx_ackd_head = tx_head;
			break;
		}
		if (tx_head == ldcp->tx_head) {
			/* could not find packet */
			DWARN(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
			    ldcp->id);

			/* Reset the channel -- as we cannot continue */
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
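/*
 * Illustrative walk of the ACK scan above (64-byte packets,
 * hypothetical offsets): with tx_ackd_head = 0x80 and tx_head = 0x140,
 * an ACK for the seqid of the packet stored at 0x100 visits slots 0x80
 * and 0xc0, matches at 0x100, and advances tx_ackd_head to 0x140. An
 * ackid that matches no slot before the scan reaches tx_head is
 * invalid and resets the channel.
 */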
/*
 * Process incoming control message
 * Return 0 - session can continue
 *        EAGAIN - reprocess packet - state was changed
 *        ECONNRESET - channel was reset
 */
static int
i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0;

	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
	    ldcp->id, ldcp->tstate, ldcp->hstate);

	switch (ldcp->tstate & ~TS_IN_RESET) {

	case TS_OPEN:
	case TS_READY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_VREADY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		case LDC_RTS:
			/* process RTS message */
			rv = i_ldc_process_RTS(ldcp, msg);
			break;
		case LDC_RTR:
			/* process RTR message */
			rv = i_ldc_process_RTR(ldcp, msg);
			break;
		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_UP:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
			    "- LDC reset\n", ldcp->id);
			/* peer is redoing version negotiation */
			mutex_enter(&ldcp->tx_lock);
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			rv = EAGAIN;
			break;

		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;

		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}
	}

	return (rv);
}
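/*
 * The dispatch above, in tabular form: in TS_OPEN/TS_READY only VER
 * messages are honored; in TS_VREADY the full VER/RTS/RTR/RDX set is
 * processed; in TS_UP a VER restarts negotiation (EAGAIN) and an RDX
 * is still accepted, while anything else is logged and dropped.
 */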
"i_ldc_register_channel: cannot register channel\n"); 1662 return (rv); 1663 } 1664 1665 rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR, 1666 i_ldc_tx_hdlr, ldcp, NULL); 1667 if (rv) { 1668 DWARN(ldcp->id, 1669 "i_ldc_register_channel: cannot add Tx interrupt\n"); 1670 (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); 1671 return (rv); 1672 } 1673 1674 rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR, 1675 i_ldc_rx_hdlr, ldcp, NULL); 1676 if (rv) { 1677 DWARN(ldcp->id, 1678 "i_ldc_register_channel: cannot add Rx interrupt\n"); 1679 (void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); 1680 (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); 1681 return (rv); 1682 } 1683 1684 ldcp->tstate |= TS_CNEX_RDY; 1685 1686 return (0); 1687 } 1688 1689 /* 1690 * Unregister a channel with the channel nexus 1691 */ 1692 static int 1693 i_ldc_unregister_channel(ldc_chan_t *ldcp) 1694 { 1695 int rv = 0; 1696 ldc_cnex_t *cinfo = &ldcssp->cinfo; 1697 1698 if (cinfo->dip == NULL) { 1699 DWARN(ldcp->id, 1700 "i_ldc_unregister_channel: cnex has not registered\n"); 1701 return (EAGAIN); 1702 } 1703 1704 if (ldcp->tstate & TS_CNEX_RDY) { 1705 1706 /* Remove the Rx interrupt */ 1707 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR); 1708 if (rv) { 1709 if (rv != EAGAIN) { 1710 DWARN(ldcp->id, 1711 "i_ldc_unregister_channel: err removing " 1712 "Rx intr\n"); 1713 return (rv); 1714 } 1715 1716 /* 1717 * If interrupts are pending and handler has 1718 * finished running, clear interrupt and try 1719 * again 1720 */ 1721 if (ldcp->rx_intr_state != LDC_INTR_PEND) 1722 return (rv); 1723 1724 (void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 1725 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, 1726 CNEX_RX_INTR); 1727 if (rv) { 1728 DWARN(ldcp->id, "i_ldc_unregister_channel: " 1729 "err removing Rx interrupt\n"); 1730 return (rv); 1731 } 1732 } 1733 1734 /* Remove the Tx interrupt */ 1735 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); 1736 if (rv) { 1737 DWARN(ldcp->id, 1738 "i_ldc_unregister_channel: err removing Tx intr\n"); 1739 return (rv); 1740 } 1741 1742 /* Unregister the channel */ 1743 rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id); 1744 if (rv) { 1745 DWARN(ldcp->id, 1746 "i_ldc_unregister_channel: cannot unreg channel\n"); 1747 return (rv); 1748 } 1749 1750 ldcp->tstate &= ~TS_CNEX_RDY; 1751 } 1752 1753 return (0); 1754 } 1755 1756 1757 /* 1758 * LDC transmit interrupt handler 1759 * triggered for chanel up/down/reset events 1760 * and Tx queue content changes 1761 */ 1762 static uint_t 1763 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2) 1764 { 1765 _NOTE(ARGUNUSED(arg2)) 1766 1767 int rv; 1768 ldc_chan_t *ldcp; 1769 boolean_t notify_client = B_FALSE; 1770 uint64_t notify_event = 0, link_state; 1771 1772 /* Get the channel for which interrupt was received */ 1773 ASSERT(arg1 != NULL); 1774 ldcp = (ldc_chan_t *)arg1; 1775 1776 D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n", 1777 ldcp->id, ldcp); 1778 1779 /* Lock channel */ 1780 mutex_enter(&ldcp->lock); 1781 1782 /* Obtain Tx lock */ 1783 mutex_enter(&ldcp->tx_lock); 1784 1785 /* mark interrupt as pending */ 1786 ldcp->tx_intr_state = LDC_INTR_ACTIVE; 1787 1788 /* save current link state */ 1789 link_state = ldcp->link_state; 1790 1791 rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, 1792 &ldcp->link_state); 1793 if (rv) { 1794 cmn_err(CE_WARN, 1795 "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n", 1796 ldcp->id, rv); 1797 i_ldc_clear_intr(ldcp, CNEX_TX_INTR); 1798 
/*
 * LDC receive interrupt handler
 * triggered for channel with data pending to read
 * i.e. Rx queue content changes
 */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	int rv;
	uint64_t rx_head, rx_tail;
	ldc_msg_t *msg;
	ldc_chan_t *ldcp;
	boolean_t notify_client = B_FALSE;
	uint64_t notify_event = 0;
	uint64_t link_state, first_fragment = 0;


	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* mark interrupt as pending */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	/*
	 * Read packet(s) from the queue
	 */
	for (;;) {

		link_state = ldcp->link_state;
		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
		    &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
			mutex_exit(&ldcp->lock);
			return (DDI_INTR_CLAIMED);
		}

		/*
		 * reset the channel state if the channel went down
		 * (other side unconfigured queue) or channel was reset
		 * (other side reconfigured its queue)
		 */

		if (link_state != ldcp->link_state) {

			switch (ldcp->link_state) {
			case LDC_CHANNEL_DOWN:
				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
				    "link down\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				notify_client = B_TRUE;
				notify_event = LDC_EVT_DOWN;
				goto loop_exit;

			case LDC_CHANNEL_UP:
				D1(ldcp->id, "i_ldc_rx_hdlr: "
				    "channel link up\n", ldcp->id);

				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
					notify_client = B_TRUE;
					notify_event = LDC_EVT_RESET;
					ldcp->tstate |= TS_LINK_READY;
					ldcp->status = LDC_READY;
				}
				break;

			case LDC_CHANNEL_RESET:
			default:
#ifdef DEBUG
force_reset:
#endif
				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
				    "link reset\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}
		}

#ifdef DEBUG
		if (LDC_INJECT_RESET(ldcp))
			goto force_reset;
#endif

		if (rx_head == rx_tail) {
			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
			    ldcp->id);
			break;
		}

		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
		    rx_head, rx_tail);
		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
		    ldcp->rx_q_va + rx_head);

		/* get the message */
		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);

		/* if channel is in RAW mode or data pkt, notify and return */
		if (ldcp->mode == LDC_MODE_RAW) {
			notify_client = B_TRUE;
			notify_event |= LDC_EVT_READ;
			break;
		}

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* discard packet if channel is not up */
			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {

				/* move the head one position */
				rx_head = (rx_head + LDC_PACKET_SIZE) %
				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);

				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
					break;

				continue;
			} else {
				if ((ldcp->tstate & TS_IN_RESET) == 0)
					notify_client = B_TRUE;
				notify_event |= LDC_EVT_READ;
				break;
			}
		}

		/* Check the sequence ID for the message received */
		rv = i_ldc_check_seqid(ldcp, msg);
		if (rv != 0) {

			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);

			/* Reset last_msg_rcd to start of message */
			if (first_fragment != 0) {
				ldcp->last_msg_rcd = first_fragment - 1;
				first_fragment = 0;
			}

			/*
			 * Send a NACK due to seqid mismatch
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
			    (msg->ctrl & LDC_CTRL_MASK));

			if (rv) {
				cmn_err(CE_NOTE,
				    "i_ldc_rx_hdlr: (0x%lx) err sending "
				    "CTRL/NACK msg\n", ldcp->id);

				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);

				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}

			/* purge receive queue */
			(void) i_ldc_set_rx_head(ldcp, rx_tail);
			break;
		}

		/* record the message ID */
		ldcp->last_msg_rcd = msg->seqid;

		/* process control messages */
		if (msg->type & LDC_CTRL) {
			/* save current internal state */
			uint64_t tstate = ldcp->tstate;

			rv = i_ldc_ctrlmsg(ldcp, msg);
			if (rv == EAGAIN) {
				/* re-process pkt - state was adjusted */
				continue;
			}
			if (rv == ECONNRESET) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}

			/*
			 * control message processing was successful
			 * channel transitioned to ready for communication
			 */
			if (rv == 0 && ldcp->tstate == TS_UP &&
			    (tstate & ~TS_IN_RESET) !=
			    (ldcp->tstate & ~TS_IN_RESET)) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_UP;
			}
		}

		/* process data NACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) received DATA/NACK",
			    ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			notify_client = B_TRUE;
			notify_event = LDC_EVT_RESET;
			break;
		}

		/* process data ACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}
		}

		/* move the head one position */
		rx_head = (rx_head + LDC_PACKET_SIZE) %
		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
			notify_client = B_TRUE;
			notify_event = LDC_EVT_RESET;
			break;
		}

	} /* for */

loop_exit:

	/* if callbacks are disabled, do not notify */
	if (!ldcp->cb_enabled)
		notify_client = B_FALSE;

	/*
	 * If there are data packets in the queue, the ldc_read will
	 * clear interrupts after draining the queue, else clear interrupts
	 */
	if ((notify_event & LDC_EVT_READ) == 0) {
		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
	} else
		ldcp->rx_intr_state = LDC_INTR_PEND;


	if (notify_client) {
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(notify_event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
			    ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
	return (DDI_INTR_CLAIMED);
}
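/*
 * Event delivery sketch for the two handlers above: clients see
 * LDC_EVT_UP once the handshake completes, LDC_EVT_READ when data
 * packets are left on the Rx queue (interrupt clearing is then
 * deferred to ldc_read() via LDC_INTR_PEND), and LDC_EVT_RESET /
 * LDC_EVT_DOWN on link-state changes, all subject to cb_enabled.
 */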
/* -------------------------------------------------------------------------- */

/*
 * LDC API functions
 */

/*
 * Initialize the channel. Allocate internal structure and memory for
 * TX/RX queues, and initialize locks.
 */
int
ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
{
	ldc_chan_t *ldcp;
	int rv, exit_val;
	uint64_t ra_base, nentries;
	uint64_t qlen;

	exit_val = EINVAL;	/* guarantee an error if exit on failure */

	if (attr == NULL) {
		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
		return (EINVAL);
	}
	if (handle == NULL) {
		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
		return (EINVAL);
	}

	/* check if channel is valid */
	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
	if (rv == H_ECHANNEL) {
		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
		return (EINVAL);
	}

	/* check if the channel has already been initialized */
	mutex_enter(&ldcssp->lock);
	ldcp = ldcssp->chan_list;
	while (ldcp != NULL) {
		if (ldcp->id == id) {
			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
			    id);
			mutex_exit(&ldcssp->lock);
			return (EADDRINUSE);
		}
		ldcp = ldcp->next;
	}
	mutex_exit(&ldcssp->lock);

	ASSERT(ldcp == NULL);

	*handle = 0;

	/* Allocate an ldcp structure */
	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);

	/*
	 * Initialize the channel and Tx lock
	 *
	 * The channel 'lock' protects the entire channel and
	 * should be acquired before initializing, resetting,
	 * destroying or reading from a channel.
	 *
	 * The 'tx_lock' should be acquired prior to transmitting
	 * data over the channel. The lock should also be acquired
	 * prior to channel reconfiguration (in order to prevent
	 * concurrent writes).
	 *
	 * ORDERING: When both locks are being acquired, to prevent
	 * deadlocks, the channel lock should be always acquired prior
	 * to the tx_lock.
	 */
	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);

	/* Initialize the channel */
	ldcp->id = id;
	ldcp->cb = NULL;
	ldcp->cb_arg = NULL;
	ldcp->cb_inprogress = B_FALSE;
	ldcp->cb_enabled = B_FALSE;
	ldcp->next = NULL;

	/* Read attributes */
	ldcp->mode = attr->mode;
	ldcp->devclass = attr->devclass;
	ldcp->devinst = attr->instance;
	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;

	D1(ldcp->id,
	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
	    "instance=0x%llx, mode=%d, mtu=%d\n",
	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);

	ldcp->next_vidx = 0;
	ldcp->tstate = TS_IN_RESET;
	ldcp->hstate = 0;
	ldcp->last_msg_snt = LDC_INIT_SEQID;
	ldcp->last_ack_rcd = 0;
	ldcp->last_msg_rcd = 0;

	ldcp->stream_bufferp = NULL;
	ldcp->exp_dring_list = NULL;
	ldcp->imp_dring_list = NULL;
	ldcp->mhdl_list = NULL;

	ldcp->tx_intr_state = LDC_INTR_NONE;
	ldcp->rx_intr_state = LDC_INTR_NONE;

	/* Initialize payload size depending on whether channel is reliable */
	switch (ldcp->mode) {
	case LDC_MODE_RAW:
		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
		ldcp->read_p = i_ldc_read_raw;
		ldcp->write_p = i_ldc_write_raw;
		break;
	case LDC_MODE_UNRELIABLE:
		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
		ldcp->read_p = i_ldc_read_packet;
		ldcp->write_p = i_ldc_write_packet;
		break;
	case LDC_MODE_RELIABLE:
		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
		ldcp->read_p = i_ldc_read_packet;
		ldcp->write_p = i_ldc_write_packet;
		break;
	case LDC_MODE_STREAM:
		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;

		ldcp->stream_remains = 0;
		ldcp->stream_offset = 0;
		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
		ldcp->read_p = i_ldc_read_stream;
		ldcp->write_p = i_ldc_write_stream;
		break;
	default:
		exit_val = EINVAL;
		goto cleanup_on_exit;
	}

	/*
	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
	 * value is smaller than the default length, ldc_queue_entries,
	 * the queue length is set to ldc_queue_entries.
	 */
	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
	ldcp->rx_q_entries =
	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
	ldcp->tx_q_entries = ldcp->rx_q_entries;

	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen);

	/* Create a transmit queue */
	ldcp->tx_q_va = (uint64_t)
	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
	if (ldcp->tx_q_va == NULL) {
		cmn_err(CE_WARN,
		    "ldc_init: (0x%lx) TX queue allocation failed\n",
		    ldcp->id);
		exit_val = ENOMEM;
		goto cleanup_on_exit;
	}
	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);

	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);

	ldcp->tstate |= TS_TXQ_RDY;

	/* Create a receive queue */
	ldcp->rx_q_va = (uint64_t)
	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
	if (ldcp->rx_q_va == NULL) {
		cmn_err(CE_WARN,
		    "ldc_init: (0x%lx) RX queue allocation failed\n",
		    ldcp->id);
		exit_val = ENOMEM;
		goto cleanup_on_exit;
	}
	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);

	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);

	ldcp->tstate |= TS_RXQ_RDY;

	/* Init descriptor ring and memory handle list lock */
	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);

	/* mark status as INITialized */
	ldcp->status = LDC_INIT;

	/* Add to channel list */
	mutex_enter(&ldcssp->lock);
	ldcp->next = ldcssp->chan_list;
	ldcssp->chan_list = ldcp;
	ldcssp->channel_count++;
	mutex_exit(&ldcssp->lock);

	/* set the handle */
	*handle = (ldc_handle_t)ldcp;

	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);

	return (0);

cleanup_on_exit:

	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
		kmem_free(ldcp->stream_bufferp, ldcp->mtu);

	if (ldcp->tstate & TS_TXQ_RDY)
		contig_mem_free((caddr_t)ldcp->tx_q_va,
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));

	if (ldcp->tstate & TS_RXQ_RDY)
		contig_mem_free((caddr_t)ldcp->rx_q_va,
		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));

	mutex_destroy(&ldcp->tx_lock);
	mutex_destroy(&ldcp->lock);

	if (ldcp)
		kmem_free(ldcp, sizeof (ldc_chan_t));

	return (exit_val);
}
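/*
 * Usage sketch (hypothetical caller; error handling elided): a client
 * driver fills in an ldc_attr_t and initializes its channel with
 *
 *	ldc_attr_t attr;
 *	ldc_handle_t handle;
 *
 *	attr.devclass = LDC_DEV_GENERIC;
 *	attr.instance = 0;
 *	attr.mode = LDC_MODE_UNRELIABLE;
 *	attr.mtu = 4096;
 *	(void) ldc_init(channel_id, &attr, &handle);
 *
 * ldc_fini() (below) releases the handle once the channel is closed.
 */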
attr->mtu : LDC_DEFAULT_MTU; 2238 2239 D1(ldcp->id, 2240 "ldc_init: (0x%llx) channel attributes, class=0x%x, " 2241 "instance=0x%llx, mode=%d, mtu=%d\n", 2242 ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu); 2243 2244 ldcp->next_vidx = 0; 2245 ldcp->tstate = TS_IN_RESET; 2246 ldcp->hstate = 0; 2247 ldcp->last_msg_snt = LDC_INIT_SEQID; 2248 ldcp->last_ack_rcd = 0; 2249 ldcp->last_msg_rcd = 0; 2250 2251 ldcp->stream_bufferp = NULL; 2252 ldcp->exp_dring_list = NULL; 2253 ldcp->imp_dring_list = NULL; 2254 ldcp->mhdl_list = NULL; 2255 2256 ldcp->tx_intr_state = LDC_INTR_NONE; 2257 ldcp->rx_intr_state = LDC_INTR_NONE; 2258 2259 /* Initialize payload size depending on whether channel is reliable */ 2260 switch (ldcp->mode) { 2261 case LDC_MODE_RAW: 2262 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW; 2263 ldcp->read_p = i_ldc_read_raw; 2264 ldcp->write_p = i_ldc_write_raw; 2265 break; 2266 case LDC_MODE_UNRELIABLE: 2267 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE; 2268 ldcp->read_p = i_ldc_read_packet; 2269 ldcp->write_p = i_ldc_write_packet; 2270 break; 2271 case LDC_MODE_RELIABLE: 2272 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; 2273 ldcp->read_p = i_ldc_read_packet; 2274 ldcp->write_p = i_ldc_write_packet; 2275 break; 2276 case LDC_MODE_STREAM: 2277 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; 2278 2279 ldcp->stream_remains = 0; 2280 ldcp->stream_offset = 0; 2281 ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP); 2282 ldcp->read_p = i_ldc_read_stream; 2283 ldcp->write_p = i_ldc_write_stream; 2284 break; 2285 default: 2286 exit_val = EINVAL; 2287 goto cleanup_on_exit; 2288 } 2289 2290 /* 2291 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this 2292 * value is smaller than the default length, ldc_queue_entries, 2293 * qlen is set to ldc_queue_entries. 2294 */ 2295 qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload; 2296 ldcp->rx_q_entries = 2297 (qlen < ldc_queue_entries) ?
ldc_queue_entries : qlen; 2298 ldcp->tx_q_entries = ldcp->rx_q_entries; 2299 2300 D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen); 2301 2302 /* Create a transmit queue */ 2303 ldcp->tx_q_va = (uint64_t) 2304 contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT); 2305 if (ldcp->tx_q_va == NULL) { 2306 cmn_err(CE_WARN, 2307 "ldc_init: (0x%lx) TX queue allocation failed\n", 2308 ldcp->id); 2309 exit_val = ENOMEM; 2310 goto cleanup_on_exit; 2311 } 2312 ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va); 2313 2314 D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n", 2315 ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries); 2316 2317 ldcp->tstate |= TS_TXQ_RDY; 2318 2319 /* Create a receive queue */ 2320 ldcp->rx_q_va = (uint64_t) 2321 contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT); 2322 if (ldcp->rx_q_va == NULL) { 2323 cmn_err(CE_WARN, 2324 "ldc_init: (0x%lx) RX queue allocation failed\n", 2325 ldcp->id); 2326 exit_val = ENOMEM; 2327 goto cleanup_on_exit; 2328 } 2329 ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va); 2330 2331 D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n", 2332 ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries); 2333 2334 ldcp->tstate |= TS_RXQ_RDY; 2335 2336 /* Init descriptor ring and memory handle list lock */ 2337 mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL); 2338 mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL); 2339 mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL); 2340 2341 /* mark status as INITialized */ 2342 ldcp->status = LDC_INIT; 2343 2344 /* Add to channel list */ 2345 mutex_enter(&ldcssp->lock); 2346 ldcp->next = ldcssp->chan_list; 2347 ldcssp->chan_list = ldcp; 2348 ldcssp->channel_count++; 2349 mutex_exit(&ldcssp->lock); 2350 2351 /* set the handle */ 2352 *handle = (ldc_handle_t)ldcp; 2353 2354 D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id); 2355 2356 return (0); 2357 2358 cleanup_on_exit: 2359 2360 if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) 2361 kmem_free(ldcp->stream_bufferp, ldcp->mtu); 2362 2363 if (ldcp->tstate & TS_TXQ_RDY) 2364 contig_mem_free((caddr_t)ldcp->tx_q_va, 2365 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 2366 2367 if (ldcp->tstate & TS_RXQ_RDY) 2368 contig_mem_free((caddr_t)ldcp->rx_q_va, 2369 (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); 2370 2371 mutex_destroy(&ldcp->tx_lock); 2372 mutex_destroy(&ldcp->lock); 2373 2374 if (ldcp) 2375 kmem_free(ldcp, sizeof (ldc_chan_t)); 2376 2377 return (exit_val); 2378 } 2379 2380 /* 2381 * Finalizes the LDC connection. It will return EBUSY if the 2382 * channel is open. A ldc_close() has to be done prior to 2383 * a ldc_fini operation. 
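* A minimal teardown sketch (illustrative only; error handling
* elided):
*
*	if (ldc_close(handle) == 0)
*		(void) ldc_fini(handle);
*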
It frees TX/RX queues, associated 2384 * with the channel 2385 */ 2386 int 2387 ldc_fini(ldc_handle_t handle) 2388 { 2389 ldc_chan_t *ldcp; 2390 ldc_chan_t *tmp_ldcp; 2391 uint64_t id; 2392 2393 if (handle == NULL) { 2394 DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n"); 2395 return (EINVAL); 2396 } 2397 ldcp = (ldc_chan_t *)handle; 2398 id = ldcp->id; 2399 2400 mutex_enter(&ldcp->lock); 2401 2402 if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) { 2403 DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n", 2404 ldcp->id); 2405 mutex_exit(&ldcp->lock); 2406 return (EBUSY); 2407 } 2408 2409 /* Remove from the channel list */ 2410 mutex_enter(&ldcssp->lock); 2411 tmp_ldcp = ldcssp->chan_list; 2412 if (tmp_ldcp == ldcp) { 2413 ldcssp->chan_list = ldcp->next; 2414 ldcp->next = NULL; 2415 } else { 2416 while (tmp_ldcp != NULL) { 2417 if (tmp_ldcp->next == ldcp) { 2418 tmp_ldcp->next = ldcp->next; 2419 ldcp->next = NULL; 2420 break; 2421 } 2422 tmp_ldcp = tmp_ldcp->next; 2423 } 2424 if (tmp_ldcp == NULL) { 2425 DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n"); 2426 mutex_exit(&ldcssp->lock); 2427 mutex_exit(&ldcp->lock); 2428 return (EINVAL); 2429 } 2430 } 2431 2432 ldcssp->channel_count--; 2433 2434 mutex_exit(&ldcssp->lock); 2435 2436 /* Free the map table for this channel */ 2437 if (ldcp->mtbl) { 2438 (void) hv_ldc_set_map_table(ldcp->id, NULL, NULL); 2439 if (ldcp->mtbl->contigmem) 2440 contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size); 2441 else 2442 kmem_free(ldcp->mtbl->table, ldcp->mtbl->size); 2443 mutex_destroy(&ldcp->mtbl->lock); 2444 kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t)); 2445 } 2446 2447 /* Destroy descriptor ring and memory handle list lock */ 2448 mutex_destroy(&ldcp->exp_dlist_lock); 2449 mutex_destroy(&ldcp->imp_dlist_lock); 2450 mutex_destroy(&ldcp->mlist_lock); 2451 2452 /* Free the stream buffer for STREAM_MODE */ 2453 if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) 2454 kmem_free(ldcp->stream_bufferp, ldcp->mtu); 2455 2456 /* Free the RX queue */ 2457 contig_mem_free((caddr_t)ldcp->rx_q_va, 2458 (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); 2459 ldcp->tstate &= ~TS_RXQ_RDY; 2460 2461 /* Free the TX queue */ 2462 contig_mem_free((caddr_t)ldcp->tx_q_va, 2463 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 2464 ldcp->tstate &= ~TS_TXQ_RDY; 2465 2466 mutex_exit(&ldcp->lock); 2467 2468 /* Destroy mutex */ 2469 mutex_destroy(&ldcp->tx_lock); 2470 mutex_destroy(&ldcp->lock); 2471 2472 /* free channel structure */ 2473 kmem_free(ldcp, sizeof (ldc_chan_t)); 2474 2475 D1(id, "ldc_fini: (0x%llx) channel finalized\n", id); 2476 2477 return (0); 2478 } 2479 2480 /* 2481 * Open the LDC channel for use. It registers the TX/RX queues 2482 * with the Hypervisor. 
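* Before ldc_open() is called the channel must have been set up
* with ldc_init(). A typical bring-up sequence is sketched below;
* "chan_id", "my_cb" and "my_arg" are caller-supplied and the
* attribute values are illustrative (an mtu of 0 selects
* LDC_DEFAULT_MTU):
*
*	ldc_attr_t attr;
*	ldc_handle_t handle;
*
*	attr.devclass = LDC_DEV_GENERIC;
*	attr.instance = 0;
*	attr.mode = LDC_MODE_UNRELIABLE;
*	attr.mtu = 0;
*	(void) ldc_init(chan_id, &attr, &handle);
*	(void) ldc_reg_callback(handle, my_cb, my_arg);
*	(void) ldc_open(handle);
*	(void) ldc_up(handle);
*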
It also specifies the interrupt number 2483 * and target CPU for this channel 2484 */ 2485 int 2486 ldc_open(ldc_handle_t handle) 2487 { 2488 ldc_chan_t *ldcp; 2489 int rv; 2490 2491 if (handle == NULL) { 2492 DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n"); 2493 return (EINVAL); 2494 } 2495 2496 ldcp = (ldc_chan_t *)handle; 2497 2498 mutex_enter(&ldcp->lock); 2499 2500 if (ldcp->tstate < TS_INIT) { 2501 DWARN(ldcp->id, 2502 "ldc_open: (0x%llx) channel not initialized\n", ldcp->id); 2503 mutex_exit(&ldcp->lock); 2504 return (EFAULT); 2505 } 2506 if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) { 2507 DWARN(ldcp->id, 2508 "ldc_open: (0x%llx) channel is already open\n", ldcp->id); 2509 mutex_exit(&ldcp->lock); 2510 return (EFAULT); 2511 } 2512 2513 /* 2514 * Unregister/Register the tx queue with the hypervisor 2515 */ 2516 rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2517 if (rv) { 2518 cmn_err(CE_WARN, 2519 "ldc_open: (0x%lx) channel tx queue unconf failed\n", 2520 ldcp->id); 2521 mutex_exit(&ldcp->lock); 2522 return (EIO); 2523 } 2524 2525 rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries); 2526 if (rv) { 2527 cmn_err(CE_WARN, 2528 "ldc_open: (0x%lx) channel tx queue conf failed\n", 2529 ldcp->id); 2530 mutex_exit(&ldcp->lock); 2531 return (EIO); 2532 } 2533 2534 D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n", 2535 ldcp->id); 2536 2537 /* 2538 * Unregister/Register the rx queue with the hypervisor 2539 */ 2540 rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2541 if (rv) { 2542 cmn_err(CE_WARN, 2543 "ldc_open: (0x%lx) channel rx queue unconf failed\n", 2544 ldcp->id); 2545 mutex_exit(&ldcp->lock); 2546 return (EIO); 2547 } 2548 2549 rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries); 2550 if (rv) { 2551 cmn_err(CE_WARN, 2552 "ldc_open: (0x%lx) channel rx queue conf failed\n", 2553 ldcp->id); 2554 mutex_exit(&ldcp->lock); 2555 return (EIO); 2556 } 2557 2558 D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n", 2559 ldcp->id); 2560 2561 ldcp->tstate |= TS_QCONF_RDY; 2562 2563 /* Register the channel with the channel nexus */ 2564 rv = i_ldc_register_channel(ldcp); 2565 if (rv && rv != EAGAIN) { 2566 cmn_err(CE_WARN, 2567 "ldc_open: (0x%lx) channel register failed\n", ldcp->id); 2568 (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2569 (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2570 mutex_exit(&ldcp->lock); 2571 return (EIO); 2572 } 2573 2574 /* mark channel in OPEN state */ 2575 ldcp->status = LDC_OPEN; 2576 2577 /* Read channel state */ 2578 rv = hv_ldc_tx_get_state(ldcp->id, 2579 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 2580 if (rv) { 2581 cmn_err(CE_WARN, 2582 "ldc_open: (0x%lx) cannot read channel state\n", 2583 ldcp->id); 2584 (void) i_ldc_unregister_channel(ldcp); 2585 (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2586 (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2587 mutex_exit(&ldcp->lock); 2588 return (EIO); 2589 } 2590 2591 /* 2592 * set the ACKd head to current head location for reliable & 2593 * streaming mode 2594 */ 2595 ldcp->tx_ackd_head = ldcp->tx_head; 2596 2597 /* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */ 2598 if (ldcp->link_state == LDC_CHANNEL_UP || 2599 ldcp->link_state == LDC_CHANNEL_RESET) { 2600 ldcp->tstate |= TS_LINK_READY; 2601 ldcp->status = LDC_READY; 2602 } 2603 2604 /* 2605 * if channel is being opened in RAW mode - no handshake is needed 2606 * switch the channel READY and UP state 2607 */ 2608 if (ldcp->mode == LDC_MODE_RAW) { 2609 ldcp->tstate = TS_UP; /* 
set bits associated with LDC UP */ 2610 ldcp->status = LDC_UP; 2611 } 2612 2613 mutex_exit(&ldcp->lock); 2614 2615 /* 2616 * Increment number of open channels 2617 */ 2618 mutex_enter(&ldcssp->lock); 2619 ldcssp->channels_open++; 2620 mutex_exit(&ldcssp->lock); 2621 2622 D1(ldcp->id, 2623 "ldc_open: (0x%llx) channel (0x%p) open for use " 2624 "(tstate=0x%x, status=0x%x)\n", 2625 ldcp->id, ldcp, ldcp->tstate, ldcp->status); 2626 2627 return (0); 2628 } 2629 2630 /* 2631 * Close the LDC connection. It will return EBUSY if there 2632 * are memory segments or descriptor rings either bound to or 2633 * mapped over the channel 2634 */ 2635 int 2636 ldc_close(ldc_handle_t handle) 2637 { 2638 ldc_chan_t *ldcp; 2639 int rv = 0, retries = 0; 2640 boolean_t chk_done = B_FALSE; 2641 2642 if (handle == NULL) { 2643 DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n"); 2644 return (EINVAL); 2645 } 2646 ldcp = (ldc_chan_t *)handle; 2647 2648 mutex_enter(&ldcp->lock); 2649 2650 /* return error if channel is not open */ 2651 if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) { 2652 DWARN(ldcp->id, 2653 "ldc_close: (0x%llx) channel is not open\n", ldcp->id); 2654 mutex_exit(&ldcp->lock); 2655 return (EFAULT); 2656 } 2657 2658 /* if any memory handles, drings, are bound or mapped cannot close */ 2659 if (ldcp->mhdl_list != NULL) { 2660 DWARN(ldcp->id, 2661 "ldc_close: (0x%llx) channel has bound memory handles\n", 2662 ldcp->id); 2663 mutex_exit(&ldcp->lock); 2664 return (EBUSY); 2665 } 2666 if (ldcp->exp_dring_list != NULL) { 2667 DWARN(ldcp->id, 2668 "ldc_close: (0x%llx) channel has bound descriptor rings\n", 2669 ldcp->id); 2670 mutex_exit(&ldcp->lock); 2671 return (EBUSY); 2672 } 2673 if (ldcp->imp_dring_list != NULL) { 2674 DWARN(ldcp->id, 2675 "ldc_close: (0x%llx) channel has mapped descriptor rings\n", 2676 ldcp->id); 2677 mutex_exit(&ldcp->lock); 2678 return (EBUSY); 2679 } 2680 2681 if (ldcp->cb_inprogress) { 2682 DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n", 2683 ldcp->id); 2684 mutex_exit(&ldcp->lock); 2685 return (EWOULDBLOCK); 2686 } 2687 2688 /* Obtain Tx lock */ 2689 mutex_enter(&ldcp->tx_lock); 2690 2691 /* 2692 * Wait for pending transmits to complete i.e Tx queue to drain 2693 * if there are pending pkts - wait 1 ms and retry again 2694 */ 2695 for (;;) { 2696 2697 rv = hv_ldc_tx_get_state(ldcp->id, 2698 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 2699 if (rv) { 2700 cmn_err(CE_WARN, 2701 "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id); 2702 mutex_exit(&ldcp->tx_lock); 2703 mutex_exit(&ldcp->lock); 2704 return (EIO); 2705 } 2706 2707 if (ldcp->tx_head == ldcp->tx_tail || 2708 ldcp->link_state != LDC_CHANNEL_UP) { 2709 break; 2710 } 2711 2712 if (chk_done) { 2713 DWARN(ldcp->id, 2714 "ldc_close: (0x%llx) Tx queue drain timeout\n", 2715 ldcp->id); 2716 break; 2717 } 2718 2719 /* wait for one ms and try again */ 2720 delay(drv_usectohz(1000)); 2721 chk_done = B_TRUE; 2722 } 2723 2724 /* 2725 * Drain the Tx and Rx queues as we are closing the 2726 * channel. We dont care about any pending packets. 2727 * We have to also drain the queue prior to clearing 2728 * pending interrupts, otherwise the HV will trigger 2729 * an interrupt the moment the interrupt state is 2730 * cleared. 
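* Hence the order below: reconfigure the Tx queue and drain the
* Rx queue first, then unregister the channel with the nexus.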
2731 */ 2732 (void) i_ldc_txq_reconf(ldcp); 2733 (void) i_ldc_rxq_drain(ldcp); 2734 2735 /* 2736 * Unregister the channel with the nexus 2737 */ 2738 while ((rv = i_ldc_unregister_channel(ldcp)) != 0) { 2739 2740 mutex_exit(&ldcp->tx_lock); 2741 mutex_exit(&ldcp->lock); 2742 2743 /* if any error other than EAGAIN return back */ 2744 if (rv != EAGAIN || retries >= ldc_max_retries) { 2745 cmn_err(CE_WARN, 2746 "ldc_close: (0x%lx) unregister failed, %d\n", 2747 ldcp->id, rv); 2748 return (rv); 2749 } 2750 2751 /* 2752 * As there could be pending interrupts we need 2753 * to wait and try again 2754 */ 2755 drv_usecwait(ldc_close_delay); 2756 mutex_enter(&ldcp->lock); 2757 mutex_enter(&ldcp->tx_lock); 2758 retries++; 2759 } 2760 2761 /* 2762 * Unregister queues 2763 */ 2764 rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2765 if (rv) { 2766 cmn_err(CE_WARN, 2767 "ldc_close: (0x%lx) channel TX queue unconf failed\n", 2768 ldcp->id); 2769 mutex_exit(&ldcp->tx_lock); 2770 mutex_exit(&ldcp->lock); 2771 return (EIO); 2772 } 2773 rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2774 if (rv) { 2775 cmn_err(CE_WARN, 2776 "ldc_close: (0x%lx) channel RX queue unconf failed\n", 2777 ldcp->id); 2778 mutex_exit(&ldcp->tx_lock); 2779 mutex_exit(&ldcp->lock); 2780 return (EIO); 2781 } 2782 2783 ldcp->tstate &= ~TS_QCONF_RDY; 2784 2785 /* Reset channel state information */ 2786 i_ldc_reset_state(ldcp); 2787 2788 /* Mark channel as down and in initialized state */ 2789 ldcp->tx_ackd_head = 0; 2790 ldcp->tx_head = 0; 2791 ldcp->tstate = TS_IN_RESET|TS_INIT; 2792 ldcp->status = LDC_INIT; 2793 2794 mutex_exit(&ldcp->tx_lock); 2795 mutex_exit(&ldcp->lock); 2796 2797 /* Decrement number of open channels */ 2798 mutex_enter(&ldcssp->lock); 2799 ldcssp->channels_open--; 2800 mutex_exit(&ldcssp->lock); 2801 2802 D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id); 2803 2804 return (0); 2805 } 2806 2807 /* 2808 * Register channel callback 2809 */ 2810 int 2811 ldc_reg_callback(ldc_handle_t handle, 2812 uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg) 2813 { 2814 ldc_chan_t *ldcp; 2815 2816 if (handle == NULL) { 2817 DWARN(DBG_ALL_LDCS, 2818 "ldc_reg_callback: invalid channel handle\n"); 2819 return (EINVAL); 2820 } 2821 if (((uint64_t)cb) < KERNELBASE) { 2822 DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n"); 2823 return (EINVAL); 2824 } 2825 ldcp = (ldc_chan_t *)handle; 2826 2827 mutex_enter(&ldcp->lock); 2828 2829 if (ldcp->cb) { 2830 DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n", 2831 ldcp->id); 2832 mutex_exit(&ldcp->lock); 2833 return (EIO); 2834 } 2835 if (ldcp->cb_inprogress) { 2836 DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n", 2837 ldcp->id); 2838 mutex_exit(&ldcp->lock); 2839 return (EWOULDBLOCK); 2840 } 2841 2842 ldcp->cb = cb; 2843 ldcp->cb_arg = arg; 2844 ldcp->cb_enabled = B_TRUE; 2845 2846 D1(ldcp->id, 2847 "ldc_reg_callback: (0x%llx) registered callback for channel\n", 2848 ldcp->id); 2849 2850 mutex_exit(&ldcp->lock); 2851 2852 return (0); 2853 } 2854 2855 /* 2856 * Unregister channel callback 2857 */ 2858 int 2859 ldc_unreg_callback(ldc_handle_t handle) 2860 { 2861 ldc_chan_t *ldcp; 2862 2863 if (handle == NULL) { 2864 DWARN(DBG_ALL_LDCS, 2865 "ldc_unreg_callback: invalid channel handle\n"); 2866 return (EINVAL); 2867 } 2868 ldcp = (ldc_chan_t *)handle; 2869 2870 mutex_enter(&ldcp->lock); 2871 2872 if (ldcp->cb == NULL) { 2873 DWARN(ldcp->id, 2874 "ldc_unreg_callback: (0x%llx) no callback exists\n", 2875 ldcp->id); 2876 mutex_exit(&ldcp->lock); 2877 
return (EIO); 2878 } 2879 if (ldcp->cb_inprogress) { 2880 DWARN(ldcp->id, 2881 "ldc_unreg_callback: (0x%llx) callback active\n", 2882 ldcp->id); 2883 mutex_exit(&ldcp->lock); 2884 return (EWOULDBLOCK); 2885 } 2886 2887 ldcp->cb = NULL; 2888 ldcp->cb_arg = NULL; 2889 ldcp->cb_enabled = B_FALSE; 2890 2891 D1(ldcp->id, 2892 "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n", 2893 ldcp->id); 2894 2895 mutex_exit(&ldcp->lock); 2896 2897 return (0); 2898 } 2899 2900 2901 /* 2902 * Bring a channel up by initiating a handshake with the peer. 2903 * This call is asynchronous. It will complete at a later point 2904 * in time when the peer responds back with an RTR. 2905 */ 2906 int 2907 ldc_up(ldc_handle_t handle) 2908 { 2909 int rv; 2910 ldc_chan_t *ldcp; 2911 ldc_msg_t *ldcmsg; 2912 uint64_t tx_tail, tstate, link_state; 2913 2914 if (handle == NULL) { 2915 DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n"); 2916 return (EINVAL); 2917 } 2918 ldcp = (ldc_chan_t *)handle; 2919 2920 mutex_enter(&ldcp->lock); 2921 2922 D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id); 2923 2924 /* clear the reset state */ 2925 tstate = ldcp->tstate; 2926 ldcp->tstate &= ~TS_IN_RESET; 2927 2928 if (ldcp->tstate == TS_UP) { 2929 DWARN(ldcp->id, 2930 "ldc_up: (0x%llx) channel is already in UP state\n", 2931 ldcp->id); 2932 2933 /* mark channel as up */ 2934 ldcp->status = LDC_UP; 2935 2936 /* 2937 * if the channel was in reset state and there was 2938 * pending data, clear the interrupt state. This will 2939 * trigger an interrupt, causing the RX handler to 2940 * invoke the client's callback 2941 */ 2942 if ((tstate & TS_IN_RESET) && 2943 ldcp->rx_intr_state == LDC_INTR_PEND) { 2944 D1(ldcp->id, 2945 "ldc_up: (0x%llx) channel has pending data, " 2946 "clearing interrupt\n", ldcp->id); 2947 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 2948 } 2949 2950 mutex_exit(&ldcp->lock); 2951 return (0); 2952 } 2953 2954 /* if the channel is in RAW mode - mark it as UP, if READY */ 2955 if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) { 2956 ldcp->tstate = TS_UP; 2957 mutex_exit(&ldcp->lock); 2958 return (0); 2959 } 2960 2961 /* Don't start another handshake if there is one in progress */ 2962 if (ldcp->hstate) { 2963 D1(ldcp->id, 2964 "ldc_up: (0x%llx) channel handshake in progress\n", 2965 ldcp->id); 2966 mutex_exit(&ldcp->lock); 2967 return (0); 2968 } 2969 2970 mutex_enter(&ldcp->tx_lock); 2971 2972 /* save current link state */ 2973 link_state = ldcp->link_state; 2974 2975 /* get the current tail for the LDC msg */ 2976 rv = i_ldc_get_tx_tail(ldcp, &tx_tail); 2977 if (rv) { 2978 D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n", 2979 ldcp->id); 2980 mutex_exit(&ldcp->tx_lock); 2981 mutex_exit(&ldcp->lock); 2982 return (ECONNREFUSED); 2983 } 2984 2985 /* 2986 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP, 2987 * from a previous state of DOWN, then mark the channel as 2988 * being ready for handshake.
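* ('link_state' holds the value saved before the call above, while
* ldcp->link_state holds the value the call just wrote back.)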
2989 */ 2990 if ((link_state == LDC_CHANNEL_DOWN) && 2991 (link_state != ldcp->link_state)) { 2992 2993 ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) || 2994 (ldcp->link_state == LDC_CHANNEL_UP)); 2995 2996 if (ldcp->mode == LDC_MODE_RAW) { 2997 ldcp->status = LDC_UP; 2998 ldcp->tstate = TS_UP; 2999 mutex_exit(&ldcp->tx_lock); 3000 mutex_exit(&ldcp->lock); 3001 return (0); 3002 } else { 3003 ldcp->status = LDC_READY; 3004 ldcp->tstate |= TS_LINK_READY; 3005 } 3006 3007 } 3008 3009 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 3010 ZERO_PKT(ldcmsg); 3011 3012 ldcmsg->type = LDC_CTRL; 3013 ldcmsg->stype = LDC_INFO; 3014 ldcmsg->ctrl = LDC_VER; 3015 ldcp->next_vidx = 0; 3016 bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0])); 3017 3018 DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg); 3019 3020 /* initiate the send by calling into HV and set the new tail */ 3021 tx_tail = (tx_tail + LDC_PACKET_SIZE) % 3022 (ldcp->tx_q_entries << LDC_PACKET_SHIFT); 3023 3024 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 3025 if (rv) { 3026 DWARN(ldcp->id, 3027 "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n", 3028 ldcp->id, rv); 3029 mutex_exit(&ldcp->tx_lock); 3030 mutex_exit(&ldcp->lock); 3031 return (rv); 3032 } 3033 3034 ldcp->hstate |= TS_SENT_VER; 3035 ldcp->tx_tail = tx_tail; 3036 D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id); 3037 3038 mutex_exit(&ldcp->tx_lock); 3039 mutex_exit(&ldcp->lock); 3040 3041 return (rv); 3042 } 3043 3044 3045 /* 3046 * Bring a channel down by resetting its state and queues 3047 */ 3048 int 3049 ldc_down(ldc_handle_t handle) 3050 { 3051 ldc_chan_t *ldcp; 3052 3053 if (handle == NULL) { 3054 DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n"); 3055 return (EINVAL); 3056 } 3057 ldcp = (ldc_chan_t *)handle; 3058 mutex_enter(&ldcp->lock); 3059 mutex_enter(&ldcp->tx_lock); 3060 i_ldc_reset(ldcp, B_TRUE); 3061 mutex_exit(&ldcp->tx_lock); 3062 mutex_exit(&ldcp->lock); 3063 3064 return (0); 3065 } 3066 3067 /* 3068 * Get the current channel status 3069 */ 3070 int 3071 ldc_status(ldc_handle_t handle, ldc_status_t *status) 3072 { 3073 ldc_chan_t *ldcp; 3074 3075 if (handle == NULL || status == NULL) { 3076 DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n"); 3077 return (EINVAL); 3078 } 3079 ldcp = (ldc_chan_t *)handle; 3080 3081 *status = ((ldc_chan_t *)handle)->status; 3082 3083 D1(ldcp->id, 3084 "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status); 3085 return (0); 3086 } 3087 3088 3089 /* 3090 * Set the channel's callback mode - enable/disable callbacks 3091 */ 3092 int 3093 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode) 3094 { 3095 ldc_chan_t *ldcp; 3096 3097 if (handle == NULL) { 3098 DWARN(DBG_ALL_LDCS, 3099 "ldc_set_intr_mode: invalid channel handle\n"); 3100 return (EINVAL); 3101 } 3102 ldcp = (ldc_chan_t *)handle; 3103 3104 /* 3105 * Record no callbacks should be invoked 3106 */ 3107 mutex_enter(&ldcp->lock); 3108 3109 switch (cmode) { 3110 case LDC_CB_DISABLE: 3111 if (!ldcp->cb_enabled) { 3112 DWARN(ldcp->id, 3113 "ldc_set_cb_mode: (0x%llx) callbacks disabled\n", 3114 ldcp->id); 3115 break; 3116 } 3117 ldcp->cb_enabled = B_FALSE; 3118 3119 D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n", 3120 ldcp->id); 3121 break; 3122 3123 case LDC_CB_ENABLE: 3124 if (ldcp->cb_enabled) { 3125 DWARN(ldcp->id, 3126 "ldc_set_cb_mode: (0x%llx) callbacks enabled\n", 3127 ldcp->id); 3128 break; 3129 } 3130 ldcp->cb_enabled = B_TRUE; 3131 3132 D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n", 3133 
ldcp->id); 3134 break; 3135 } 3136 3137 mutex_exit(&ldcp->lock); 3138 3139 return (0); 3140 } 3141 3142 /* 3143 * Check to see if there are packets on the incoming queue 3144 * Will return hasdata = B_FALSE if there are no packets 3145 */ 3146 int 3147 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata) 3148 { 3149 int rv; 3150 uint64_t rx_head, rx_tail; 3151 ldc_chan_t *ldcp; 3152 3153 if (handle == NULL) { 3154 DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n"); 3155 return (EINVAL); 3156 } 3157 ldcp = (ldc_chan_t *)handle; 3158 3159 *hasdata = B_FALSE; 3160 3161 mutex_enter(&ldcp->lock); 3162 3163 if (ldcp->tstate != TS_UP) { 3164 D1(ldcp->id, 3165 "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id); 3166 mutex_exit(&ldcp->lock); 3167 return (ECONNRESET); 3168 } 3169 3170 /* Read packet(s) from the queue */ 3171 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3172 &ldcp->link_state); 3173 if (rv != 0) { 3174 cmn_err(CE_WARN, 3175 "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id); 3176 mutex_exit(&ldcp->lock); 3177 return (EIO); 3178 } 3179 /* reset the channel state if the channel went down */ 3180 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3181 ldcp->link_state == LDC_CHANNEL_RESET) { 3182 mutex_enter(&ldcp->tx_lock); 3183 i_ldc_reset(ldcp, B_FALSE); 3184 mutex_exit(&ldcp->tx_lock); 3185 mutex_exit(&ldcp->lock); 3186 return (ECONNRESET); 3187 } 3188 3189 if ((rx_head != rx_tail) || 3190 (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) { 3191 D1(ldcp->id, 3192 "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n", 3193 ldcp->id); 3194 *hasdata = B_TRUE; 3195 } 3196 3197 mutex_exit(&ldcp->lock); 3198 3199 return (0); 3200 } 3201 3202 3203 /* 3204 * Read 'size' amount of bytes or less. If incoming buffer 3205 * is more than 'size', ENOBUFS is returned. 3206 * 3207 * On return, size contains the number of bytes read. 3208 */ 3209 int 3210 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep) 3211 { 3212 ldc_chan_t *ldcp; 3213 uint64_t rx_head = 0, rx_tail = 0; 3214 int rv = 0, exit_val; 3215 3216 if (handle == NULL) { 3217 DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n"); 3218 return (EINVAL); 3219 } 3220 3221 ldcp = (ldc_chan_t *)handle; 3222 3223 /* channel lock */ 3224 mutex_enter(&ldcp->lock); 3225 3226 if (ldcp->tstate != TS_UP) { 3227 DWARN(ldcp->id, 3228 "ldc_read: (0x%llx) channel is not in UP state\n", 3229 ldcp->id); 3230 exit_val = ECONNRESET; 3231 } else { 3232 exit_val = ldcp->read_p(ldcp, bufp, sizep); 3233 } 3234 3235 /* 3236 * if queue has been drained - clear interrupt 3237 */ 3238 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3239 &ldcp->link_state); 3240 if (rv != 0) { 3241 cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", 3242 ldcp->id); 3243 mutex_enter(&ldcp->tx_lock); 3244 i_ldc_reset(ldcp, B_TRUE); 3245 mutex_exit(&ldcp->tx_lock); 3246 mutex_exit(&ldcp->lock); 3247 return (ECONNRESET); 3248 } 3249 3250 if (exit_val == 0) { 3251 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3252 ldcp->link_state == LDC_CHANNEL_RESET) { 3253 mutex_enter(&ldcp->tx_lock); 3254 i_ldc_reset(ldcp, B_FALSE); 3255 exit_val = ECONNRESET; 3256 mutex_exit(&ldcp->tx_lock); 3257 } 3258 if ((rv == 0) && 3259 (ldcp->rx_intr_state == LDC_INTR_PEND) && 3260 (rx_head == rx_tail)) { 3261 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 3262 } 3263 } 3264 3265 mutex_exit(&ldcp->lock); 3266 return (exit_val); 3267 } 3268 3269 /* 3270 * Basic raw mondo read - 3271 * no interpretation of mondo contents at all. 
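* Each call consumes exactly one packet from the queue and returns
* LDC_PAYLOAD_SIZE_RAW bytes of payload; the caller's buffer must
* be at least that large or ENOBUFS is returned.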
3272 * 3273 * Enter and exit with ldcp->lock held by caller 3274 */ 3275 static int 3276 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3277 { 3278 uint64_t q_size_mask; 3279 ldc_msg_t *msgp; 3280 uint8_t *msgbufp; 3281 int rv = 0, space; 3282 uint64_t rx_head, rx_tail; 3283 3284 space = *sizep; 3285 3286 if (space < LDC_PAYLOAD_SIZE_RAW) 3287 return (ENOBUFS); 3288 3289 ASSERT(mutex_owned(&ldcp->lock)); 3290 3291 /* compute mask for increment */ 3292 q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT; 3293 3294 /* 3295 * Read packet(s) from the queue 3296 */ 3297 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3298 &ldcp->link_state); 3299 if (rv != 0) { 3300 cmn_err(CE_WARN, 3301 "ldc_read_raw: (0x%lx) unable to read queue ptrs", 3302 ldcp->id); 3303 return (EIO); 3304 } 3305 D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx," 3306 " rxt=0x%llx, st=0x%llx\n", 3307 ldcp->id, rx_head, rx_tail, ldcp->link_state); 3308 3309 /* reset the channel state if the channel went down */ 3310 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3311 ldcp->link_state == LDC_CHANNEL_RESET) { 3312 mutex_enter(&ldcp->tx_lock); 3313 i_ldc_reset(ldcp, B_FALSE); 3314 mutex_exit(&ldcp->tx_lock); 3315 return (ECONNRESET); 3316 } 3317 3318 /* 3319 * Check for empty queue 3320 */ 3321 if (rx_head == rx_tail) { 3322 *sizep = 0; 3323 return (0); 3324 } 3325 3326 /* get the message */ 3327 msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head); 3328 3329 /* if channel is in RAW mode, copy data and return */ 3330 msgbufp = (uint8_t *)&(msgp->raw[0]); 3331 3332 bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW); 3333 3334 DUMP_PAYLOAD(ldcp->id, msgbufp); 3335 3336 *sizep = LDC_PAYLOAD_SIZE_RAW; 3337 3338 rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask; 3339 rv = i_ldc_set_rx_head(ldcp, rx_head); 3340 3341 return (rv); 3342 } 3343 3344 /* 3345 * Process LDC mondos to build larger packets 3346 * with either unreliable or reliable delivery.
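* Fragmented messages are reassembled using the LDC_FRAG_START and
* LDC_FRAG_STOP envelope bits; data is handed back to the caller
* only once the packet carrying LDC_FRAG_STOP has been seen.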
3347 * 3348 * Enter and exit with ldcp->lock held by caller 3349 */ 3350 static int 3351 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3352 { 3353 int rv = 0; 3354 uint64_t rx_head = 0, rx_tail = 0; 3355 uint64_t curr_head = 0; 3356 ldc_msg_t *msg; 3357 caddr_t target; 3358 size_t len = 0, bytes_read = 0; 3359 int retries = 0; 3360 uint64_t q_size_mask; 3361 uint64_t first_fragment = 0; 3362 3363 target = target_bufp; 3364 3365 ASSERT(mutex_owned(&ldcp->lock)); 3366 3367 /* check if the buffer and size are valid */ 3368 if (target_bufp == NULL || *sizep == 0) { 3369 DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n", 3370 ldcp->id); 3371 return (EINVAL); 3372 } 3373 3374 /* compute mask for increment */ 3375 q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT; 3376 3377 /* 3378 * Read packet(s) from the queue 3379 */ 3380 rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail, 3381 &ldcp->link_state); 3382 if (rv != 0) { 3383 cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", 3384 ldcp->id); 3385 mutex_enter(&ldcp->tx_lock); 3386 i_ldc_reset(ldcp, B_TRUE); 3387 mutex_exit(&ldcp->tx_lock); 3388 return (ECONNRESET); 3389 } 3390 D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n", 3391 ldcp->id, curr_head, rx_tail, ldcp->link_state); 3392 3393 /* reset the channel state if the channel went down */ 3394 if (ldcp->link_state != LDC_CHANNEL_UP) 3395 goto channel_is_reset; 3396 3397 for (;;) { 3398 3399 if (curr_head == rx_tail) { 3400 rv = hv_ldc_rx_get_state(ldcp->id, 3401 &rx_head, &rx_tail, &ldcp->link_state); 3402 if (rv != 0) { 3403 cmn_err(CE_WARN, 3404 "ldc_read: (0x%lx) cannot read queue ptrs", 3405 ldcp->id); 3406 mutex_enter(&ldcp->tx_lock); 3407 i_ldc_reset(ldcp, B_TRUE); 3408 mutex_exit(&ldcp->tx_lock); 3409 return (ECONNRESET); 3410 } 3411 if (ldcp->link_state != LDC_CHANNEL_UP) 3412 goto channel_is_reset; 3413 3414 if (curr_head == rx_tail) { 3415 3416 /* If in the middle of a fragmented xfer */ 3417 if (first_fragment != 0) { 3418 3419 /* wait for ldc_delay usecs */ 3420 drv_usecwait(ldc_delay); 3421 3422 if (++retries < ldc_max_retries) 3423 continue; 3424 3425 *sizep = 0; 3426 ldcp->last_msg_rcd = first_fragment - 1; 3427 DWARN(DBG_ALL_LDCS, "ldc_read: " 3428 "(0x%llx) read timeout", 3429 ldcp->id); 3430 return (EAGAIN); 3431 } 3432 *sizep = 0; 3433 break; 3434 } 3435 } 3436 retries = 0; 3437 3438 D2(ldcp->id, 3439 "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n", 3440 ldcp->id, curr_head, rx_head, rx_tail); 3441 3442 /* get the message */ 3443 msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head); 3444 3445 DUMP_LDC_PKT(ldcp, "ldc_read received pkt", 3446 ldcp->rx_q_va + curr_head); 3447 3448 /* Check the message ID for the message received */ 3449 if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) { 3450 3451 DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, " 3452 "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail); 3453 3454 /* throw away data */ 3455 bytes_read = 0; 3456 3457 /* Reset last_msg_rcd to start of message */ 3458 if (first_fragment != 0) { 3459 ldcp->last_msg_rcd = first_fragment - 1; 3460 first_fragment = 0; 3461 } 3462 /* 3463 * Send a NACK -- invalid seqid 3464 * get the current tail for the response 3465 */ 3466 rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK, 3467 (msg->ctrl & LDC_CTRL_MASK)); 3468 if (rv) { 3469 cmn_err(CE_NOTE, 3470 "ldc_read: (0x%lx) err sending " 3471 "NACK msg\n", ldcp->id); 3472 3473 /* if cannot send NACK - reset channel */ 3474 mutex_enter(&ldcp->tx_lock); 3475 
i_ldc_reset(ldcp, B_FALSE); 3476 mutex_exit(&ldcp->tx_lock); 3477 rv = ECONNRESET; 3478 break; 3479 } 3480 3481 /* purge receive queue */ 3482 rv = i_ldc_set_rx_head(ldcp, rx_tail); 3483 3484 break; 3485 } 3486 3487 /* 3488 * Process any messages of type CTRL messages 3489 * Future implementations should try to pass these 3490 * to LDC link by resetting the intr state. 3491 * 3492 * NOTE: not done as a switch() as type can be both ctrl+data 3493 */ 3494 if (msg->type & LDC_CTRL) { 3495 if (rv = i_ldc_ctrlmsg(ldcp, msg)) { 3496 if (rv == EAGAIN) 3497 continue; 3498 rv = i_ldc_set_rx_head(ldcp, rx_tail); 3499 *sizep = 0; 3500 bytes_read = 0; 3501 break; 3502 } 3503 } 3504 3505 /* process data ACKs */ 3506 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 3507 if (rv = i_ldc_process_data_ACK(ldcp, msg)) { 3508 *sizep = 0; 3509 bytes_read = 0; 3510 break; 3511 } 3512 } 3513 3514 /* process data NACKs */ 3515 if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) { 3516 DWARN(ldcp->id, 3517 "ldc_read: (0x%llx) received DATA/NACK", ldcp->id); 3518 mutex_enter(&ldcp->tx_lock); 3519 i_ldc_reset(ldcp, B_TRUE); 3520 mutex_exit(&ldcp->tx_lock); 3521 return (ECONNRESET); 3522 } 3523 3524 /* process data messages */ 3525 if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { 3526 3527 uint8_t *msgbuf = (uint8_t *)( 3528 (ldcp->mode == LDC_MODE_RELIABLE || 3529 ldcp->mode == LDC_MODE_STREAM) 3530 ? msg->rdata : msg->udata); 3531 3532 D2(ldcp->id, 3533 "ldc_read: (0x%llx) received data msg\n", ldcp->id); 3534 3535 /* get the packet length */ 3536 len = (msg->env & LDC_LEN_MASK); 3537 3538 /* 3539 * FUTURE OPTIMIZATION: 3540 * dont need to set q head for every 3541 * packet we read just need to do this when 3542 * we are done or need to wait for more 3543 * mondos to make a full packet - this is 3544 * currently expensive. 3545 */ 3546 3547 if (first_fragment == 0) { 3548 3549 /* 3550 * first packets should always have the start 3551 * bit set (even for a single packet). If not 3552 * throw away the packet 3553 */ 3554 if (!(msg->env & LDC_FRAG_START)) { 3555 3556 DWARN(DBG_ALL_LDCS, 3557 "ldc_read: (0x%llx) not start - " 3558 "frag=%x\n", ldcp->id, 3559 (msg->env) & LDC_FRAG_MASK); 3560 3561 /* toss pkt, inc head, cont reading */ 3562 bytes_read = 0; 3563 target = target_bufp; 3564 curr_head = 3565 (curr_head + LDC_PACKET_SIZE) 3566 & q_size_mask; 3567 if (rv = i_ldc_set_rx_head(ldcp, 3568 curr_head)) 3569 break; 3570 3571 continue; 3572 } 3573 3574 first_fragment = msg->seqid; 3575 } else { 3576 /* check to see if this is a pkt w/ START bit */ 3577 if (msg->env & LDC_FRAG_START) { 3578 DWARN(DBG_ALL_LDCS, 3579 "ldc_read:(0x%llx) unexpected pkt" 3580 " env=0x%x discarding %d bytes," 3581 " lastmsg=%d, currentmsg=%d\n", 3582 ldcp->id, msg->env&LDC_FRAG_MASK, 3583 bytes_read, ldcp->last_msg_rcd, 3584 msg->seqid); 3585 3586 /* throw data we have read so far */ 3587 bytes_read = 0; 3588 target = target_bufp; 3589 first_fragment = msg->seqid; 3590 3591 if (rv = i_ldc_set_rx_head(ldcp, 3592 curr_head)) 3593 break; 3594 } 3595 } 3596 3597 /* copy (next) pkt into buffer */ 3598 if (len <= (*sizep - bytes_read)) { 3599 bcopy(msgbuf, target, len); 3600 target += len; 3601 bytes_read += len; 3602 } else { 3603 /* 3604 * there is not enough space in the buffer to 3605 * read this pkt. 
throw message away & continue 3606 * reading data from queue 3607 */ 3608 DWARN(DBG_ALL_LDCS, 3609 "ldc_read: (0x%llx) buffer too small, " 3610 "head=0x%lx, expect=%d, got=%d\n", ldcp->id, 3611 curr_head, *sizep, bytes_read+len); 3612 3613 first_fragment = 0; 3614 target = target_bufp; 3615 bytes_read = 0; 3616 3617 /* throw away everything received so far */ 3618 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) 3619 break; 3620 3621 /* continue reading remaining pkts */ 3622 continue; 3623 } 3624 } 3625 3626 /* set the message id */ 3627 ldcp->last_msg_rcd = msg->seqid; 3628 3629 /* move the head one position */ 3630 curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask; 3631 3632 if (msg->env & LDC_FRAG_STOP) { 3633 3634 /* 3635 * All pkts that are part of this fragmented transfer 3636 * have been read or this was a single pkt read 3637 * or there was an error 3638 */ 3639 3640 /* set the queue head */ 3641 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) 3642 bytes_read = 0; 3643 3644 *sizep = bytes_read; 3645 3646 break; 3647 } 3648 3649 /* advance head if it is a DATA ACK */ 3650 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 3651 3652 /* set the queue head */ 3653 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) { 3654 bytes_read = 0; 3655 break; 3656 } 3657 3658 D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx", 3659 ldcp->id, curr_head); 3660 } 3661 3662 } /* for (;;) */ 3663 3664 3665 /* 3666 * If useful data was read - Send msg ACK 3667 * OPTIMIZE: do not send ACK for all msgs - use some frequency 3668 */ 3669 if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE || 3670 ldcp->mode == LDC_MODE_STREAM)) { 3671 3672 rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0); 3673 if (rv && rv != EWOULDBLOCK) { 3674 cmn_err(CE_NOTE, 3675 "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id); 3676 3677 /* if cannot send ACK - reset channel */ 3678 goto channel_is_reset; 3679 } 3680 } 3681 3682 D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep); 3683 3684 return (rv); 3685 3686 channel_is_reset: 3687 mutex_enter(&ldcp->tx_lock); 3688 i_ldc_reset(ldcp, B_FALSE); 3689 mutex_exit(&ldcp->tx_lock); 3690 return (ECONNRESET); 3691 } 3692 3693 /* 3694 * Use underlying reliable packet mechanism to fetch 3695 * and buffer incoming packets so we can hand them back as 3696 * a basic byte stream. 3697 * 3698 * Enter and exit with ldcp->lock held by caller 3699 */ 3700 static int 3701 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3702 { 3703 int rv; 3704 size_t size; 3705 3706 ASSERT(mutex_owned(&ldcp->lock)); 3707 3708 D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d", 3709 ldcp->id, *sizep); 3710 3711 if (ldcp->stream_remains == 0) { 3712 size = ldcp->mtu; 3713 rv = i_ldc_read_packet(ldcp, 3714 (caddr_t)ldcp->stream_bufferp, &size); 3715 D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d", 3716 ldcp->id, size); 3717 3718 if (rv != 0) 3719 return (rv); 3720 3721 ldcp->stream_remains = size; 3722 ldcp->stream_offset = 0; 3723 } 3724 3725 size = MIN(ldcp->stream_remains, *sizep); 3726 3727 bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size); 3728 ldcp->stream_offset += size; 3729 ldcp->stream_remains -= size; 3730 3731 D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d", 3732 ldcp->id, size); 3733 3734 *sizep = size; 3735 return (0); 3736 } 3737 3738 /* 3739 * Write specified amount of bytes to the channel 3740 * in multiple pkts of pkt_payload size. 
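* A short transmit sketch (illustrative; assumes the channel is UP
* and that "buf" and "len" are supplied by the caller):
*
*	size_t size = len;
*	int rv = ldc_write(handle, buf, &size);
*	if (rv == EWOULDBLOCK)
*		... the Tx queue was full; back off and retry ...
*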
Each 3741 * packet is tagged with an unique packet ID in 3742 * the case of a reliable link. 3743 * 3744 * On return, size contains the number of bytes written. 3745 */ 3746 int 3747 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep) 3748 { 3749 ldc_chan_t *ldcp; 3750 int rv = 0; 3751 3752 if (handle == NULL) { 3753 DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n"); 3754 return (EINVAL); 3755 } 3756 ldcp = (ldc_chan_t *)handle; 3757 3758 /* check if writes can occur */ 3759 if (!mutex_tryenter(&ldcp->tx_lock)) { 3760 /* 3761 * Could not get the lock - channel could 3762 * be in the process of being unconfigured 3763 * or reader has encountered an error 3764 */ 3765 return (EAGAIN); 3766 } 3767 3768 /* check if non-zero data to write */ 3769 if (buf == NULL || sizep == NULL) { 3770 DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n", 3771 ldcp->id); 3772 mutex_exit(&ldcp->tx_lock); 3773 return (EINVAL); 3774 } 3775 3776 if (*sizep == 0) { 3777 DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n", 3778 ldcp->id); 3779 mutex_exit(&ldcp->tx_lock); 3780 return (0); 3781 } 3782 3783 /* Check if channel is UP for data exchange */ 3784 if (ldcp->tstate != TS_UP) { 3785 DWARN(ldcp->id, 3786 "ldc_write: (0x%llx) channel is not in UP state\n", 3787 ldcp->id); 3788 *sizep = 0; 3789 rv = ECONNRESET; 3790 } else { 3791 rv = ldcp->write_p(ldcp, buf, sizep); 3792 } 3793 3794 mutex_exit(&ldcp->tx_lock); 3795 3796 return (rv); 3797 } 3798 3799 /* 3800 * Write a raw packet to the channel 3801 * On return, size contains the number of bytes written. 3802 */ 3803 static int 3804 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) 3805 { 3806 ldc_msg_t *ldcmsg; 3807 uint64_t tx_head, tx_tail, new_tail; 3808 int rv = 0; 3809 size_t size; 3810 3811 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 3812 ASSERT(ldcp->mode == LDC_MODE_RAW); 3813 3814 size = *sizep; 3815 3816 /* 3817 * Check to see if the packet size is less than or 3818 * equal to packet size support in raw mode 3819 */ 3820 if (size > ldcp->pkt_payload) { 3821 DWARN(ldcp->id, 3822 "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n", 3823 ldcp->id, *sizep); 3824 *sizep = 0; 3825 return (EMSGSIZE); 3826 } 3827 3828 /* get the qptrs for the tx queue */ 3829 rv = hv_ldc_tx_get_state(ldcp->id, 3830 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 3831 if (rv != 0) { 3832 cmn_err(CE_WARN, 3833 "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); 3834 *sizep = 0; 3835 return (EIO); 3836 } 3837 3838 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3839 ldcp->link_state == LDC_CHANNEL_RESET) { 3840 DWARN(ldcp->id, 3841 "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); 3842 3843 *sizep = 0; 3844 if (mutex_tryenter(&ldcp->lock)) { 3845 i_ldc_reset(ldcp, B_FALSE); 3846 mutex_exit(&ldcp->lock); 3847 } else { 3848 /* 3849 * Release Tx lock, and then reacquire channel 3850 * and Tx lock in correct order 3851 */ 3852 mutex_exit(&ldcp->tx_lock); 3853 mutex_enter(&ldcp->lock); 3854 mutex_enter(&ldcp->tx_lock); 3855 i_ldc_reset(ldcp, B_FALSE); 3856 mutex_exit(&ldcp->lock); 3857 } 3858 return (ECONNRESET); 3859 } 3860 3861 tx_tail = ldcp->tx_tail; 3862 tx_head = ldcp->tx_head; 3863 new_tail = (tx_tail + LDC_PACKET_SIZE) & 3864 ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT); 3865 3866 if (new_tail == tx_head) { 3867 DWARN(DBG_ALL_LDCS, 3868 "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); 3869 *sizep = 0; 3870 return (EWOULDBLOCK); 3871 } 3872 3873 D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", 3874 ldcp->id, size); 
3875 3876 /* Send the data now */ 3877 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 3878 3879 /* copy the data into pkt */ 3880 bcopy((uint8_t *)buf, ldcmsg, size); 3881 3882 /* increment tail */ 3883 tx_tail = new_tail; 3884 3885 /* 3886 * All packets have been copied into the TX queue 3887 * update the tail ptr in the HV 3888 */ 3889 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 3890 if (rv) { 3891 if (rv == EWOULDBLOCK) { 3892 DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n", 3893 ldcp->id); 3894 *sizep = 0; 3895 return (EWOULDBLOCK); 3896 } 3897 3898 *sizep = 0; 3899 if (mutex_tryenter(&ldcp->lock)) { 3900 i_ldc_reset(ldcp, B_FALSE); 3901 mutex_exit(&ldcp->lock); 3902 } else { 3903 /* 3904 * Release Tx lock, and then reacquire channel 3905 * and Tx lock in correct order 3906 */ 3907 mutex_exit(&ldcp->tx_lock); 3908 mutex_enter(&ldcp->lock); 3909 mutex_enter(&ldcp->tx_lock); 3910 i_ldc_reset(ldcp, B_FALSE); 3911 mutex_exit(&ldcp->lock); 3912 } 3913 return (ECONNRESET); 3914 } 3915 3916 ldcp->tx_tail = tx_tail; 3917 *sizep = size; 3918 3919 D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size); 3920 3921 return (rv); 3922 } 3923 3924 3925 /* 3926 * Write specified amount of bytes to the channel 3927 * in multiple pkts of pkt_payload size. Each 3928 * packet is tagged with an unique packet ID in 3929 * the case of a reliable link. 3930 * 3931 * On return, size contains the number of bytes written. 3932 * This function needs to ensure that the write size is < MTU size 3933 */ 3934 static int 3935 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size) 3936 { 3937 ldc_msg_t *ldcmsg; 3938 uint64_t tx_head, tx_tail, new_tail, start; 3939 uint64_t txq_size_mask, numavail; 3940 uint8_t *msgbuf, *source = (uint8_t *)buf; 3941 size_t len, bytes_written = 0, remaining; 3942 int rv; 3943 uint32_t curr_seqid; 3944 3945 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 3946 3947 ASSERT(ldcp->mode == LDC_MODE_RELIABLE || 3948 ldcp->mode == LDC_MODE_UNRELIABLE || 3949 ldcp->mode == LDC_MODE_STREAM); 3950 3951 /* compute mask for increment */ 3952 txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT; 3953 3954 /* get the qptrs for the tx queue */ 3955 rv = hv_ldc_tx_get_state(ldcp->id, 3956 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 3957 if (rv != 0) { 3958 cmn_err(CE_WARN, 3959 "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); 3960 *size = 0; 3961 return (EIO); 3962 } 3963 3964 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3965 ldcp->link_state == LDC_CHANNEL_RESET) { 3966 DWARN(ldcp->id, 3967 "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); 3968 *size = 0; 3969 if (mutex_tryenter(&ldcp->lock)) { 3970 i_ldc_reset(ldcp, B_FALSE); 3971 mutex_exit(&ldcp->lock); 3972 } else { 3973 /* 3974 * Release Tx lock, and then reacquire channel 3975 * and Tx lock in correct order 3976 */ 3977 mutex_exit(&ldcp->tx_lock); 3978 mutex_enter(&ldcp->lock); 3979 mutex_enter(&ldcp->tx_lock); 3980 i_ldc_reset(ldcp, B_FALSE); 3981 mutex_exit(&ldcp->lock); 3982 } 3983 return (ECONNRESET); 3984 } 3985 3986 tx_tail = ldcp->tx_tail; 3987 new_tail = (tx_tail + LDC_PACKET_SIZE) % 3988 (ldcp->tx_q_entries << LDC_PACKET_SHIFT); 3989 3990 /* 3991 * Link mode determines whether we use HV Tx head or the 3992 * private protocol head (corresponding to last ACKd pkt) for 3993 * determining how much we can write 3994 */ 3995 tx_head = (ldcp->mode == LDC_MODE_RELIABLE || 3996 ldcp->mode == LDC_MODE_STREAM) 3997 ? 
ldcp->tx_ackd_head : ldcp->tx_head; 3998 if (new_tail == tx_head) { 3999 DWARN(DBG_ALL_LDCS, 4000 "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); 4001 *size = 0; 4002 return (EWOULDBLOCK); 4003 } 4004 4005 /* 4006 * Make sure that the LDC Tx queue has enough space 4007 */ 4008 numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT) 4009 + ldcp->tx_q_entries - 1; 4010 numavail %= ldcp->tx_q_entries; 4011 4012 if (*size > (numavail * ldcp->pkt_payload)) { 4013 DWARN(DBG_ALL_LDCS, 4014 "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id); 4015 return (EWOULDBLOCK); 4016 } 4017 4018 D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", 4019 ldcp->id, *size); 4020 4021 /* Send the data now */ 4022 bytes_written = 0; 4023 curr_seqid = ldcp->last_msg_snt; 4024 start = tx_tail; 4025 4026 while (*size > bytes_written) { 4027 4028 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 4029 4030 msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE || 4031 ldcp->mode == LDC_MODE_STREAM) 4032 ? ldcmsg->rdata : ldcmsg->udata); 4033 4034 ldcmsg->type = LDC_DATA; 4035 ldcmsg->stype = LDC_INFO; 4036 ldcmsg->ctrl = 0; 4037 4038 remaining = *size - bytes_written; 4039 len = min(ldcp->pkt_payload, remaining); 4040 ldcmsg->env = (uint8_t)len; 4041 4042 curr_seqid++; 4043 ldcmsg->seqid = curr_seqid; 4044 4045 /* copy the data into pkt */ 4046 bcopy(source, msgbuf, len); 4047 4048 source += len; 4049 bytes_written += len; 4050 4051 /* increment tail */ 4052 tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask; 4053 4054 ASSERT(tx_tail != tx_head); 4055 } 4056 4057 /* Set the start and stop bits */ 4058 ldcmsg->env |= LDC_FRAG_STOP; 4059 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start); 4060 ldcmsg->env |= LDC_FRAG_START; 4061 4062 /* 4063 * All packets have been copied into the TX queue 4064 * update the tail ptr in the HV 4065 */ 4066 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 4067 if (rv == 0) { 4068 ldcp->tx_tail = tx_tail; 4069 ldcp->last_msg_snt = curr_seqid; 4070 *size = bytes_written; 4071 } else { 4072 int rv2; 4073 4074 if (rv != EWOULDBLOCK) { 4075 *size = 0; 4076 if (mutex_tryenter(&ldcp->lock)) { 4077 i_ldc_reset(ldcp, B_FALSE); 4078 mutex_exit(&ldcp->lock); 4079 } else { 4080 /* 4081 * Release Tx lock, and then reacquire channel 4082 * and Tx lock in correct order 4083 */ 4084 mutex_exit(&ldcp->tx_lock); 4085 mutex_enter(&ldcp->lock); 4086 mutex_enter(&ldcp->tx_lock); 4087 i_ldc_reset(ldcp, B_FALSE); 4088 mutex_exit(&ldcp->lock); 4089 } 4090 return (ECONNRESET); 4091 } 4092 4093 D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, " 4094 "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n", 4095 rv, ldcp->tx_head, ldcp->tx_tail, tx_tail, 4096 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 4097 4098 rv2 = hv_ldc_tx_get_state(ldcp->id, 4099 &tx_head, &tx_tail, &ldcp->link_state); 4100 4101 D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x " 4102 "(head 0x%x, tail 0x%x state 0x%x)\n", 4103 rv2, tx_head, tx_tail, ldcp->link_state); 4104 4105 *size = 0; 4106 } 4107 4108 D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size); 4109 4110 return (rv); 4111 } 4112 4113 /* 4114 * Write specified amount of bytes to the channel 4115 * in multiple pkts of pkt_payload size. Each 4116 * packet is tagged with an unique packet ID in 4117 * the case of a reliable link. 4118 * 4119 * On return, size contains the number of bytes written. 
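* A caller asking for more than the channel MTU gets a short write
* (the size is clamped to the MTU below) and must call again for
* the remainder.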
4120 * This function needs to ensure that the write size is < MTU size 4121 */ 4122 static int 4123 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) 4124 { 4125 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 4126 ASSERT(ldcp->mode == LDC_MODE_STREAM); 4127 4128 /* Truncate packet to max of MTU size */ 4129 if (*sizep > ldcp->mtu) *sizep = ldcp->mtu; 4130 return (i_ldc_write_packet(ldcp, buf, sizep)); 4131 } 4132 4133 4134 /* 4135 * Interfaces for channel nexus to register/unregister with LDC module 4136 * The nexus will register functions to be used to register individual 4137 * channels with the nexus and enable interrupts for the channels 4138 */ 4139 int 4140 ldc_register(ldc_cnex_t *cinfo) 4141 { 4142 ldc_chan_t *ldcp; 4143 4144 if (cinfo == NULL || cinfo->dip == NULL || 4145 cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL || 4146 cinfo->add_intr == NULL || cinfo->rem_intr == NULL || 4147 cinfo->clr_intr == NULL) { 4148 4149 DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n"); 4150 return (EINVAL); 4151 } 4152 4153 mutex_enter(&ldcssp->lock); 4154 4155 /* nexus registration */ 4156 ldcssp->cinfo.dip = cinfo->dip; 4157 ldcssp->cinfo.reg_chan = cinfo->reg_chan; 4158 ldcssp->cinfo.unreg_chan = cinfo->unreg_chan; 4159 ldcssp->cinfo.add_intr = cinfo->add_intr; 4160 ldcssp->cinfo.rem_intr = cinfo->rem_intr; 4161 ldcssp->cinfo.clr_intr = cinfo->clr_intr; 4162 4163 /* register any channels that might have been previously initialized */ 4164 ldcp = ldcssp->chan_list; 4165 while (ldcp) { 4166 if ((ldcp->tstate & TS_QCONF_RDY) && 4167 (ldcp->tstate & TS_CNEX_RDY) == 0) 4168 (void) i_ldc_register_channel(ldcp); 4169 4170 ldcp = ldcp->next; 4171 } 4172 4173 mutex_exit(&ldcssp->lock); 4174 4175 return (0); 4176 } 4177 4178 int 4179 ldc_unregister(ldc_cnex_t *cinfo) 4180 { 4181 if (cinfo == NULL || cinfo->dip == NULL) { 4182 DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n"); 4183 return (EINVAL); 4184 } 4185 4186 mutex_enter(&ldcssp->lock); 4187 4188 if (cinfo->dip != ldcssp->cinfo.dip) { 4189 DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n"); 4190 mutex_exit(&ldcssp->lock); 4191 return (EINVAL); 4192 } 4193 4194 /* nexus unregister */ 4195 ldcssp->cinfo.dip = NULL; 4196 ldcssp->cinfo.reg_chan = NULL; 4197 ldcssp->cinfo.unreg_chan = NULL; 4198 ldcssp->cinfo.add_intr = NULL; 4199 ldcssp->cinfo.rem_intr = NULL; 4200 ldcssp->cinfo.clr_intr = NULL; 4201 4202 mutex_exit(&ldcssp->lock); 4203 4204 return (0); 4205 } 4206 4207 4208 /* ------------------------------------------------------------------------- */ 4209 4210 /* 4211 * Allocate a memory handle for the channel and link it into the list 4212 * Also choose which memory table to use if this is the first handle 4213 * being assigned to this channel 4214 */ 4215 int 4216 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle) 4217 { 4218 ldc_chan_t *ldcp; 4219 ldc_mhdl_t *mhdl; 4220 4221 if (handle == NULL) { 4222 DWARN(DBG_ALL_LDCS, 4223 "ldc_mem_alloc_handle: invalid channel handle\n"); 4224 return (EINVAL); 4225 } 4226 ldcp = (ldc_chan_t *)handle; 4227 4228 mutex_enter(&ldcp->lock); 4229 4230 /* check to see if channel is initalized */ 4231 if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) { 4232 DWARN(ldcp->id, 4233 "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n", 4234 ldcp->id); 4235 mutex_exit(&ldcp->lock); 4236 return (EINVAL); 4237 } 4238 4239 /* allocate handle for channel */ 4240 mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP); 4241 4242 /* initialize the lock */ 4243 
mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL); 4244 4245 mhdl->myshadow = B_FALSE; 4246 mhdl->memseg = NULL; 4247 mhdl->ldcp = ldcp; 4248 mhdl->status = LDC_UNBOUND; 4249 4250 /* insert memory handle (@ head) into list */ 4251 if (ldcp->mhdl_list == NULL) { 4252 ldcp->mhdl_list = mhdl; 4253 mhdl->next = NULL; 4254 } else { 4255 /* insert @ head */ 4256 mhdl->next = ldcp->mhdl_list; 4257 ldcp->mhdl_list = mhdl; 4258 } 4259 4260 /* return the handle */ 4261 *mhandle = (ldc_mem_handle_t)mhdl; 4262 4263 mutex_exit(&ldcp->lock); 4264 4265 D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n", 4266 ldcp->id, mhdl); 4267 4268 return (0); 4269 } 4270 4271 /* 4272 * Free memory handle for the channel and unlink it from the list 4273 */ 4274 int 4275 ldc_mem_free_handle(ldc_mem_handle_t mhandle) 4276 { 4277 ldc_mhdl_t *mhdl, *phdl; 4278 ldc_chan_t *ldcp; 4279 4280 if (mhandle == NULL) { 4281 DWARN(DBG_ALL_LDCS, 4282 "ldc_mem_free_handle: invalid memory handle\n"); 4283 return (EINVAL); 4284 } 4285 mhdl = (ldc_mhdl_t *)mhandle; 4286 4287 mutex_enter(&mhdl->lock); 4288 4289 ldcp = mhdl->ldcp; 4290 4291 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { 4292 DWARN(ldcp->id, 4293 "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n", 4294 mhdl); 4295 mutex_exit(&mhdl->lock); 4296 return (EINVAL); 4297 } 4298 mutex_exit(&mhdl->lock); 4299 4300 mutex_enter(&ldcp->mlist_lock); 4301 4302 phdl = ldcp->mhdl_list; 4303 4304 /* first handle */ 4305 if (phdl == mhdl) { 4306 ldcp->mhdl_list = mhdl->next; 4307 mutex_destroy(&mhdl->lock); 4308 kmem_cache_free(ldcssp->memhdl_cache, mhdl); 4309 4310 D1(ldcp->id, 4311 "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n", 4312 ldcp->id, mhdl); 4313 } else { 4314 /* walk the list - unlink and free */ 4315 while (phdl != NULL) { 4316 if (phdl->next == mhdl) { 4317 phdl->next = mhdl->next; 4318 mutex_destroy(&mhdl->lock); 4319 kmem_cache_free(ldcssp->memhdl_cache, mhdl); 4320 D1(ldcp->id, 4321 "ldc_mem_free_handle: (0x%llx) freed " 4322 "handle 0x%llx\n", ldcp->id, mhdl); 4323 break; 4324 } 4325 phdl = phdl->next; 4326 } 4327 } 4328 4329 if (phdl == NULL) { 4330 DWARN(ldcp->id, 4331 "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl); 4332 mutex_exit(&ldcp->mlist_lock); 4333 return (EINVAL); 4334 } 4335 4336 mutex_exit(&ldcp->mlist_lock); 4337 4338 return (0); 4339 } 4340 4341 /* 4342 * Bind a memory handle to a virtual address. 4343 * The virtual address is converted to the corresponding real addresses. 4344 * Returns pointer to the first ldc_mem_cookie and the total number 4345 * of cookies for this virtual address. Other cookies can be obtained 4346 * using the ldc_mem_nextcookie() call. If the pages are stored in 4347 * consecutive locations in the table, a single cookie corresponding to 4348 * the first location is returned. The cookie size spans all the entries. 4349 * 4350 * If the VA corresponds to a page that is already being exported, reuse 4351 * the page and do not export it again. Bump the page's use count. 
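*
* A binding sketch (illustrative; assumes "handle" is an
* initialized channel, "vaddr" and "len" describe a caller's
* 8-byte-aligned buffer, and error handling is elided):
*
*	ldc_mem_handle_t mh;
*	ldc_mem_cookie_t cookie;
*	uint32_t ccount;
*
*	(void) ldc_mem_alloc_handle(handle, &mh);
*	(void) ldc_mem_bind_handle(mh, vaddr, len, LDC_SHADOW_MAP,
*	    LDC_MEM_R | LDC_MEM_W, &cookie, &ccount);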
4352 */ 4353 int 4354 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, 4355 uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) 4356 { 4357 ldc_mhdl_t *mhdl; 4358 ldc_chan_t *ldcp; 4359 ldc_mtbl_t *mtbl; 4360 ldc_memseg_t *memseg; 4361 ldc_mte_t tmp_mte; 4362 uint64_t index, prev_index = 0; 4363 int64_t cookie_idx; 4364 uintptr_t raddr, ra_aligned; 4365 uint64_t psize, poffset, v_offset; 4366 uint64_t pg_shift, pg_size, pg_size_code, pg_mask; 4367 pgcnt_t npages; 4368 caddr_t v_align, addr; 4369 int i, rv; 4370 4371 if (mhandle == NULL) { 4372 DWARN(DBG_ALL_LDCS, 4373 "ldc_mem_bind_handle: invalid memory handle\n"); 4374 return (EINVAL); 4375 } 4376 mhdl = (ldc_mhdl_t *)mhandle; 4377 ldcp = mhdl->ldcp; 4378 4379 /* clear count */ 4380 *ccount = 0; 4381 4382 mutex_enter(&mhdl->lock); 4383 4384 if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) { 4385 DWARN(ldcp->id, 4386 "ldc_mem_bind_handle: (0x%x) handle already bound\n", 4387 mhandle); 4388 mutex_exit(&mhdl->lock); 4389 return (EINVAL); 4390 } 4391 4392 /* Force address and size to be 8-byte aligned */ 4393 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 4394 DWARN(ldcp->id, 4395 "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n"); 4396 mutex_exit(&mhdl->lock); 4397 return (EINVAL); 4398 } 4399 4400 /* 4401 * If this channel is binding a memory handle for the 4402 * first time allocate it a memory map table and initialize it 4403 */ 4404 if ((mtbl = ldcp->mtbl) == NULL) { 4405 4406 mutex_enter(&ldcp->lock); 4407 4408 /* Allocate and initialize the map table structure */ 4409 mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP); 4410 mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries; 4411 mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t); 4412 mtbl->next_entry = NULL; 4413 mtbl->contigmem = B_TRUE; 4414 4415 /* Allocate the table itself */ 4416 mtbl->table = (ldc_mte_slot_t *) 4417 contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE); 4418 if (mtbl->table == NULL) { 4419 4420 /* allocate a page of memory using kmem_alloc */ 4421 mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP); 4422 mtbl->size = MMU_PAGESIZE; 4423 mtbl->contigmem = B_FALSE; 4424 mtbl->num_entries = mtbl->num_avail = 4425 mtbl->size / sizeof (ldc_mte_slot_t); 4426 DWARN(ldcp->id, 4427 "ldc_mem_bind_handle: (0x%llx) reduced tbl size " 4428 "to %lx entries\n", ldcp->id, mtbl->num_entries); 4429 } 4430 4431 /* zero out the memory */ 4432 bzero(mtbl->table, mtbl->size); 4433 4434 /* initialize the lock */ 4435 mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL); 4436 4437 /* register table for this channel */ 4438 rv = hv_ldc_set_map_table(ldcp->id, 4439 va_to_pa(mtbl->table), mtbl->num_entries); 4440 if (rv != 0) { 4441 cmn_err(CE_WARN, 4442 "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl", 4443 ldcp->id, rv); 4444 if (mtbl->contigmem) 4445 contig_mem_free(mtbl->table, mtbl->size); 4446 else 4447 kmem_free(mtbl->table, mtbl->size); 4448 mutex_destroy(&mtbl->lock); 4449 kmem_free(mtbl, sizeof (ldc_mtbl_t)); 4450 mutex_exit(&ldcp->lock); 4451 mutex_exit(&mhdl->lock); 4452 return (EIO); 4453 } 4454 4455 ldcp->mtbl = mtbl; 4456 mutex_exit(&ldcp->lock); 4457 4458 D1(ldcp->id, 4459 "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n", 4460 ldcp->id, ldcp->mtbl->table); 4461 } 4462 4463 /* FUTURE: get the page size, pgsz code, and shift */ 4464 pg_size = MMU_PAGESIZE; 4465 pg_size_code = page_szc(pg_size); 4466 pg_shift = page_get_shift(pg_size_code); 4467 pg_mask = ~(pg_size - 1); 4468 4469 D1(ldcp->id, 
"ldc_mem_bind_handle: (0x%llx) binding " 4470 "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 4471 ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); 4472 4473 /* aligned VA and its offset */ 4474 v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1)); 4475 v_offset = ((uintptr_t)vaddr) & (pg_size - 1); 4476 4477 npages = (len+v_offset)/pg_size; 4478 npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1; 4479 4480 D1(ldcp->id, "ldc_mem_bind_handle: binding " 4481 "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", 4482 ldcp->id, vaddr, v_align, v_offset, npages); 4483 4484 /* lock the memory table - exclusive access to channel */ 4485 mutex_enter(&mtbl->lock); 4486 4487 if (npages > mtbl->num_avail) { 4488 D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n", 4489 ldcp->id); 4490 mutex_exit(&mtbl->lock); 4491 mutex_exit(&mhdl->lock); 4492 return (ENOMEM); 4493 } 4494 4495 /* Allocate a memseg structure */ 4496 memseg = mhdl->memseg = 4497 kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); 4498 4499 /* Allocate memory to store all pages and cookies */ 4500 memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); 4501 memseg->cookies = 4502 kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP); 4503 4504 D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n", 4505 ldcp->id, npages); 4506 4507 addr = v_align; 4508 4509 /* 4510 * Check if direct shared memory map is enabled, if not change 4511 * the mapping type to include SHADOW_MAP. 4512 */ 4513 if (ldc_shmem_enabled == 0) 4514 mtype = LDC_SHADOW_MAP; 4515 4516 /* 4517 * Table slots are used in a round-robin manner. The algorithm permits 4518 * inserting duplicate entries. Slots allocated earlier will typically 4519 * get freed before we get back to reusing the slot.Inserting duplicate 4520 * entries should be OK as we only lookup entries using the cookie addr 4521 * i.e. tbl index, during export, unexport and copy operation. 4522 * 4523 * One implementation what was tried was to search for a duplicate 4524 * page entry first and reuse it. The search overhead is very high and 4525 * in the vnet case dropped the perf by almost half, 50 to 24 mbps. 4526 * So it does make sense to avoid searching for duplicates. 4527 * 4528 * But during the process of searching for a free slot, if we find a 4529 * duplicate entry we will go ahead and use it, and bump its use count. 4530 */ 4531 4532 /* index to start searching from */ 4533 index = mtbl->next_entry; 4534 cookie_idx = -1; 4535 4536 tmp_mte.ll = 0; /* initialise fields to 0 */ 4537 4538 if (mtype & LDC_DIRECT_MAP) { 4539 tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0; 4540 tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0; 4541 tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0; 4542 } 4543 4544 if (mtype & LDC_SHADOW_MAP) { 4545 tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0; 4546 tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0; 4547 } 4548 4549 if (mtype & LDC_IO_MAP) { 4550 tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0; 4551 tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 
1 : 0; 4552 } 4553 4554 D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); 4555 4556 tmp_mte.mte_pgszc = pg_size_code; 4557 4558 /* initialize each mem table entry */ 4559 for (i = 0; i < npages; i++) { 4560 4561 /* check if slot is available in the table */ 4562 while (mtbl->table[index].entry.ll != 0) { 4563 4564 index = (index + 1) % mtbl->num_entries; 4565 4566 if (index == mtbl->next_entry) { 4567 /* we have looped around */ 4568 DWARN(DBG_ALL_LDCS, 4569 "ldc_mem_bind_handle: (0x%llx) cannot find " 4570 "entry\n", ldcp->id); 4571 *ccount = 0; 4572 4573 /* NOTE: free memory, remove previous entries */ 4574 /* this shouldn't happen as num_avail was ok */ 4575 4576 mutex_exit(&mtbl->lock); 4577 mutex_exit(&mhdl->lock); 4578 return (ENOMEM); 4579 } 4580 } 4581 4582 /* get the real address */ 4583 raddr = va_to_pa((void *)addr); 4584 ra_aligned = ((uintptr_t)raddr & pg_mask); 4585 4586 /* build the mte */ 4587 tmp_mte.mte_rpfn = ra_aligned >> pg_shift; 4588 4589 D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); 4590 4591 /* update entry in table */ 4592 mtbl->table[index].entry = tmp_mte; 4593 4594 D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx" 4595 " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index); 4596 4597 /* calculate the size and offset for this export range */ 4598 if (i == 0) { 4599 /* first page */ 4600 psize = min((pg_size - v_offset), len); 4601 poffset = v_offset; 4602 4603 } else if (i == (npages - 1)) { 4604 /* last page */ 4605 psize = (((uintptr_t)(vaddr + len)) & 4606 ((uint64_t)(pg_size-1))); 4607 if (psize == 0) 4608 psize = pg_size; 4609 poffset = 0; 4610 4611 } else { 4612 /* middle pages */ 4613 psize = pg_size; 4614 poffset = 0; 4615 } 4616 4617 /* store entry for this page */ 4618 memseg->pages[i].index = index; 4619 memseg->pages[i].raddr = raddr; 4620 memseg->pages[i].offset = poffset; 4621 memseg->pages[i].size = psize; 4622 memseg->pages[i].mte = &(mtbl->table[index]); 4623 4624 /* create the cookie */ 4625 if (i == 0 || (index != prev_index + 1)) { 4626 cookie_idx++; 4627 memseg->cookies[cookie_idx].addr = 4628 IDX2COOKIE(index, pg_size_code, pg_shift); 4629 memseg->cookies[cookie_idx].addr |= poffset; 4630 memseg->cookies[cookie_idx].size = psize; 4631 4632 } else { 4633 memseg->cookies[cookie_idx].size += psize; 4634 } 4635 4636 D1(ldcp->id, "ldc_mem_bind_handle: bound " 4637 "(0x%llx) va=0x%llx, idx=0x%llx, " 4638 "ra=0x%llx(sz=0x%x,off=0x%x)\n", 4639 ldcp->id, addr, index, raddr, psize, poffset); 4640 4641 /* decrement number of available entries */ 4642 mtbl->num_avail--; 4643 4644 /* increment va by page size */ 4645 addr += pg_size; 4646 4647 /* increment index */ 4648 prev_index = index; 4649 index = (index + 1) % mtbl->num_entries; 4650 4651 /* save the next slot */ 4652 mtbl->next_entry = index; 4653 } 4654 4655 mutex_exit(&mtbl->lock); 4656 4657 /* memory handle = bound */ 4658 mhdl->mtype = mtype; 4659 mhdl->perm = perm; 4660 mhdl->status = LDC_BOUND; 4661 4662 /* update memseg_t */ 4663 memseg->vaddr = vaddr; 4664 memseg->raddr = memseg->pages[0].raddr; 4665 memseg->size = len; 4666 memseg->npages = npages; 4667 memseg->ncookies = cookie_idx + 1; 4668 memseg->next_cookie = (memseg->ncookies > 1) ?
1 : 0; 4669 4670 /* return count and first cookie */ 4671 *ccount = memseg->ncookies; 4672 cookie->addr = memseg->cookies[0].addr; 4673 cookie->size = memseg->cookies[0].size; 4674 4675 D1(ldcp->id, 4676 "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, " 4677 "pgs=0x%llx cookies=0x%llx\n", 4678 ldcp->id, mhdl, vaddr, npages, memseg->ncookies); 4679 4680 mutex_exit(&mhdl->lock); 4681 return (0); 4682 } 4683 4684 /* 4685 * Return the next cookie associated with the specified memory handle 4686 */ 4687 int 4688 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie) 4689 { 4690 ldc_mhdl_t *mhdl; 4691 ldc_chan_t *ldcp; 4692 ldc_memseg_t *memseg; 4693 4694 if (mhandle == NULL) { 4695 DWARN(DBG_ALL_LDCS, 4696 "ldc_mem_nextcookie: invalid memory handle\n"); 4697 return (EINVAL); 4698 } 4699 mhdl = (ldc_mhdl_t *)mhandle; 4700 4701 mutex_enter(&mhdl->lock); 4702 4703 ldcp = mhdl->ldcp; 4704 memseg = mhdl->memseg; 4705 4706 if (cookie == 0) { 4707 DWARN(ldcp->id, 4708 "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n", 4709 ldcp->id); 4710 mutex_exit(&mhdl->lock); 4711 return (EINVAL); 4712 } 4713 4714 if (memseg->next_cookie != 0) { 4715 cookie->addr = memseg->cookies[memseg->next_cookie].addr; 4716 cookie->size = memseg->cookies[memseg->next_cookie].size; 4717 memseg->next_cookie++; 4718 if (memseg->next_cookie == memseg->ncookies) 4719 memseg->next_cookie = 0; 4720 4721 } else { 4722 DWARN(ldcp->id, 4723 "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id); 4724 cookie->addr = 0; 4725 cookie->size = 0; 4726 mutex_exit(&mhdl->lock); 4727 return (EINVAL); 4728 } 4729 4730 D1(ldcp->id, 4731 "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n", 4732 ldcp->id, cookie->addr, cookie->size); 4733 4734 mutex_exit(&mhdl->lock); 4735 return (0); 4736 } 4737 4738 /* 4739 * Unbind the virtual memory region associated with the specified 4740 * memory handle. All associated cookies are freed and the corresponding 4741 * RA space is no longer exported.
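 *
 * A bound handle is typically torn down as follows (a sketch; 'mh' is
 * a handle that was bound earlier with ldc_mem_bind_handle):
 *
 *	(void) ldc_mem_unbind_handle(mh);
 *	(void) ldc_mem_free_handle(mh);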
4742 */ 4743 int 4744 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle) 4745 { 4746 ldc_mhdl_t *mhdl; 4747 ldc_chan_t *ldcp; 4748 ldc_mtbl_t *mtbl; 4749 ldc_memseg_t *memseg; 4750 uint64_t cookie_addr; 4751 uint64_t pg_shift, pg_size_code; 4752 int i, rv; 4753 4754 if (mhandle == NULL) { 4755 DWARN(DBG_ALL_LDCS, 4756 "ldc_mem_unbind_handle: invalid memory handle\n"); 4757 return (EINVAL); 4758 } 4759 mhdl = (ldc_mhdl_t *)mhandle; 4760 4761 mutex_enter(&mhdl->lock); 4762 4763 if (mhdl->status == LDC_UNBOUND) { 4764 DWARN(DBG_ALL_LDCS, 4765 "ldc_mem_unbind_handle: (0x%x) handle is not bound\n", 4766 mhandle); 4767 mutex_exit(&mhdl->lock); 4768 return (EINVAL); 4769 } 4770 4771 ldcp = mhdl->ldcp; 4772 mtbl = ldcp->mtbl; 4773 4774 memseg = mhdl->memseg; 4775 4776 /* lock the memory table - exclusive access to channel */ 4777 mutex_enter(&mtbl->lock); 4778 4779 /* undo the pages exported */ 4780 for (i = 0; i < memseg->npages; i++) { 4781 4782 /* check for mapped pages, revocation cookie != 0 */ 4783 if (memseg->pages[i].mte->cookie) { 4784 4785 pg_size_code = page_szc(memseg->pages[i].size); 4786 pg_shift = page_get_shift(memseg->pages[i].size); 4787 cookie_addr = IDX2COOKIE(memseg->pages[i].index, 4788 pg_size_code, pg_shift); 4789 4790 D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke " 4791 "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id, 4792 cookie_addr, memseg->pages[i].mte->cookie); 4793 rv = hv_ldc_revoke(ldcp->id, cookie_addr, 4794 memseg->pages[i].mte->cookie); 4795 if (rv) { 4796 DWARN(ldcp->id, 4797 "ldc_mem_unbind_handle: (0x%llx) cannot " 4798 "revoke mapping, cookie %llx\n", ldcp->id, 4799 cookie_addr); 4800 } 4801 } 4802 4803 /* clear the entry from the table */ 4804 memseg->pages[i].mte->entry.ll = 0; 4805 mtbl->num_avail++; 4806 } 4807 mutex_exit(&mtbl->lock); 4808 4809 /* free the allocated memseg and page structures */ 4810 kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); 4811 kmem_free(memseg->cookies, 4812 (sizeof (ldc_mem_cookie_t) * memseg->npages)); 4813 kmem_cache_free(ldcssp->memseg_cache, memseg); 4814 4815 /* uninitialize the memory handle */ 4816 mhdl->memseg = NULL; 4817 mhdl->status = LDC_UNBOUND; 4818 4819 D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n", 4820 ldcp->id, mhdl); 4821 4822 mutex_exit(&mhdl->lock); 4823 return (0); 4824 } 4825 4826 /* 4827 * Get information about a memory handle. The base VA and RA of the 4828 * memory segment, along with its type and permission, are returned. 4829 */ 4830 int 4831 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo) 4832 { 4833 ldc_mhdl_t *mhdl; 4834 4835 if (mhandle == NULL) { 4836 DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n"); 4837 return (EINVAL); 4838 } 4839 mhdl = (ldc_mhdl_t *)mhandle; 4840 4841 if (minfo == NULL) { 4842 DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n"); 4843 return (EINVAL); 4844 } 4845 4846 mutex_enter(&mhdl->lock); 4847 4848 minfo->status = mhdl->status; 4849 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { 4850 minfo->vaddr = mhdl->memseg->vaddr; 4851 minfo->raddr = mhdl->memseg->raddr; 4852 minfo->mtype = mhdl->mtype; 4853 minfo->perm = mhdl->perm; 4854 } 4855 mutex_exit(&mhdl->lock); 4856 4857 return (0); 4858 } 4859 4860 /* 4861 * Copy data between the client-specified virtual address space and 4862 * the exported memory associated with the cookies. 4863 * The direction argument determines whether the data is read from or 4864 * written to exported memory.
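 *
 * For example (an illustrative sketch; 'chan', 'buf', 'cookies' and
 * 'ccount' are caller-supplied, with 'buf' and 'len' 8-byte aligned),
 * a client could pull 'len' bytes of a peer's exported memory into a
 * local buffer with:
 *
 *	size_t size = len;
 *
 *	if (ldc_mem_copy(chan, buf, 0, &size, cookies, ccount,
 *	    LDC_COPY_IN) != 0) {
 *		copy failed or channel was reset; 'size' now holds
 *		the number of bytes copied before the error
 *	}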
4865 */ 4866 int 4867 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size, 4868 ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction) 4869 { 4870 ldc_chan_t *ldcp; 4871 uint64_t local_voff, local_valign; 4872 uint64_t cookie_addr, cookie_size; 4873 uint64_t pg_shift, pg_size, pg_size_code; 4874 uint64_t export_caddr, export_poff, export_psize, export_size; 4875 uint64_t local_ra, local_poff, local_psize; 4876 uint64_t copy_size, copied_len = 0, total_bal = 0, idx = 0; 4877 pgcnt_t npages; 4878 size_t len = *size; 4879 int i, rv = 0; 4880 4881 uint64_t chid; 4882 4883 if (handle == NULL) { 4884 DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n"); 4885 return (EINVAL); 4886 } 4887 ldcp = (ldc_chan_t *)handle; 4888 chid = ldcp->id; 4889 4890 /* check to see if channel is UP */ 4891 if (ldcp->tstate != TS_UP) { 4892 DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n", 4893 chid); 4894 return (ECONNRESET); 4895 } 4896 4897 /* Force address and size to be 8-byte aligned */ 4898 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 4899 DWARN(chid, 4900 "ldc_mem_copy: addr/sz is not 8-byte aligned\n"); 4901 return (EINVAL); 4902 } 4903 4904 /* Find the size of the exported memory */ 4905 export_size = 0; 4906 for (i = 0; i < ccount; i++) 4907 export_size += cookies[i].size; 4908 4909 /* check to see if offset is valid */ 4910 if (off > export_size) { 4911 DWARN(chid, 4912 "ldc_mem_copy: (0x%llx) start offset > export mem size\n", 4913 chid); 4914 return (EINVAL); 4915 } 4916 4917 /* 4918 * Check to see if the export size is smaller than the size we 4919 * are requesting to copy - if so flag an error 4920 */ 4921 if ((export_size - off) < *size) { 4922 DWARN(chid, 4923 "ldc_mem_copy: (0x%llx) copy size > export mem size\n", 4924 chid); 4925 return (EINVAL); 4926 } 4927 4928 total_bal = min(export_size, *size); 4929 4930 /* FUTURE: get the page size, pgsz code, and shift */ 4931 pg_size = MMU_PAGESIZE; 4932 pg_size_code = page_szc(pg_size); 4933 pg_shift = page_get_shift(pg_size_code); 4934 4935 D1(chid, "ldc_mem_copy: copying data " 4936 "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 4937 chid, vaddr, pg_size, pg_size_code, pg_shift); 4938 4939 /* aligned VA and its offset */ 4940 local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1)); 4941 local_voff = ((uintptr_t)vaddr) & (pg_size - 1); 4942 4943 npages = (len+local_voff)/pg_size; 4944 npages = ((len+local_voff)%pg_size == 0) ? 
npages : npages+1; 4945 4946 D1(chid, 4947 "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", 4948 chid, vaddr, local_valign, local_voff, npages); 4949 4950 local_ra = va_to_pa((void *)local_valign); 4951 local_poff = local_voff; 4952 local_psize = min(len, (pg_size - local_voff)); 4953 4954 len -= local_psize; 4955 4956 /* 4957 * find the first cookie in the list of cookies 4958 * if the offset passed in is not zero 4959 */ 4960 for (idx = 0; idx < ccount; idx++) { 4961 cookie_size = cookies[idx].size; 4962 if (off < cookie_size) 4963 break; 4964 off -= cookie_size; 4965 } 4966 4967 cookie_addr = cookies[idx].addr + off; 4968 cookie_size = cookies[idx].size - off; 4969 4970 export_caddr = cookie_addr & ~(pg_size - 1); 4971 export_poff = cookie_addr & (pg_size - 1); 4972 export_psize = min(cookie_size, (pg_size - export_poff)); 4973 4974 for (;;) { 4975 4976 copy_size = min(export_psize, local_psize); 4977 4978 D1(chid, 4979 "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx," 4980 " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx," 4981 " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," 4982 " total_bal=0x%llx\n", 4983 chid, direction, export_caddr, local_ra, export_poff, 4984 local_poff, export_psize, local_psize, copy_size, 4985 total_bal); 4986 4987 rv = hv_ldc_copy(chid, direction, 4988 (export_caddr + export_poff), (local_ra + local_poff), 4989 copy_size, &copied_len); 4990 4991 if (rv != 0) { 4992 int error = EIO; 4993 uint64_t rx_hd, rx_tl; 4994 4995 DWARN(chid, 4996 "ldc_mem_copy: (0x%llx) err %d during copy\n", 4997 (unsigned long long)chid, rv); 4998 DWARN(chid, 4999 "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, " 5000 "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx," 5001 " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx," 5002 " copied_len=0x%lx, total_bal=0x%lx\n", 5003 chid, direction, export_caddr, local_ra, 5004 export_poff, local_poff, export_psize, local_psize, 5005 copy_size, copied_len, total_bal); 5006 5007 *size = *size - total_bal; 5008 5009 /* 5010 * check if reason for copy error was due to 5011 * a channel reset. we need to grab the lock 5012 * just in case we have to do a reset. 
5013 */ 5014 mutex_enter(&ldcp->lock); 5015 mutex_enter(&ldcp->tx_lock); 5016 5017 rv = hv_ldc_rx_get_state(ldcp->id, 5018 &rx_hd, &rx_tl, &(ldcp->link_state)); 5019 if (ldcp->link_state == LDC_CHANNEL_DOWN || 5020 ldcp->link_state == LDC_CHANNEL_RESET) { 5021 i_ldc_reset(ldcp, B_FALSE); 5022 error = ECONNRESET; 5023 } 5024 5025 mutex_exit(&ldcp->tx_lock); 5026 mutex_exit(&ldcp->lock); 5027 5028 return (error); 5029 } 5030 5031 ASSERT(copied_len <= copy_size); 5032 5033 D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len); 5034 export_poff += copied_len; 5035 local_poff += copied_len; 5036 export_psize -= copied_len; 5037 local_psize -= copied_len; 5038 cookie_size -= copied_len; 5039 5040 total_bal -= copied_len; 5041 5042 if (copy_size != copied_len) 5043 continue; 5044 5045 if (export_psize == 0 && total_bal != 0) { 5046 5047 if (cookie_size == 0) { 5048 idx++; 5049 cookie_addr = cookies[idx].addr; 5050 cookie_size = cookies[idx].size; 5051 5052 export_caddr = cookie_addr & ~(pg_size - 1); 5053 export_poff = cookie_addr & (pg_size - 1); 5054 export_psize = 5055 min(cookie_size, (pg_size-export_poff)); 5056 } else { 5057 export_caddr += pg_size; 5058 export_poff = 0; 5059 export_psize = min(cookie_size, pg_size); 5060 } 5061 } 5062 5063 if (local_psize == 0 && total_bal != 0) { 5064 local_valign += pg_size; 5065 local_ra = va_to_pa((void *)local_valign); 5066 local_poff = 0; 5067 local_psize = min(pg_size, len); 5068 len -= local_psize; 5069 } 5070 5071 /* check if we are all done */ 5072 if (total_bal == 0) 5073 break; 5074 } 5075 5076 5077 D1(chid, 5078 "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n", 5079 chid, *size); 5080 5081 return (0); 5082 } 5083 5084 /* 5085 * Copy data either from or to the client specified virtual address 5086 * space to or from HV physical memory. 5087 * 5088 * The direction argument determines whether the data is read from or 5089 * written to HV memory. 
direction values are LDC_COPY_IN/OUT similar 5090 * to the ldc_mem_copy interface 5091 */ 5092 int 5093 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size, 5094 caddr_t paddr, uint8_t direction) 5095 { 5096 ldc_chan_t *ldcp; 5097 uint64_t local_voff, local_valign; 5098 uint64_t pg_shift, pg_size, pg_size_code; 5099 uint64_t target_pa, target_poff, target_psize, target_size; 5100 uint64_t local_ra, local_poff, local_psize; 5101 uint64_t copy_size, copied_len = 0; 5102 pgcnt_t npages; 5103 size_t len = *size; 5104 int rv = 0; 5105 5106 if (handle == NULL) { 5107 DWARN(DBG_ALL_LDCS, 5108 "ldc_mem_rdwr_cookie: invalid channel handle\n"); 5109 return (EINVAL); 5110 } 5111 ldcp = (ldc_chan_t *)handle; 5112 5113 mutex_enter(&ldcp->lock); 5114 5115 /* check to see if channel is UP */ 5116 if (ldcp->tstate != TS_UP) { 5117 DWARN(ldcp->id, 5118 "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n", 5119 ldcp->id); 5120 mutex_exit(&ldcp->lock); 5121 return (ECONNRESET); 5122 } 5123 5124 /* Force address and size to be 8-byte aligned */ 5125 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 5126 DWARN(ldcp->id, 5127 "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n"); 5128 mutex_exit(&ldcp->lock); 5129 return (EINVAL); 5130 } 5131 5132 target_size = *size; 5133 5134 /* FUTURE: get the page size, pgsz code, and shift */ 5135 pg_size = MMU_PAGESIZE; 5136 pg_size_code = page_szc(pg_size); 5137 pg_shift = page_get_shift(pg_size_code); 5138 5139 D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data " 5140 "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 5141 ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); 5142 5143 /* aligned VA and its offset */ 5144 local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1); 5145 local_voff = ((uintptr_t)vaddr) & (pg_size - 1); 5146 5147 npages = (len + local_voff) / pg_size; 5148 npages = ((len + local_voff) % pg_size == 0) ? 
npages : npages+1; 5149 5150 D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, " 5151 "val=0x%llx,off=0x%x,pgs=0x%x\n", 5152 ldcp->id, vaddr, local_valign, local_voff, npages); 5153 5154 local_ra = va_to_pa((void *)local_valign); 5155 local_poff = local_voff; 5156 local_psize = min(len, (pg_size - local_voff)); 5157 5158 len -= local_psize; 5159 5160 target_pa = ((uintptr_t)paddr) & ~(pg_size - 1); 5161 target_poff = ((uintptr_t)paddr) & (pg_size - 1); 5162 target_psize = pg_size - target_poff; 5163 5164 for (;;) { 5165 5166 copy_size = min(target_psize, local_psize); 5167 5168 D1(ldcp->id, 5169 "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx," 5170 " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx," 5171 " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," 5172 " total_bal=0x%llx\n", 5173 ldcp->id, direction, target_pa, local_ra, target_poff, 5174 local_poff, target_psize, local_psize, copy_size, 5175 target_size); 5176 5177 rv = hv_ldc_copy(ldcp->id, direction, 5178 (target_pa + target_poff), (local_ra + local_poff), 5179 copy_size, &copied_len); 5180 5181 if (rv != 0) { 5182 DWARN(DBG_ALL_LDCS, 5183 "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n", 5184 ldcp->id, rv); 5185 DWARN(DBG_ALL_LDCS, 5186 "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, " 5187 "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, " 5188 "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, " 5189 "copy_sz=0x%llx, total_bal=0x%llx\n", 5190 ldcp->id, direction, target_pa, local_ra, 5191 target_poff, local_poff, target_psize, local_psize, 5192 copy_size, target_size); 5193 5194 *size = *size - target_size; 5195 mutex_exit(&ldcp->lock); 5196 return (i_ldc_h2v_error(rv)); 5197 } 5198 5199 D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n", 5200 copied_len); 5201 target_poff += copied_len; 5202 local_poff += copied_len; 5203 target_psize -= copied_len; 5204 local_psize -= copied_len; 5205 5206 target_size -= copied_len; 5207 5208 if (copy_size != copied_len) 5209 continue; 5210 5211 if (target_psize == 0 && target_size != 0) { 5212 target_pa += pg_size; 5213 target_poff = 0; 5214 target_psize = min(pg_size, target_size); 5215 } 5216 5217 if (local_psize == 0 && target_size != 0) { 5218 local_valign += pg_size; 5219 local_ra = va_to_pa((void *)local_valign); 5220 local_poff = 0; 5221 local_psize = min(pg_size, len); 5222 len -= local_psize; 5223 } 5224 5225 /* check if we are all done */ 5226 if (target_size == 0) 5227 break; 5228 } 5229 5230 mutex_exit(&ldcp->lock); 5231 5232 D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n", 5233 ldcp->id, *size); 5234 5235 return (0); 5236 } 5237 5238 /* 5239 * Map an exported memory segment into the local address space. If the 5240 * memory range was exported for direct map access, a HV call is made 5241 * to allocate a RA range. If the map is done via a shadow copy, local 5242 * shadow memory is allocated and the base VA is returned in 'vaddr'. If 5243 * the mapping is a direct map then the RA is returned in 'raddr'. 
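 *
 * An importing client might map a peer's exported segment as follows
 * (a sketch only; 'mh' comes from ldc_mem_alloc_handle on this channel
 * and 'cookie'/'ccount' were received from the peer):
 *
 *	caddr_t vaddr = NULL, raddr = NULL;
 *
 *	if (ldc_mem_map(mh, cookie, ccount, LDC_SHADOW_MAP,
 *	    LDC_MEM_RW, &vaddr, &raddr) == 0) {
 *		access the segment through 'vaddr', bracketing
 *		accesses with ldc_mem_acquire/ldc_mem_release
 *	}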
5244 */ 5245 int 5246 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount, 5247 uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr) 5248 { 5249 int i, j, idx, rv, retries; 5250 ldc_chan_t *ldcp; 5251 ldc_mhdl_t *mhdl; 5252 ldc_memseg_t *memseg; 5253 caddr_t tmpaddr; 5254 uint64_t map_perm = perm; 5255 uint64_t pg_size, pg_shift, pg_size_code, pg_mask; 5256 uint64_t exp_size = 0, base_off, map_size, npages; 5257 uint64_t cookie_addr, cookie_off, cookie_size; 5258 tte_t ldc_tte; 5259 5260 if (mhandle == NULL) { 5261 DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n"); 5262 return (EINVAL); 5263 } 5264 mhdl = (ldc_mhdl_t *)mhandle; 5265 5266 mutex_enter(&mhdl->lock); 5267 5268 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED || 5269 mhdl->memseg != NULL) { 5270 DWARN(DBG_ALL_LDCS, 5271 "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle); 5272 mutex_exit(&mhdl->lock); 5273 return (EINVAL); 5274 } 5275 5276 ldcp = mhdl->ldcp; 5277 5278 mutex_enter(&ldcp->lock); 5279 5280 if (ldcp->tstate != TS_UP) { 5281 DWARN(ldcp->id, 5282 "ldc_mem_map: (0x%llx) channel is not UP\n", 5283 ldcp->id); 5284 mutex_exit(&ldcp->lock); 5285 mutex_exit(&mhdl->lock); 5286 return (ECONNRESET); 5287 } 5288 5289 if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { 5290 DWARN(ldcp->id, "ldc_mem_map: invalid map type\n"); 5291 mutex_exit(&ldcp->lock); 5292 mutex_exit(&mhdl->lock); 5293 return (EINVAL); 5294 } 5295 5296 D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n", 5297 ldcp->id, cookie->addr, cookie->size); 5298 5299 /* FUTURE: get the page size, pgsz code, and shift */ 5300 pg_size = MMU_PAGESIZE; 5301 pg_size_code = page_szc(pg_size); 5302 pg_shift = page_get_shift(pg_size_code); 5303 pg_mask = ~(pg_size - 1); 5304 5305 /* calculate the number of pages in the exported cookie */ 5306 base_off = cookie[0].addr & (pg_size - 1); 5307 for (idx = 0; idx < ccount; idx++) 5308 exp_size += cookie[idx].size; 5309 map_size = P2ROUNDUP((exp_size + base_off), pg_size); 5310 npages = (map_size >> pg_shift); 5311 5312 /* Allocate memseg structure */ 5313 memseg = mhdl->memseg = 5314 kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); 5315 5316 /* Allocate memory to store all pages and cookies */ 5317 memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); 5318 memseg->cookies = 5319 kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP); 5320 5321 D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx," 5322 "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages); 5323 5324 /* 5325 * Check if direct map over shared memory is enabled; if not, change 5326 * the mapping type to SHADOW_MAP.
5327 */ 5328 if (ldc_shmem_enabled == 0) 5329 mtype = LDC_SHADOW_MAP; 5330 5331 /* 5332 * Check to see if the client is requesting direct or shadow map. 5333 * If direct map is requested, try to map remote memory first, 5334 * and if that fails, revert to shadow map 5335 */ 5336 if (mtype == LDC_DIRECT_MAP) { 5337 5338 /* Allocate kernel virtual space for mapping */ 5339 memseg->vaddr = vmem_xalloc(heap_arena, map_size, 5340 pg_size, 0, 0, NULL, NULL, VM_NOSLEEP); 5341 if (memseg->vaddr == NULL) { 5342 cmn_err(CE_WARN, 5343 "ldc_mem_map: (0x%lx) memory map failed\n", 5344 ldcp->id); 5345 kmem_free(memseg->cookies, 5346 (sizeof (ldc_mem_cookie_t) * ccount)); 5347 kmem_free(memseg->pages, 5348 (sizeof (ldc_page_t) * npages)); 5349 kmem_cache_free(ldcssp->memseg_cache, memseg); 5350 5351 mutex_exit(&ldcp->lock); 5352 mutex_exit(&mhdl->lock); 5353 return (ENOMEM); 5354 } 5355 5356 /* Unload previous mapping */ 5357 hat_unload(kas.a_hat, memseg->vaddr, map_size, 5358 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 5359 5360 /* for each cookie passed in - map into address space */ 5361 idx = 0; 5362 cookie_size = 0; 5363 tmpaddr = memseg->vaddr; 5364 5365 for (i = 0; i < npages; i++) { 5366 5367 if (cookie_size == 0) { 5368 ASSERT(idx < ccount); 5369 cookie_addr = cookie[idx].addr & pg_mask; 5370 cookie_off = cookie[idx].addr & (pg_size - 1); 5371 cookie_size = 5372 P2ROUNDUP((cookie_off + cookie[idx].size), 5373 pg_size); 5374 idx++; 5375 } 5376 5377 D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping " 5378 "cookie 0x%llx, bal=0x%llx\n", ldcp->id, 5379 cookie_addr, cookie_size); 5380 5381 /* map the cookie into address space */ 5382 for (retries = 0; retries < ldc_max_retries; 5383 retries++) { 5384 5385 rv = hv_ldc_mapin(ldcp->id, cookie_addr, 5386 &memseg->pages[i].raddr, &map_perm); 5387 if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY) 5388 break; 5389 5390 drv_usecwait(ldc_delay); 5391 } 5392 5393 if (rv || memseg->pages[i].raddr == 0) { 5394 DWARN(ldcp->id, 5395 "ldc_mem_map: (0x%llx) hv mapin err %d\n", 5396 ldcp->id, rv); 5397 5398 /* remove previous mapins */ 5399 hat_unload(kas.a_hat, memseg->vaddr, map_size, 5400 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 5401 for (j = 0; j < i; j++) { 5402 rv = hv_ldc_unmap( 5403 memseg->pages[j].raddr); 5404 if (rv) { 5405 DWARN(ldcp->id, 5406 "ldc_mem_map: (0x%llx) " 5407 "cannot unmap ra=0x%llx\n", 5408 ldcp->id, 5409 memseg->pages[j].raddr); 5410 } 5411 } 5412 5413 /* free kernel virtual space */ 5414 vmem_free(heap_arena, (void *)memseg->vaddr, 5415 map_size); 5416 5417 /* direct map failed - revert to shadow map */ 5418 mtype = LDC_SHADOW_MAP; 5419 break; 5420 5421 } else { 5422 5423 D1(ldcp->id, 5424 "ldc_mem_map: (0x%llx) vtop map 0x%llx -> " 5425 "0x%llx, cookie=0x%llx, perm=0x%llx\n", 5426 ldcp->id, tmpaddr, memseg->pages[i].raddr, 5427 cookie_addr, perm); 5428 5429 /* 5430 * NOTE: Calling hat_devload directly causes it 5431 * to look for a page_t using the pfn. Since this
Since this 5432 * addr is greater than the memlist, it treats 5433 * it as non-memory 5434 */ 5435 sfmmu_memtte(&ldc_tte, 5436 (pfn_t)(memseg->pages[i].raddr >> pg_shift), 5437 PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K); 5438 5439 D1(ldcp->id, 5440 "ldc_mem_map: (0x%llx) ra 0x%llx -> " 5441 "tte 0x%llx\n", ldcp->id, 5442 memseg->pages[i].raddr, ldc_tte); 5443 5444 sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr, 5445 NULL, HAT_LOAD_LOCK); 5446 5447 cookie_size -= pg_size; 5448 cookie_addr += pg_size; 5449 tmpaddr += pg_size; 5450 } 5451 } 5452 } 5453 5454 if (mtype == LDC_SHADOW_MAP) { 5455 if (*vaddr == NULL) { 5456 memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP); 5457 mhdl->myshadow = B_TRUE; 5458 5459 D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated " 5460 "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr); 5461 } else { 5462 /* 5463 * Use client-supplied memory for memseg->vaddr 5464 * WARNING: assuming that client mem is >= exp_size 5465 */ 5466 memseg->vaddr = *vaddr; 5467 } 5468 5469 /* Save all page and cookie information */ 5470 for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) { 5471 memseg->pages[i].raddr = va_to_pa(tmpaddr); 5472 memseg->pages[i].size = pg_size; 5473 tmpaddr += pg_size; 5474 } 5475 5476 } 5477 5478 /* save all cookies */ 5479 bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t)); 5480 5481 /* update memseg_t */ 5482 memseg->raddr = memseg->pages[0].raddr; 5483 memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size; 5484 memseg->npages = npages; 5485 memseg->ncookies = ccount; 5486 memseg->next_cookie = 0; 5487 5488 /* memory handle = mapped */ 5489 mhdl->mtype = mtype; 5490 mhdl->perm = perm; 5491 mhdl->status = LDC_MAPPED; 5492 5493 D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, " 5494 "va=0x%llx, pgs=0x%llx cookies=0x%llx\n", 5495 ldcp->id, mhdl, memseg->raddr, memseg->vaddr, 5496 memseg->npages, memseg->ncookies); 5497 5498 if (mtype == LDC_SHADOW_MAP) 5499 base_off = 0; 5500 if (raddr) 5501 *raddr = (caddr_t)(memseg->raddr | base_off); 5502 if (vaddr) 5503 *vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off); 5504 5505 mutex_exit(&ldcp->lock); 5506 mutex_exit(&mhdl->lock); 5507 return (0); 5508 } 5509 5510 /* 5511 * Unmap a memory segment. Free shadow memory (if any).
5512 */ 5513 int 5514 ldc_mem_unmap(ldc_mem_handle_t mhandle) 5515 { 5516 int i, rv; 5517 ldc_mhdl_t *mhdl = (ldc_mhdl_t *)mhandle; 5518 ldc_chan_t *ldcp; 5519 ldc_memseg_t *memseg; 5520 5521 if (mhdl == 0 || mhdl->status != LDC_MAPPED) { 5522 DWARN(DBG_ALL_LDCS, 5523 "ldc_mem_unmap: (0x%llx) handle is not mapped\n", 5524 mhandle); 5525 return (EINVAL); 5526 } 5527 5528 mutex_enter(&mhdl->lock); 5529 5530 ldcp = mhdl->ldcp; 5531 memseg = mhdl->memseg; 5532 5533 D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n", 5534 ldcp->id, mhdl); 5535 5536 /* if we allocated shadow memory - free it */ 5537 if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) { 5538 kmem_free(memseg->vaddr, memseg->size); 5539 } else if (mhdl->mtype == LDC_DIRECT_MAP) { 5540 5541 /* unmap in the case of DIRECT_MAP */ 5542 hat_unload(kas.a_hat, memseg->vaddr, memseg->size, 5543 HAT_UNLOAD_UNLOCK); 5544 5545 for (i = 0; i < memseg->npages; i++) { 5546 rv = hv_ldc_unmap(memseg->pages[i].raddr); 5547 if (rv) { 5548 cmn_err(CE_WARN, 5549 "ldc_mem_unmap: (0x%lx) hv unmap err %d\n", 5550 ldcp->id, rv); 5551 } 5552 } 5553 5554 vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size); 5555 } 5556 5557 /* free the allocated memseg and page structures */ 5558 kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); 5559 kmem_free(memseg->cookies, 5560 (sizeof (ldc_mem_cookie_t) * memseg->ncookies)); 5561 kmem_cache_free(ldcssp->memseg_cache, memseg); 5562 5563 /* uninitialize the memory handle */ 5564 mhdl->memseg = NULL; 5565 mhdl->status = LDC_UNBOUND; 5566 5567 D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n", 5568 ldcp->id, mhdl); 5569 5570 mutex_exit(&mhdl->lock); 5571 return (0); 5572 } 5573 5574 /* 5575 * Internal entry point for LDC mapped memory entry consistency 5576 * semantics. Acquire copies the contents of the remote memory 5577 * into the local shadow copy. The release operation copies the local 5578 * contents into the remote memory. The offset and size specify the 5579 * bounds for the memory range being synchronized.
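 *
 * With a shadow mapping, a client would typically bracket each access
 * to the imported segment (a sketch; 'mh' is a handle that was mapped
 * with LDC_SHADOW_MAP):
 *
 *	(void) ldc_mem_acquire(mh, off, size);	pulls remote into shadow
 *	read and/or update the shadow copy here
 *	(void) ldc_mem_release(mh, off, size);	pushes shadow to remote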
5580 */ 5581 static int 5582 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction, 5583 uint64_t offset, size_t size) 5584 { 5585 int err; 5586 ldc_mhdl_t *mhdl; 5587 ldc_chan_t *ldcp; 5588 ldc_memseg_t *memseg; 5589 caddr_t local_vaddr; 5590 size_t copy_size; 5591 5592 if (mhandle == NULL) { 5593 DWARN(DBG_ALL_LDCS, 5594 "i_ldc_mem_acquire_release: invalid memory handle\n"); 5595 return (EINVAL); 5596 } 5597 mhdl = (ldc_mhdl_t *)mhandle; 5598 5599 mutex_enter(&mhdl->lock); 5600 5601 if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) { 5602 DWARN(DBG_ALL_LDCS, 5603 "i_ldc_mem_acquire_release: not mapped memory\n"); 5604 mutex_exit(&mhdl->lock); 5605 return (EINVAL); 5606 } 5607 5608 /* do nothing for direct map */ 5609 if (mhdl->mtype == LDC_DIRECT_MAP) { 5610 mutex_exit(&mhdl->lock); 5611 return (0); 5612 } 5613 5614 /* do nothing for LDC_COPY_IN without MEM_R, or LDC_COPY_OUT without MEM_W */ 5615 if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) || 5616 (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) { 5617 mutex_exit(&mhdl->lock); 5618 return (0); 5619 } 5620 5621 if (offset >= mhdl->memseg->size || 5622 (offset + size) > mhdl->memseg->size) { 5623 DWARN(DBG_ALL_LDCS, 5624 "i_ldc_mem_acquire_release: memory out of range\n"); 5625 mutex_exit(&mhdl->lock); 5626 return (EINVAL); 5627 } 5628 5629 /* get the channel handle and memory segment */ 5630 ldcp = mhdl->ldcp; 5631 memseg = mhdl->memseg; 5632 5633 if (mhdl->mtype == LDC_SHADOW_MAP) { 5634 5635 local_vaddr = memseg->vaddr + offset; 5636 copy_size = size; 5637 5638 /* copy to/from remote from/to local memory */ 5639 err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset, 5640 &copy_size, memseg->cookies, memseg->ncookies, 5641 direction); 5642 if (err || copy_size != size) { 5643 DWARN(ldcp->id, 5644 "i_ldc_mem_acquire_release: copy failed\n"); 5645 mutex_exit(&mhdl->lock); 5646 return (err); 5647 } 5648 } 5649 5650 mutex_exit(&mhdl->lock); 5651 5652 return (0); 5653 } 5654 5655 /* 5656 * Ensure that the contents in the local memory seg are consistent 5657 * with the contents of the remote segment 5658 */ 5659 int 5660 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size) 5661 { 5662 return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size)); 5663 } 5664 5665 5666 /* 5667 * Ensure that the contents in the remote memory seg are consistent 5668 * with the contents of the local segment 5669 */ 5670 int 5671 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size) 5672 { 5673 return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size)); 5674 } 5675 5676 /* 5677 * Allocate a descriptor ring. The size of each descriptor 5678 * must be 8-byte aligned and the entire ring should be a multiple 5679 * of MMU_PAGESIZE.
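 *
 * For example (illustrative numbers only): 128 descriptors of 64 bytes
 * each occupy 8192 bytes, which is already a whole number of 8K pages,
 * so the ring size is left at 8192; 100 descriptors of 56 bytes
 * (5600 bytes) would be rounded up to one full 8K page by the sizing
 * logic below.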
5680 */ 5681 int 5682 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle) 5683 { 5684 ldc_dring_t *dringp; 5685 size_t size = (dsize * len); 5686 5687 D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n", 5688 len, dsize); 5689 5690 if (dhandle == NULL) { 5691 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n"); 5692 return (EINVAL); 5693 } 5694 5695 if (len == 0) { 5696 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n"); 5697 return (EINVAL); 5698 } 5699 5700 /* descriptor size should be 8-byte aligned */ 5701 if (dsize == 0 || (dsize & 0x7)) { 5702 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n"); 5703 return (EINVAL); 5704 } 5705 5706 *dhandle = 0; 5707 5708 /* Allocate a desc ring structure */ 5709 dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); 5710 5711 /* Initialize dring */ 5712 dringp->length = len; 5713 dringp->dsize = dsize; 5714 5715 /* round off to multiple of pagesize */ 5716 dringp->size = (size & MMU_PAGEMASK); 5717 if (size & MMU_PAGEOFFSET) 5718 dringp->size += MMU_PAGESIZE; 5719 5720 dringp->status = LDC_UNBOUND; 5721 5722 /* allocate descriptor ring memory */ 5723 dringp->base = kmem_zalloc(dringp->size, KM_SLEEP); 5724 5725 /* initialize the desc ring lock */ 5726 mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL); 5727 5728 /* Add descriptor ring to the head of global list */ 5729 mutex_enter(&ldcssp->lock); 5730 dringp->next = ldcssp->dring_list; 5731 ldcssp->dring_list = dringp; 5732 mutex_exit(&ldcssp->lock); 5733 5734 *dhandle = (ldc_dring_handle_t)dringp; 5735 5736 D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n"); 5737 5738 return (0); 5739 } 5740 5741 5742 /* 5743 * Destroy a descriptor ring. 5744 */ 5745 int 5746 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle) 5747 { 5748 ldc_dring_t *dringp; 5749 ldc_dring_t *tmp_dringp; 5750 5751 D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n"); 5752 5753 if (dhandle == NULL) { 5754 DWARN(DBG_ALL_LDCS, 5755 "ldc_mem_dring_destroy: invalid desc ring handle\n"); 5756 return (EINVAL); 5757 } 5758 dringp = (ldc_dring_t *)dhandle; 5759 5760 if (dringp->status == LDC_BOUND) { 5761 DWARN(DBG_ALL_LDCS, 5762 "ldc_mem_dring_destroy: desc ring is bound\n"); 5763 return (EACCES); 5764 } 5765 5766 mutex_enter(&dringp->lock); 5767 mutex_enter(&ldcssp->lock); 5768 5769 /* remove from linked list - if not bound */ 5770 tmp_dringp = ldcssp->dring_list; 5771 if (tmp_dringp == dringp) { 5772 ldcssp->dring_list = dringp->next; 5773 dringp->next = NULL; 5774 5775 } else { 5776 while (tmp_dringp != NULL) { 5777 if (tmp_dringp->next == dringp) { 5778 tmp_dringp->next = dringp->next; 5779 dringp->next = NULL; 5780 break; 5781 } 5782 tmp_dringp = tmp_dringp->next; 5783 } 5784 if (tmp_dringp == NULL) { 5785 DWARN(DBG_ALL_LDCS, 5786 "ldc_mem_dring_destroy: invalid descriptor\n"); 5787 mutex_exit(&ldcssp->lock); 5788 mutex_exit(&dringp->lock); 5789 return (EINVAL); 5790 } 5791 } 5792 5793 mutex_exit(&ldcssp->lock); 5794 5795 /* free the descriptor ring */ 5796 kmem_free(dringp->base, dringp->size); 5797 5798 mutex_exit(&dringp->lock); 5799 5800 /* destroy dring lock */ 5801 mutex_destroy(&dringp->lock); 5802 5803 /* free desc ring object */ 5804 kmem_free(dringp, sizeof (ldc_dring_t)); 5805 5806 return (0); 5807 } 5808 5809 /* 5810 * Bind a previously allocated dring to a channel. The channel should 5811 * be OPEN in order to bind the ring to the channel. Returns back a 5812 * descriptor ring cookie. 
The descriptor ring is exported for remote 5813 * access by the client at the other end of the channel. An entry for 5814 * each dring page is stored in the map table (via a call to ldc_mem_bind_handle). 5815 */ 5816 int 5817 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle, 5818 uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) 5819 { 5820 int err; 5821 ldc_chan_t *ldcp; 5822 ldc_dring_t *dringp; 5823 ldc_mem_handle_t mhandle; 5824 5825 /* check to see if channel is initialized */ 5826 if (handle == NULL) { 5827 DWARN(DBG_ALL_LDCS, 5828 "ldc_mem_dring_bind: invalid channel handle\n"); 5829 return (EINVAL); 5830 } 5831 ldcp = (ldc_chan_t *)handle; 5832 5833 if (dhandle == NULL) { 5834 DWARN(DBG_ALL_LDCS, 5835 "ldc_mem_dring_bind: invalid desc ring handle\n"); 5836 return (EINVAL); 5837 } 5838 dringp = (ldc_dring_t *)dhandle; 5839 5840 if (cookie == NULL) { 5841 DWARN(ldcp->id, 5842 "ldc_mem_dring_bind: invalid cookie arg\n"); 5843 return (EINVAL); 5844 } 5845 5846 mutex_enter(&dringp->lock); 5847 5848 if (dringp->status == LDC_BOUND) { 5849 DWARN(DBG_ALL_LDCS, 5850 "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n", 5851 ldcp->id); 5852 mutex_exit(&dringp->lock); 5853 return (EINVAL); 5854 } 5855 5856 if ((perm & LDC_MEM_RW) == 0) { 5857 DWARN(DBG_ALL_LDCS, 5858 "ldc_mem_dring_bind: invalid permissions\n"); 5859 mutex_exit(&dringp->lock); 5860 return (EINVAL); 5861 } 5862 5863 if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { 5864 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n"); 5865 mutex_exit(&dringp->lock); 5866 return (EINVAL); 5867 } 5868 5869 dringp->ldcp = ldcp; 5870 5871 /* create a memory handle */ 5872 err = ldc_mem_alloc_handle(handle, &mhandle); 5873 if (err || mhandle == NULL) { 5874 DWARN(DBG_ALL_LDCS, 5875 "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n", 5876 ldcp->id); 5877 mutex_exit(&dringp->lock); 5878 return (err); 5879 } 5880 dringp->mhdl = mhandle; 5881 5882 /* bind the descriptor ring to channel */ 5883 err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size, 5884 mtype, perm, cookie, ccount); 5885 if (err) { 5886 DWARN(ldcp->id, 5887 "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n", 5888 ldcp->id); 5889 mutex_exit(&dringp->lock); 5890 return (err); 5891 } 5892 5893 /* 5894 * For now, return an error if we get more than one cookie. 5895 * FUTURE: Return multiple cookies ..
5896 */ 5897 if (*ccount > 1) { 5898 (void) ldc_mem_unbind_handle(mhandle); 5899 (void) ldc_mem_free_handle(mhandle); 5900 5901 dringp->ldcp = NULL; 5902 dringp->mhdl = NULL; 5903 *ccount = 0; 5904 5905 mutex_exit(&dringp->lock); 5906 return (EAGAIN); 5907 } 5908 5909 /* Add descriptor ring to channel's exported dring list */ 5910 mutex_enter(&ldcp->exp_dlist_lock); 5911 dringp->ch_next = ldcp->exp_dring_list; 5912 ldcp->exp_dring_list = dringp; 5913 mutex_exit(&ldcp->exp_dlist_lock); 5914 5915 dringp->status = LDC_BOUND; 5916 5917 mutex_exit(&dringp->lock); 5918 5919 return (0); 5920 } 5921 5922 /* 5923 * Return the next cookie associated with the specified dring handle 5924 */ 5925 int 5926 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie) 5927 { 5928 int rv = 0; 5929 ldc_dring_t *dringp; 5930 ldc_chan_t *ldcp; 5931 5932 if (dhandle == NULL) { 5933 DWARN(DBG_ALL_LDCS, 5934 "ldc_mem_dring_nextcookie: invalid desc ring handle\n"); 5935 return (EINVAL); 5936 } 5937 dringp = (ldc_dring_t *)dhandle; 5938 mutex_enter(&dringp->lock); 5939 5940 if (dringp->status != LDC_BOUND) { 5941 DWARN(DBG_ALL_LDCS, 5942 "ldc_mem_dring_nextcookie: descriptor ring 0x%llx " 5943 "is not bound\n", dringp); 5944 mutex_exit(&dringp->lock); 5945 return (EINVAL); 5946 } 5947 5948 ldcp = dringp->ldcp; 5949 5950 if (cookie == NULL) { 5951 DWARN(ldcp->id, 5952 "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n", 5953 ldcp->id); 5954 mutex_exit(&dringp->lock); 5955 return (EINVAL); 5956 } 5957 5958 rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie); 5959 mutex_exit(&dringp->lock); 5960 5961 return (rv); 5962 } 5963 /* 5964 * Unbind a previously bound dring from a channel. 5965 */ 5966 int 5967 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle) 5968 { 5969 ldc_dring_t *dringp; 5970 ldc_dring_t *tmp_dringp; 5971 ldc_chan_t *ldcp; 5972 5973 if (dhandle == NULL) { 5974 DWARN(DBG_ALL_LDCS, 5975 "ldc_mem_dring_unbind: invalid desc ring handle\n"); 5976 return (EINVAL); 5977 } 5978 dringp = (ldc_dring_t *)dhandle; 5979 5980 mutex_enter(&dringp->lock); 5981 5982 if (dringp->status == LDC_UNBOUND) { 5983 DWARN(DBG_ALL_LDCS, 5984 "ldc_mem_dring_unbind: descriptor ring 0x%llx is unbound\n", 5985 dringp); 5986 mutex_exit(&dringp->lock); 5987 return (EINVAL); 5988 } 5989 ldcp = dringp->ldcp; 5990 5991 mutex_enter(&ldcp->exp_dlist_lock); 5992 5993 tmp_dringp = ldcp->exp_dring_list; 5994 if (tmp_dringp == dringp) { 5995 ldcp->exp_dring_list = dringp->ch_next; 5996 dringp->ch_next = NULL; 5997 5998 } else { 5999 while (tmp_dringp != NULL) { 6000 if (tmp_dringp->ch_next == dringp) { 6001 tmp_dringp->ch_next = dringp->ch_next; 6002 dringp->ch_next = NULL; 6003 break; 6004 } 6005 tmp_dringp = tmp_dringp->ch_next; 6006 } 6007 if (tmp_dringp == NULL) { 6008 DWARN(DBG_ALL_LDCS, 6009 "ldc_mem_dring_unbind: invalid descriptor\n"); 6010 mutex_exit(&ldcp->exp_dlist_lock); 6011 mutex_exit(&dringp->lock); 6012 return (EINVAL); 6013 } 6014 } 6015 6016 mutex_exit(&ldcp->exp_dlist_lock); 6017 6018 (void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl); 6019 (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); 6020 6021 dringp->ldcp = NULL; 6022 dringp->mhdl = NULL; 6023 dringp->status = LDC_UNBOUND; 6024 6025 mutex_exit(&dringp->lock); 6026 6027 return (0); 6028 } 6029 6030 /* 6031 * Get information about the dring. The base address of the descriptor 6032 * ring along with the type and permission are returned.
6033 */ 6034 int 6035 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo) 6036 { 6037 ldc_dring_t *dringp; 6038 int rv; 6039 6040 if (dhandle == NULL) { 6041 DWARN(DBG_ALL_LDCS, 6042 "ldc_mem_dring_info: invalid desc ring handle\n"); 6043 return (EINVAL); 6044 } 6045 dringp = (ldc_dring_t *)dhandle; 6046 6047 mutex_enter(&dringp->lock); 6048 6049 if (dringp->mhdl) { 6050 rv = ldc_mem_info(dringp->mhdl, minfo); 6051 if (rv) { 6052 DWARN(DBG_ALL_LDCS, 6053 "ldc_mem_dring_info: error reading mem info\n"); 6054 mutex_exit(&dringp->lock); 6055 return (rv); 6056 } 6057 } else { 6058 minfo->vaddr = dringp->base; 6059 minfo->raddr = NULL; 6060 minfo->status = dringp->status; 6061 } 6062 6063 mutex_exit(&dringp->lock); 6064 6065 return (0); 6066 } 6067 6068 /* 6069 * Map an exported descriptor ring into the local address space. If the 6070 * descriptor ring was exported for direct map access, a HV call is made 6071 * to allocate a RA range. If the map is done via a shadow copy, local 6072 * shadow memory is allocated. 6073 */ 6074 int 6075 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie, 6076 uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype, 6077 ldc_dring_handle_t *dhandle) 6078 { 6079 int err; 6080 ldc_chan_t *ldcp = (ldc_chan_t *)handle; 6081 ldc_mem_handle_t mhandle; 6082 ldc_dring_t *dringp; 6083 size_t dring_size; 6084 6085 if (dhandle == NULL) { 6086 DWARN(DBG_ALL_LDCS, 6087 "ldc_mem_dring_map: invalid dhandle\n"); 6088 return (EINVAL); 6089 } 6090 6091 /* check to see if channel is initialized */ 6092 if (handle == NULL) { 6093 DWARN(DBG_ALL_LDCS, 6094 "ldc_mem_dring_map: invalid channel handle\n"); 6095 return (EINVAL); 6096 } 6097 ldcp = (ldc_chan_t *)handle; 6098 6099 if (cookie == NULL) { 6100 DWARN(ldcp->id, 6101 "ldc_mem_dring_map: (0x%llx) invalid cookie\n", 6102 ldcp->id); 6103 return (EINVAL); 6104 } 6105 6106 /* FUTURE: For now we support only one cookie per dring */ 6107 ASSERT(ccount == 1); 6108 6109 if (cookie->size < (dsize * len)) { 6110 DWARN(ldcp->id, 6111 "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n", 6112 ldcp->id); 6113 return (EINVAL); 6114 } 6115 6116 *dhandle = 0; 6117 6118 /* Allocate a dring structure */ 6119 dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); 6120 6121 D1(ldcp->id, 6122 "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n", 6123 mtype, len, dsize, cookie->addr, cookie->size); 6124 6125 /* Initialize dring */ 6126 dringp->length = len; 6127 dringp->dsize = dsize; 6128 6129 /* round up to a multiple of page size */ 6130 dring_size = len * dsize; 6131 dringp->size = (dring_size & MMU_PAGEMASK); 6132 if (dring_size & MMU_PAGEOFFSET) 6133 dringp->size += MMU_PAGESIZE; 6134 6135 dringp->ldcp = ldcp; 6136 6137 /* create a memory handle */ 6138 err = ldc_mem_alloc_handle(handle, &mhandle); 6139 if (err || mhandle == NULL) { 6140 DWARN(DBG_ALL_LDCS, 6141 "ldc_mem_dring_map: cannot alloc hdl err=%d\n", 6142 err); 6143 kmem_free(dringp, sizeof (ldc_dring_t)); 6144 return (ENOMEM); 6145 } 6146 6147 dringp->mhdl = mhandle; 6148 dringp->base = NULL; 6149 6150 /* map the dring into local memory */ 6151 err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW, 6152 &(dringp->base), NULL); 6153 if (err || dringp->base == NULL) { 6154 cmn_err(CE_WARN, 6155 "ldc_mem_dring_map: cannot map desc ring err=%d\n", err); 6156 (void) ldc_mem_free_handle(mhandle); 6157 kmem_free(dringp, sizeof (ldc_dring_t)); 6158 return (ENOMEM); 6159 } 6160 6161 /* initialize the desc ring lock */ 6162 mutex_init(&dringp->lock, NULL,
MUTEX_DRIVER, NULL); 6163 6164 /* Add descriptor ring to channel's imported dring list */ 6165 mutex_enter(&ldcp->imp_dlist_lock); 6166 dringp->ch_next = ldcp->imp_dring_list; 6167 ldcp->imp_dring_list = dringp; 6168 mutex_exit(&ldcp->imp_dlist_lock); 6169 6170 dringp->status = LDC_MAPPED; 6171 6172 *dhandle = (ldc_dring_handle_t)dringp; 6173 6174 return (0); 6175 } 6176 6177 /* 6178 * Unmap a descriptor ring. Free shadow memory (if any). 6179 */ 6180 int 6181 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle) 6182 { 6183 ldc_dring_t *dringp; 6184 ldc_dring_t *tmp_dringp; 6185 ldc_chan_t *ldcp; 6186 6187 if (dhandle == NULL) { 6188 DWARN(DBG_ALL_LDCS, 6189 "ldc_mem_dring_unmap: invalid desc ring handle\n"); 6190 return (EINVAL); 6191 } 6192 dringp = (ldc_dring_t *)dhandle; 6193 6194 if (dringp->status != LDC_MAPPED) { 6195 DWARN(DBG_ALL_LDCS, 6196 "ldc_mem_dring_unmap: not a mapped desc ring\n"); 6197 return (EINVAL); 6198 } 6199 6200 mutex_enter(&dringp->lock); 6201 6202 ldcp = dringp->ldcp; 6203 6204 mutex_enter(&ldcp->imp_dlist_lock); 6205 6206 /* find and unlink the desc ring from channel import list */ 6207 tmp_dringp = ldcp->imp_dring_list; 6208 if (tmp_dringp == dringp) { 6209 ldcp->imp_dring_list = dringp->ch_next; 6210 dringp->ch_next = NULL; 6211 6212 } else { 6213 while (tmp_dringp != NULL) { 6214 if (tmp_dringp->ch_next == dringp) { 6215 tmp_dringp->ch_next = dringp->ch_next; 6216 dringp->ch_next = NULL; 6217 break; 6218 } 6219 tmp_dringp = tmp_dringp->ch_next; 6220 } 6221 if (tmp_dringp == NULL) { 6222 DWARN(DBG_ALL_LDCS, 6223 "ldc_mem_dring_unmap: invalid descriptor\n"); 6224 mutex_exit(&ldcp->imp_dlist_lock); 6225 mutex_exit(&dringp->lock); 6226 return (EINVAL); 6227 } 6228 } 6229 6230 mutex_exit(&ldcp->imp_dlist_lock); 6231 6232 /* do a LDC memory handle unmap and free */ 6233 (void) ldc_mem_unmap(dringp->mhdl); 6234 (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); 6235 6236 dringp->status = 0; 6237 dringp->ldcp = NULL; 6238 6239 mutex_exit(&dringp->lock); 6240 6241 /* destroy dring lock */ 6242 mutex_destroy(&dringp->lock); 6243 6244 /* free desc ring object */ 6245 kmem_free(dringp, sizeof (ldc_dring_t)); 6246 6247 return (0); 6248 } 6249 6250 /* 6251 * Internal entry point for descriptor ring access entry consistency 6252 * semantics. Acquire copies the contents of the remote descriptor ring 6253 * into the local shadow copy. The release operation copies the local 6254 * contents into the remote dring. The start and end locations specify 6255 * bounds for the entries being synchronized. 
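 *
 * Note that the range may wrap around the end of the ring, in which
 * case the sync is done in two copies. For instance (illustrative
 * values), in a ring of length 8, start=6 and end=1 first syncs
 * descriptors 6-7 and then descriptors 0-1:
 *
 *	(void) ldc_mem_dring_acquire(dhandle, 6, 1);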
6256 */ 6257 static int 6258 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle, 6259 uint8_t direction, uint64_t start, uint64_t end) 6260 { 6261 int err; 6262 ldc_dring_t *dringp; 6263 ldc_chan_t *ldcp; 6264 uint64_t soff; 6265 size_t copy_size; 6266 6267 if (dhandle == NULL) { 6268 DWARN(DBG_ALL_LDCS, 6269 "i_ldc_dring_acquire_release: invalid desc ring handle\n"); 6270 return (EINVAL); 6271 } 6272 dringp = (ldc_dring_t *)dhandle; 6273 mutex_enter(&dringp->lock); 6274 6275 if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) { 6276 DWARN(DBG_ALL_LDCS, 6277 "i_ldc_dring_acquire_release: not a mapped desc ring\n"); 6278 mutex_exit(&dringp->lock); 6279 return (EINVAL); 6280 } 6281 6282 if (start >= dringp->length || end >= dringp->length) { 6283 DWARN(DBG_ALL_LDCS, 6284 "i_ldc_dring_acquire_release: index out of range\n"); 6285 mutex_exit(&dringp->lock); 6286 return (EINVAL); 6287 } 6288 6289 /* get the channel handle */ 6290 ldcp = dringp->ldcp; 6291 6292 copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) : 6293 ((dringp->length - start) * dringp->dsize); 6294 6295 /* Calculate the relative offset for the first desc */ 6296 soff = (start * dringp->dsize); 6297 6298 /* copy to/from remote from/to local memory */ 6299 D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n", 6300 soff, copy_size); 6301 err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, 6302 direction, soff, copy_size); 6303 if (err) { 6304 DWARN(ldcp->id, 6305 "i_ldc_dring_acquire_release: copy failed\n"); 6306 mutex_exit(&dringp->lock); 6307 return (err); 6308 } 6309 6310 /* do the balance */ 6311 if (start > end) { 6312 copy_size = ((end + 1) * dringp->dsize); 6313 soff = 0; 6314 6315 /* copy to/from remote from/to local memory */ 6316 D1(ldcp->id, "i_ldc_dring_acquire_release: c2 " 6317 "off=0x%llx sz=0x%llx\n", soff, copy_size); 6318 err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, 6319 direction, soff, copy_size); 6320 if (err) { 6321 DWARN(ldcp->id, 6322 "i_ldc_dring_acquire_release: copy failed\n"); 6323 mutex_exit(&dringp->lock); 6324 return (err); 6325 } 6326 } 6327 6328 mutex_exit(&dringp->lock); 6329 6330 return (0); 6331 } 6332 6333 /* 6334 * Ensure that the contents in the local dring are consistent 6335 * with the contents of the remote dring 6336 */ 6337 int 6338 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) 6339 { 6340 return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end)); 6341 } 6342 6343 /* 6344 * Ensure that the contents in the remote dring are consistent 6345 * with the contents of the local dring 6346 */ 6347 int 6348 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) 6349 { 6350 return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end)); 6351 } 6352 6353 6354 /* ------------------------------------------------------------------------- */ 6355