/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * sun4v LDC Link Layer
 */
#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/ksynch.h>
#include <sys/modctl.h>
#include <sys/stat.h>	/* needed for S_IFBLK and S_IFCHR */
#include <sys/debug.h>
#include <sys/promif.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cyclic.h>
#include <sys/machsystm.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/intreg.h>
#include <sys/machcpuvar.h>
#include <sys/mmu.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/hat_sfmmu.h>
#include <sys/vm_machparam.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <sys/note.h>
#include <sys/ivintr.h>
#include <sys/hypervisor_api.h>
#include <sys/ldc.h>
#include <sys/ldc_impl.h>
#include <sys/cnex.h>
#include <sys/hsvc.h>

/* Core internal functions */
static int i_ldc_h2v_error(int h_error);
static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
static void i_ldc_reset_state(ldc_chan_t *ldcp);
static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);

static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg);

/* Interrupt handling functions */
static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);

/* Read method functions */
static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);

/* Write method functions */
static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);

/* Pkt processing internal functions */
static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);

/* Memory synchronization internal functions */
static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
    uint8_t direction, uint64_t offset, size_t size);
static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
    uint8_t direction, uint64_t start, uint64_t end);

/* LDC Version */
static ldc_ver_t ldc_versions[] = { {1, 0} };

/* number of supported versions */
#define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))

/* Module State Pointer */
static ldc_soft_state_t *ldcssp;

static struct modldrv md = {
	&mod_miscops,			/* This is a misc module */
	"sun4v LDC module v%I%",	/* Name of the module */
};

static struct modlinkage ml = {
	MODREV_1,
	&md,
	NULL
};

static uint64_t ldc_sup_minor;		/* Supported minor number */
static hsvc_info_t ldc_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
};

static uint64_t intr_sup_minor;		/* Supported minor number */
static hsvc_info_t intr_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
};

/*
 * LDC framework supports mapping remote domain's memory
 * either directly or via shadow memory pages. Default
 * support is currently implemented via shadow copy.
 * Direct map can be enabled by setting 'ldc_shmem_enabled'.
 */
int ldc_shmem_enabled = 0;

/*
 * The number of MTU-sized messages that can be stored in
 * the LDC Tx queue. The number of Tx queue entries is
 * then computed as (mtu * mtu_msgs)/sizeof(queue_entry).
 */
uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;

/*
 * The minimum queue length. This is the size of the smallest
 * LDC queue. If the computed value is less than this default,
 * the queue length is rounded up to 'ldc_queue_entries'.
 */
uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
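
/*
 * Illustrative sizing (hypothetical values, not taken from this file):
 * with an MTU of 4096 bytes, ldc_mtu_msgs = 4 and 64-byte queue
 * entries, the computation above yields (4096 * 4) / 64 = 256 Tx
 * queue entries. If that result were smaller than ldc_queue_entries,
 * the queue would be sized to ldc_queue_entries instead.
 */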

/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor.
 * The default number of entries in the table is set to
 * 'ldc_mtbl_entries'.
 */
uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;

/*
 * LDC retry count and delay - when the HV returns EWOULDBLOCK
 * the operation is retried 'ldc_max_retries' times with a
 * wait of 'ldc_delay' usecs between each retry.
 */
int ldc_max_retries = LDC_MAX_RETRIES;
clock_t ldc_delay = LDC_DELAY;

/*
 * delay between each retry of channel unregistration in
 * ldc_close(), to wait for pending interrupts to complete.
 */
clock_t ldc_close_delay = LDC_CLOSE_DELAY;

#ifdef DEBUG

/*
 * Print debug messages
 *
 * set ldcdbg to 0x7 for enabling all msgs
 * 0x4 - Warnings
 * 0x2 - All debug messages
 * 0x1 - Minimal debug messages
 *
 * set ldcdbgchan to the channel number you want to debug
 * setting it to -1 prints debug messages for all channels
 * NOTE: ldcdbgchan has no effect on error messages
 */

#define	DBG_ALL_LDCS -1

int ldcdbg = 0x0;
int64_t ldcdbgchan = DBG_ALL_LDCS;
uint64_t ldc_inject_err_flag = 0;

static void
ldcdebug(int64_t id, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	/*
	 * Do not return if,
	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
	 * debug channel = caller specified channel
	 */
	if ((id != DBG_ALL_LDCS) &&
	    (ldcdbgchan != DBG_ALL_LDCS) &&
	    (ldcdbgchan != id)) {
		return;
	}

	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);

	cmn_err(CE_CONT, "?%s", buf);
}

#define	LDC_ERR_RESET	0x1
#define	LDC_ERR_PKTLOSS	0x2

static boolean_t
ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
{
	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
		return (B_FALSE);

	if ((ldc_inject_err_flag & error) == 0)
		return (B_FALSE);

	/* clear the injection state */
	ldc_inject_err_flag &= ~error;

	return (B_TRUE);
}

#define	D1		\
if (ldcdbg & 0x01)	\
	ldcdebug

#define	D2		\
if (ldcdbg & 0x02)	\
	ldcdebug

#define	DWARN		\
if (ldcdbg & 0x04)	\
	ldcdebug
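
/*
 * Illustrative use of the tunables above (on a DEBUG kernel); the
 * module name 'ldc' in the /etc/system syntax is an assumption based
 * on this file:
 *
 *	set ldc:ldcdbg = 0x7
 *	set ldc:ldcdbgchan = 5
 *
 * enables all message levels, restricted to channel 5.
 */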

#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char buf[65*3];							\
	int i;								\
	uint8_t *src = (uint8_t *)addr;					\
	for (i = 0; i < 64; i++, src++)					\
		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
	(void) sprintf(&buf[i * 3], "|\n");				\
	D2((id), "payload: %s", buf);					\
}

#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
	if (msg->type == LDC_DATA) {					\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
		    (s), mid, msg->type, msg->stype, msg->ctrl,		\
		    (msg->env & LDC_FRAG_START) ? 'B' : ' ',		\
		    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',		\
		    (msg->env & LDC_LEN_MASK));				\
	} else {							\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),	\
		    mid, msg->type, msg->stype, msg->ctrl, msg->env);	\
	}								\
}

#define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
#define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)

#else

#define	DBG_ALL_LDCS -1

#define	D1
#define	D2
#define	DWARN

#define	DUMP_PAYLOAD(id, addr)
#define	DUMP_LDC_PKT(c, s, addr)

#define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
#define	LDC_INJECT_PKTLOSS(_ldcp)	(B_FALSE)

#endif

#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t));

#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
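
/*
 * Illustrative expansion (hypothetical values): for an 8K page
 * (pg_shift = 13, page size code 0) and map table index 5,
 * IDX2COOKIE(5, 0, 13) produces (0 << LDC_COOKIE_PGSZC_SHIFT) |
 * (5 << 13) = 0xa000, i.e. the page size code occupies the high bits
 * of the cookie and the table index sits above the page offset bits.
 */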

int
_init(void)
{
	int status;

	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
		    " group: 0x%lx major: %ld minor: %ld errno: %d",
		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
		return (-1);
	}

	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
		(void) hsvc_unregister(&ldc_hsvc);
		return (-1);
	}

	/* allocate soft state structure */
	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);

	/* Link the module into the system */
	status = mod_install(&ml);
	if (status != 0) {
		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
		return (status);
	}

	/* Initialize the LDC state structure */
	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);

	mutex_enter(&ldcssp->lock);

	/* Create a cache for memory handles */
	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (ldcssp->memhdl_cache == NULL) {
		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
		mutex_exit(&ldcssp->lock);
		return (-1);
	}

	/* Create cache for memory segment structures */
	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (ldcssp->memseg_cache == NULL) {
		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
		mutex_exit(&ldcssp->lock);
		return (-1);
	}


	ldcssp->channel_count = 0;
	ldcssp->channels_open = 0;
	ldcssp->chan_list = NULL;
	ldcssp->dring_list = NULL;

	mutex_exit(&ldcssp->lock);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	/* Report status of the dynamically loadable driver module */
	return (mod_info(&ml, modinfop));
}

int
_fini(void)
{
	int		rv, status;
	ldc_chan_t	*ldcp, *next_ldcp;
	ldc_dring_t	*dringp, *next_dringp;
	ldc_mem_info_t	minfo;

	/* Unlink the driver module from the system */
	status = mod_remove(&ml);
	if (status) {
		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
		return (EIO);
	}

	/* close and finalize channels */
	ldcp = ldcssp->chan_list;
	while (ldcp != NULL) {
		/* save the next pointer before ldc_fini() frees ldcp */
		next_ldcp = ldcp->next;

		(void) ldc_close((ldc_handle_t)ldcp);
		(void) ldc_fini((ldc_handle_t)ldcp);

		ldcp = next_ldcp;
	}

	/* Free descriptor rings */
	dringp = ldcssp->dring_list;
	while (dringp != NULL) {
		/* save the next pointer before the ring is destroyed */
		next_dringp = dringp->next;

		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
		if (rv == 0 && minfo.status != LDC_UNBOUND) {
			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_dring_unbind(
				    (ldc_dring_handle_t)dringp);
			}
			if (minfo.status == LDC_MAPPED) {
				(void) ldc_mem_dring_unmap(
				    (ldc_dring_handle_t)dringp);
			}
		}

		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
		dringp = next_dringp;
	}
	ldcssp->dring_list = NULL;

	/* Destroy kmem caches */
	kmem_cache_destroy(ldcssp->memhdl_cache);
	kmem_cache_destroy(ldcssp->memseg_cache);

	/*
	 * We have successfully "removed" the driver.
	 * Destroy soft states
	 */
	mutex_destroy(&ldcssp->lock);
	kmem_free(ldcssp, sizeof (ldc_soft_state_t));

	(void) hsvc_unregister(&ldc_hsvc);
	(void) hsvc_unregister(&intr_hsvc);

	return (status);
}

/* -------------------------------------------------------------------------- */

/*
 * LDC Link Layer Internal Functions
 */

/*
 * Translate HV Errors to sun4v error codes
 */
static int
i_ldc_h2v_error(int h_error)
{
	switch (h_error) {

	case	H_EOK:
		return (0);

	case	H_ENORADDR:
		return (EFAULT);

	case	H_EBADPGSZ:
	case	H_EINVAL:
		return (EINVAL);

	case	H_EWOULDBLOCK:
		return (EWOULDBLOCK);

	case	H_ENOACCESS:
	case	H_ENOMAP:
		return (EACCES);

	case	H_EIO:
	case	H_ECPUERROR:
		return (EIO);

	case	H_ENOTSUPPORTED:
		return (ENOTSUP);

	case	H_ETOOMANY:
		return (ENOSPC);

	case	H_ECHANNEL:
		return (ECHRNG);
	default:
		break;
	}

	return (EIO);
}
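
/*
 * Typical use (illustrative sketch, not a call made in this file):
 * HV return codes are mapped to errnos before being handed back to
 * LDC consumers, e.g.
 *
 *	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
 *	if (rv != H_EOK)
 *		return (i_ldc_h2v_error(rv));
 */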

/*
 * Reconfigure the transmit queue
 */
static int
i_ldc_txq_reconf(ldc_chan_t *ldcp)
{
	int rv;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
		return (EIO);
	}
	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
	    &(ldcp->tx_tail), &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
		return (EIO);
	}
	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
	    ldcp->link_state);

	return (0);
}

/*
 * Reconfigure the receive queue
 */
static int
i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
		    ldcp->rx_q_entries);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
			    ldcp->id);
			return (EIO);
		}
		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
		    ldcp->id);
	}

	return (0);
}


/*
 * Drain the contents of the receive queue
 */
static int
i_ldc_rxq_drain(ldc_chan_t *ldcp)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	/* flush contents by setting the head = tail */
	return (i_ldc_set_rx_head(ldcp, rx_tail));
}


/*
 * Reset LDC state structure and its contents
 */
static void
i_ldc_reset_state(ldc_chan_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->lock));
	ldcp->last_msg_snt = LDC_INIT_SEQID;
	ldcp->last_ack_rcd = 0;
	ldcp->last_msg_rcd = 0;
	ldcp->tx_ackd_head = ldcp->tx_head;
	ldcp->next_vidx = 0;
	ldcp->hstate = 0;
	ldcp->tstate = TS_OPEN;
	ldcp->status = LDC_OPEN;

	if (ldcp->link_state == LDC_CHANNEL_UP ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {

		if (ldcp->mode == LDC_MODE_RAW) {
			ldcp->status = LDC_UP;
			ldcp->tstate = TS_UP;
		} else {
			ldcp->status = LDC_READY;
			ldcp->tstate |= TS_LINK_READY;
		}
	}
}

/*
 * Reset an LDC channel
 */
static void
i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
{
	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* reconfig Tx and Rx queues */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_reconf(ldcp, force_reset);

	/* Clear Tx and Rx interrupts */
	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);

	/* Reset channel state */
	i_ldc_reset_state(ldcp);

	/* Mark channel in reset */
	ldcp->tstate |= TS_IN_RESET;
}


/*
 * Clear pending interrupts
 */
static void
i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
{
	ldc_cnex_t *cinfo = &ldcssp->cinfo;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(cinfo->dip != NULL);

	switch (itype) {
	case CNEX_TX_INTR:
		/* check Tx interrupt */
		if (ldcp->tx_intr_state)
			ldcp->tx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;

	case CNEX_RX_INTR:
		/* check Rx interrupt */
		if (ldcp->rx_intr_state)
			ldcp->rx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;
	}

	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
	D2(ldcp->id,
	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
	    ldcp->id, itype);
}

/*
 * Set the receive queue head
 * Resets connection and returns an error if it fails.
 */
static int
i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
{
	int	rv;
	int	retries;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
			return (0);

		if (rv != H_EWOULDBLOCK)
			break;

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}

	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
	    ldcp->id, head);
	mutex_enter(&ldcp->tx_lock);
	i_ldc_reset(ldcp, B_TRUE);
	mutex_exit(&ldcp->tx_lock);

	return (ECONNRESET);
}


/*
 * Returns the tx_tail to be used for transfer
 * Re-reads the TX queue ptrs if and only if the
 * cached head and tail are equal (queue is full)
 */
static int
i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
{
	int		rv;
	uint64_t	current_head, new_tail;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	/* Read the head and tail ptrs from HV */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
		    ldcp->id);
		return (EIO);
	}
	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
		    ldcp->id);
		return (ECONNRESET);
	}

	/* In reliable mode, check against last ACKd msg */
	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
	    ldcp->mode == LDC_MODE_STREAM)
	    ? ldcp->tx_ackd_head : ldcp->tx_head;

	/* increment the tail */
	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	if (new_tail == current_head) {
		DWARN(ldcp->id,
		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
		    ldcp->id);
		return (EWOULDBLOCK);
	}

	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);

	*tail = ldcp->tx_tail;
	return (0);
}
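
/*
 * Ring arithmetic sketch (hypothetical numbers): with 64-byte packets
 * and a 4-entry Tx queue (256 bytes), a tail of 0xc0 advances to
 * (0xc0 + 64) % 256 = 0. If the head is also 0, the queue is treated
 * as full, so one entry is always left unused to distinguish a full
 * ring from an empty one.
 */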
/*
 * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
 * and retry ldc_max_retries times before returning an error.
 * Returns 0, EWOULDBLOCK or EIO
 */
static int
i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
{
	int	rv, retval = EWOULDBLOCK;
	int	retries;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
			retval = 0;
			break;
		}
		if (rv != H_EWOULDBLOCK) {
			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
			retval = EIO;
			break;
		}

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}
	return (retval);
}

/*
 * Send an LDC message
 */
static int
i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg)
{
	int		rv;
	ldc_msg_t	*pkt;
	uint64_t	tx_tail;
	uint32_t	curr_seqid = ldcp->last_msg_snt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the message */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (rv);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* Initialize the packet */
	pkt->type = pkttype;
	pkt->stype = subtype;
	pkt->ctrl = ctrlmsg;

	/* Store ackid/seqid iff it is not RAW mode & not a RTS/RTR message */
	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
		curr_seqid++;
		if (ldcp->mode != LDC_MODE_RAW) {
			pkt->seqid = curr_seqid;
			pkt->ackid = ldcp->last_msg_rcd;
		}
	}
	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (EIO);
	}

	ldcp->last_msg_snt = curr_seqid;
	ldcp->tx_tail = tx_tail;

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
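
/*
 * Usage sketch (taken from the handshake code below): a control NACK
 * for an RTS is queued with
 *
 *	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
 *
 * and a failure to queue it is treated as fatal, triggering
 * i_ldc_reset().
 */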
/*
 * Checks if a packet was received in the right order
 * in the case of a reliable link.
 * Returns 0 if in order, else EIO
 */
static int
i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	/* No seqid checking for RAW mode */
	if (ldcp->mode == LDC_MODE_RAW)
		return (0);

	/* No seqid checking for version, RTS, RTR message */
	if (msg->ctrl == LDC_VER ||
	    msg->ctrl == LDC_RTS ||
	    msg->ctrl == LDC_RTR)
		return (0);

	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
		    "expecting 0x%x\n", ldcp->id, msg->seqid,
		    (ldcp->last_msg_rcd + 1));
		return (EIO);
	}

#ifdef DEBUG
	if (LDC_INJECT_PKTLOSS(ldcp)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
		return (EIO);
	}
#endif

	return (0);
}
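
/*
 * Sequencing sketch (illustrative): if last_msg_rcd is 0x10, the only
 * acceptable next seqid is 0x11. A received 0x13 would indicate two
 * lost packets; the caller (the Rx handler) responds by NACKing and
 * flushing the receive queue rather than attempting to reorder.
 */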

/*
 * Process an incoming version ctrl message
 */
static int
i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0, idx = ldcp->next_vidx;
	ldc_msg_t	*pkt;
	uint64_t	tx_tail;
	ldc_ver_t	*rcvd_ver;

	/* get the received version */
	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);

	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	switch (msg->stype) {
	case LDC_INFO:

		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			return (EAGAIN);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;

		for (;;) {

			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
			    rcvd_ver->major, rcvd_ver->minor,
			    ldc_versions[idx].major, ldc_versions[idx].minor);

			if (rcvd_ver->major == ldc_versions[idx].major) {
				/* major version match - ACK version */
				pkt->stype = LDC_ACK;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));

				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* nack with next lower version */
				pkt->stype = LDC_NACK;
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next major version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - send NACK */
				pkt->stype = LDC_NACK;
				bzero(pkt->udata, sizeof (ldc_ver_t));
				ldcp->next_vidx = 0;
				break;
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			ldcp->tx_tail = tx_tail;
			if (pkt->stype == LDC_ACK) {
				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
				    " version ACK\n", ldcp->id);
				/* Save the ACK'd version */
				ldcp->version.major = rcvd_ver->major;
				ldcp->version.minor = rcvd_ver->minor;
				ldcp->hstate |= TS_RCVD_VER;
				ldcp->tstate |= TS_VER_DONE;
				D1(DBG_ALL_LDCS,
				    "(0x%llx) Sent ACK, "
				    "Agreed on version v%u.%u\n",
				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
			}
		} else {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) error sending "
			    "ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;

	case LDC_ACK:
		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			if (ldcp->version.major != rcvd_ver->major ||
			    ldcp->version.minor != rcvd_ver->minor) {

				/* mismatched version - reset connection */
				DWARN(ldcp->id,
				    "i_ldc_process_VER: (0x%llx) recvd"
				    " ACK ver != sent ACK ver\n", ldcp->id);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		} else {
			/* SUCCESS - we have agreed on a version */
			ldcp->version.major = rcvd_ver->major;
			ldcp->version.minor = rcvd_ver->minor;
			ldcp->tstate |= TS_VER_DONE;
		}

		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

		/* initiate RTS-RTR-RDX handshake */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		pkt->type = LDC_CTRL;
		pkt->stype = LDC_INFO;
		pkt->ctrl = LDC_RTS;
		pkt->env = ldcp->mode;
		if (ldcp->mode != LDC_MODE_RAW)
			pkt->seqid = LDC_INIT_SEQID;

		ldcp->last_msg_rcd = LDC_INIT_SEQID;

		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv) {
			D2(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) no listener\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTS;

		break;

	case LDC_NACK:
		/* check if version in NACK is zero */
		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
			/* version handshake failure */
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_VER: (0x%llx) no version match\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;
		pkt->stype = LDC_INFO;

		/* check ver in NACK msg has a match */
		for (;;) {
			if (rcvd_ver->major == ldc_versions[idx].major) {
				/*
				 * major version match - resubmit request,
				 * lowering the minor version to the one
				 * this endpt supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
				break;

			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* send next lower version */
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - terminate */
				ldcp->next_vidx = 0;
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version "
			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
			    ldc_versions[idx].minor);
			ldcp->tx_tail = tx_tail;
		} else {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) error sending version "
			    "INFO\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;
	}

	mutex_exit(&ldcp->tx_lock);
	return (rv);
}
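
/*
 * Handshake summary (as implemented above and below): once both sides
 * agree on a version, the endpoint that received the version ACK sends
 * RTS; the peer answers with RTR; the RTS sender completes with RDX.
 * Each non-RAW side seeds its sequence numbers from the seqid carried
 * in the RTS/RTR it receives.
 */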

/*
 * Process an incoming RTS ctrl message
 */
static int
i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0;
	ldc_msg_t	*pkt;
	uint64_t	tx_tail;
	boolean_t	sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		DWARN(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
			    ldcp->id);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;
	default:
		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we don't want to consume the packet that came in but
	 * not record that we received the RTS
	 */
	if (rv || sent_NACK)
		return (rv);

	/* record RTS received */
	ldcp->hstate |= TS_RCVD_RTS;

	/* store initial SEQID info */
	ldcp->last_msg_snt = msg->seqid;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the response */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* initialize the packet */
	pkt->type = LDC_CTRL;
	pkt->stype = LDC_INFO;
	pkt->ctrl = LDC_RTR;
	pkt->env = ldcp->mode;
	if (ldcp->mode != LDC_MODE_RAW)
		pkt->seqid = LDC_INIT_SEQID;

	ldcp->last_msg_rcd = msg->seqid;

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv == 0) {
		D2(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTR;

	} else {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}

/*
 * Process an incoming RTR ctrl message
 */
static int
i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0;
	boolean_t	sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RTR NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			DWARN(ldcp->id,
			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
			    "expecting 0x%x, got 0x%x\n",
			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;

	default:
		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we don't want to consume the packet that came in but
	 * not record that we received the RTR
	 */
	if (rv || sent_NACK)
		return (rv);

	ldcp->last_msg_snt = msg->seqid;
	ldcp->hstate |= TS_RCVD_RTR;

	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
	if (rv) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D2(ldcp->id,
	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);

	ldcp->hstate |= TS_SENT_RDX;
	ldcp->tstate |= TS_HSHAKE_DONE;
	if ((ldcp->tstate & TS_IN_RESET) == 0)
		ldcp->status = LDC_UP;

	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);

	return (0);
}


/*
 * Process an incoming RDX ctrl message
 */
static int
i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int	rv = 0;

	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RDX NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/*
		 * if channel is UP and a RDX received after data transmission
		 * has commenced it is an error
		 */
		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
			    " - LDC reset\n", ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->hstate |= TS_RCVD_RDX;
		ldcp->tstate |= TS_HSHAKE_DONE;
		if ((ldcp->tstate & TS_IN_RESET) == 0)
			ldcp->status = LDC_UP;

		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
		break;

	default:
		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	return (rv);
}

/*
 * Process an incoming ACK for a data packet
 */
static int
i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv;
	uint64_t	tx_head;
	ldc_msg_t	*pkt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Read the current Tx head and tail
	 */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	/*
	 * loop from where the previous ACK location was to the
	 * current head location. This is how far the HV has
	 * actually sent pkts. Pkts between head and tail are
	 * yet to be sent by HV.
	 */
	tx_head = ldcp->tx_ackd_head;
	for (;;) {
		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
		tx_head = (tx_head + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		if (pkt->seqid == msg->ackid) {
			D2(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
			    ldcp->id);
			ldcp->last_ack_rcd = msg->ackid;
			ldcp->tx_ackd_head = tx_head;
			break;
		}
		if (tx_head == ldcp->tx_head) {
			/* could not find packet */
			DWARN(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
			    ldcp->id);

			/* Reset the channel -- as we cannot continue */
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
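
/*
 * Window sketch (illustrative): tx_ackd_head trails tx_head, which in
 * turn trails tx_tail. Packets in [tx_ackd_head, tx_head) have been
 * transmitted by the HV but not yet acknowledged by the peer; in
 * RELIABLE/STREAM modes i_ldc_get_tx_tail() treats tx_ackd_head as the
 * effective head, so an unacknowledged backlog eventually surfaces to
 * writers as EWOULDBLOCK.
 */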

/*
 * Process incoming control message
 * Return 0 - session can continue
 *        EAGAIN - reprocess packet - state was changed
 *        ECONNRESET - channel was reset
 */
static int
i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int	rv = 0;

	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
	    ldcp->id, ldcp->tstate, ldcp->hstate);

	switch (ldcp->tstate & ~TS_IN_RESET) {

	case TS_OPEN:
	case TS_READY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_VREADY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		case LDC_RTS:
			/* process RTS message */
			rv = i_ldc_process_RTS(ldcp, msg);
			break;
		case LDC_RTR:
			/* process RTR message */
			rv = i_ldc_process_RTR(ldcp, msg);
			break;
		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_UP:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
			    "- LDC reset\n", ldcp->id);
			/* peer is redoing version negotiation */
			mutex_enter(&ldcp->tx_lock);
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			rv = EAGAIN;
			break;

		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;

		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}
	}

	return (rv);
}
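
/*
 * Dispatch summary for the state machine above (rows are
 * tstate & ~TS_IN_RESET, columns the accepted ctrl messages):
 *
 *	TS_OPEN/TS_READY:	VER
 *	TS_VREADY:		VER, RTS, RTR, RDX
 *	TS_UP:			VER (restarts negotiation), RDX
 *
 * Anything else is logged via DWARN and ignored.
 */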
"i_ldc_register_channel: cannot register channel\n"); 1678 return (rv); 1679 } 1680 1681 rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR, 1682 i_ldc_tx_hdlr, ldcp, NULL); 1683 if (rv) { 1684 DWARN(ldcp->id, 1685 "i_ldc_register_channel: cannot add Tx interrupt\n"); 1686 (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); 1687 return (rv); 1688 } 1689 1690 rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR, 1691 i_ldc_rx_hdlr, ldcp, NULL); 1692 if (rv) { 1693 DWARN(ldcp->id, 1694 "i_ldc_register_channel: cannot add Rx interrupt\n"); 1695 (void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); 1696 (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); 1697 return (rv); 1698 } 1699 1700 ldcp->tstate |= TS_CNEX_RDY; 1701 1702 return (0); 1703 } 1704 1705 /* 1706 * Unregister a channel with the channel nexus 1707 */ 1708 static int 1709 i_ldc_unregister_channel(ldc_chan_t *ldcp) 1710 { 1711 int rv = 0; 1712 ldc_cnex_t *cinfo = &ldcssp->cinfo; 1713 1714 if (cinfo->dip == NULL) { 1715 DWARN(ldcp->id, 1716 "i_ldc_unregister_channel: cnex has not registered\n"); 1717 return (EAGAIN); 1718 } 1719 1720 if (ldcp->tstate & TS_CNEX_RDY) { 1721 1722 /* Remove the Rx interrupt */ 1723 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR); 1724 if (rv) { 1725 if (rv != EAGAIN) { 1726 DWARN(ldcp->id, 1727 "i_ldc_unregister_channel: err removing " 1728 "Rx intr\n"); 1729 return (rv); 1730 } 1731 1732 /* 1733 * If interrupts are pending and handler has 1734 * finished running, clear interrupt and try 1735 * again 1736 */ 1737 if (ldcp->rx_intr_state != LDC_INTR_PEND) 1738 return (rv); 1739 1740 (void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 1741 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, 1742 CNEX_RX_INTR); 1743 if (rv) { 1744 DWARN(ldcp->id, "i_ldc_unregister_channel: " 1745 "err removing Rx interrupt\n"); 1746 return (rv); 1747 } 1748 } 1749 1750 /* Remove the Tx interrupt */ 1751 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); 1752 if (rv) { 1753 DWARN(ldcp->id, 1754 "i_ldc_unregister_channel: err removing Tx intr\n"); 1755 return (rv); 1756 } 1757 1758 /* Unregister the channel */ 1759 rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id); 1760 if (rv) { 1761 DWARN(ldcp->id, 1762 "i_ldc_unregister_channel: cannot unreg channel\n"); 1763 return (rv); 1764 } 1765 1766 ldcp->tstate &= ~TS_CNEX_RDY; 1767 } 1768 1769 return (0); 1770 } 1771 1772 1773 /* 1774 * LDC transmit interrupt handler 1775 * triggered for chanel up/down/reset events 1776 * and Tx queue content changes 1777 */ 1778 static uint_t 1779 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2) 1780 { 1781 _NOTE(ARGUNUSED(arg2)) 1782 1783 int rv; 1784 ldc_chan_t *ldcp; 1785 boolean_t notify_client = B_FALSE; 1786 uint64_t notify_event = 0, link_state; 1787 1788 /* Get the channel for which interrupt was received */ 1789 ASSERT(arg1 != NULL); 1790 ldcp = (ldc_chan_t *)arg1; 1791 1792 D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n", 1793 ldcp->id, ldcp); 1794 1795 /* Lock channel */ 1796 mutex_enter(&ldcp->lock); 1797 1798 /* Obtain Tx lock */ 1799 mutex_enter(&ldcp->tx_lock); 1800 1801 /* mark interrupt as pending */ 1802 ldcp->tx_intr_state = LDC_INTR_ACTIVE; 1803 1804 /* save current link state */ 1805 link_state = ldcp->link_state; 1806 1807 rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, 1808 &ldcp->link_state); 1809 if (rv) { 1810 cmn_err(CE_WARN, 1811 "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n", 1812 ldcp->id, rv); 1813 i_ldc_clear_intr(ldcp, CNEX_TX_INTR); 1814 

/*
 * LDC transmit interrupt handler
 * triggered for channel up/down/reset events
 * and Tx queue content changes
 */
static uint_t
i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	int		rv;
	ldc_chan_t	*ldcp;
	boolean_t	notify_client = B_FALSE;
	uint64_t	notify_event = 0, link_state;

	/* Get the channel for which interrupt was received */
	ASSERT(arg1 != NULL);
	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* mark interrupt as pending */
	ldcp->tx_intr_state = LDC_INTR_ACTIVE;

	/* save current link state */
	link_state = ldcp->link_state;

	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
	    &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
		    ldcp->id, rv);
		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (DDI_INTR_CLAIMED);
	}

	/*
	 * reset the channel state if the channel went down
	 * (other side unconfigured queue) or channel was reset
	 * (other side reconfigured its queue)
	 */
	if (link_state != ldcp->link_state &&
	    ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
		i_ldc_reset(ldcp, B_FALSE);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_DOWN;
	}

	if (link_state != ldcp->link_state &&
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
		i_ldc_reset(ldcp, B_FALSE);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_RESET;
	}

	if (link_state != ldcp->link_state &&
	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
	    ldcp->link_state == LDC_CHANNEL_UP) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_RESET;
		ldcp->tstate |= TS_LINK_READY;
		ldcp->status = LDC_READY;
	}

	/* if callbacks are disabled, do not notify */
	if (!ldcp->cb_enabled)
		notify_client = B_FALSE;

	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);

	if (notify_client) {
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(notify_event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
			    "failure", ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}

/*
 * LDC receive interrupt handler
 * triggered for a channel with data pending to read,
 * i.e. Rx queue content changes
 */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	int		rv;
	uint64_t	rx_head, rx_tail;
	ldc_msg_t	*msg;
	ldc_chan_t	*ldcp;
	boolean_t	notify_client = B_FALSE;
	uint64_t	notify_event = 0;
	uint64_t	link_state, first_fragment = 0;


	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* mark interrupt as pending */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	/*
	 * Read packet(s) from the queue
	 */
	for (;;) {

		link_state = ldcp->link_state;
		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
		    &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
			mutex_exit(&ldcp->lock);
			return (DDI_INTR_CLAIMED);
		}

		/*
		 * reset the channel state if the channel went down
		 * (other side unconfigured queue) or channel was reset
		 * (other side reconfigured its queue)
		 */

		if (link_state != ldcp->link_state) {

			switch (ldcp->link_state) {
			case LDC_CHANNEL_DOWN:
				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
				    "link down\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				notify_client = B_TRUE;
				notify_event = LDC_EVT_DOWN;
				goto loop_exit;

			case LDC_CHANNEL_UP:
				D1(ldcp->id, "i_ldc_rx_hdlr: "
				    "channel link up\n", ldcp->id);

				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
					notify_client = B_TRUE;
					notify_event = LDC_EVT_RESET;
					ldcp->tstate |= TS_LINK_READY;
					ldcp->status = LDC_READY;
				}
				break;

			case LDC_CHANNEL_RESET:
			default:
#ifdef DEBUG
force_reset:
#endif
				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
				    "link reset\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}
		}

#ifdef DEBUG
		if (LDC_INJECT_RESET(ldcp))
			goto force_reset;
#endif

		if (rx_head == rx_tail) {
			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
			    ldcp->id);
			break;
		}

		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
		    rx_head, rx_tail);
		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
		    ldcp->rx_q_va + rx_head);

		/* get the message */
		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);

		/* if channel is in RAW mode or data pkt, notify and return */
		if (ldcp->mode == LDC_MODE_RAW) {
			notify_client = B_TRUE;
			notify_event |= LDC_EVT_READ;
			break;
		}

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* discard packet if channel is not up */
			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {

				/* move the head one position */
				rx_head = (rx_head + LDC_PACKET_SIZE) %
				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);

				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
					break;

				continue;
			} else {
				if ((ldcp->tstate & TS_IN_RESET) == 0)
					notify_client = B_TRUE;
				notify_event |= LDC_EVT_READ;
				break;
			}
		}

		/* Check the sequence ID for the message received */
		rv = i_ldc_check_seqid(ldcp, msg);
		if (rv != 0) {

			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);

			/* Reset last_msg_rcd to start of message */
			if (first_fragment != 0) {
				ldcp->last_msg_rcd = first_fragment - 1;
				first_fragment = 0;
			}

			/*
			 * Send a NACK due to seqid mismatch
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
			    (msg->ctrl & LDC_CTRL_MASK));

			if (rv) {
				cmn_err(CE_NOTE,
				    "i_ldc_rx_hdlr: (0x%lx) err sending "
				    "CTRL/NACK msg\n", ldcp->id);

				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);

				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}

			/* purge receive queue */
			(void) i_ldc_set_rx_head(ldcp, rx_tail);
			break;
		}

		/* record the message ID */
		ldcp->last_msg_rcd = msg->seqid;

		/* process control messages */
		if (msg->type & LDC_CTRL) {
			/* save current internal state */
			uint64_t tstate = ldcp->tstate;

			rv = i_ldc_ctrlmsg(ldcp, msg);
			if (rv == EAGAIN) {
				/* re-process pkt - state was adjusted */
				continue;
			}
			if (rv == ECONNRESET) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}

			/*
			 * control message processing was successful
			 * channel transitioned to ready for communication
			 */
			if (rv == 0 && ldcp->tstate == TS_UP &&
			    (tstate & ~TS_IN_RESET) !=
			    (ldcp->tstate & ~TS_IN_RESET)) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_UP;
			}
		}

		/* process data NACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) received DATA/NACK",
			    ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			notify_client = B_TRUE;
			notify_event = LDC_EVT_RESET;
			break;
		}

		/* process data ACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}
		}

		/* move the head one position */
		rx_head = (rx_head + LDC_PACKET_SIZE) %
		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
			notify_client = B_TRUE;
			notify_event = LDC_EVT_RESET;
			break;
		}

	} /* for */

loop_exit:

	/* if callbacks are disabled, do not notify */
	if (!ldcp->cb_enabled)
		notify_client = B_FALSE;

	/*
	 * If there are data packets in the queue, the ldc_read will
	 * clear interrupts after draining the queue, else clear interrupts
	 */
	if ((notify_event & LDC_EVT_READ) == 0) {
		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
	} else
		ldcp->rx_intr_state = LDC_INTR_PEND;


	if (notify_client) {
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->lock);
ldcp->cb(notify_event, ldcp->cb_arg); 2149 if (rv) { 2150 DWARN(ldcp->id, 2151 "i_ldc_rx_hdlr: (0x%llx) callback failure", 2152 ldcp->id); 2153 } 2154 mutex_enter(&ldcp->lock); 2155 ldcp->cb_inprogress = B_FALSE; 2156 } 2157 2158 mutex_exit(&ldcp->lock); 2159 2160 D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id); 2161 return (DDI_INTR_CLAIMED); 2162 } 2163 2164 2165 /* -------------------------------------------------------------------------- */ 2166 2167 /* 2168 * LDC API functions 2169 */ 2170 2171 /* 2172 * Initialize the channel. Allocate internal structure and memory for 2173 * TX/RX queues, and initialize locks. 2174 */ 2175 int 2176 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle) 2177 { 2178 ldc_chan_t *ldcp; 2179 int rv, exit_val; 2180 uint64_t ra_base, nentries; 2181 uint64_t qlen; 2182 2183 exit_val = EINVAL; /* guarantee an error if exit on failure */ 2184 2185 if (attr == NULL) { 2186 DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id); 2187 return (EINVAL); 2188 } 2189 if (handle == NULL) { 2190 DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id); 2191 return (EINVAL); 2192 } 2193 2194 /* check if channel is valid */ 2195 rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries); 2196 if (rv == H_ECHANNEL) { 2197 DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id); 2198 return (EINVAL); 2199 } 2200 2201 /* check if the channel has already been initialized */ 2202 mutex_enter(&ldcssp->lock); 2203 ldcp = ldcssp->chan_list; 2204 while (ldcp != NULL) { 2205 if (ldcp->id == id) { 2206 DWARN(id, "ldc_init: (0x%llx) already initialized\n", 2207 id); 2208 mutex_exit(&ldcssp->lock); 2209 return (EADDRINUSE); 2210 } 2211 ldcp = ldcp->next; 2212 } 2213 mutex_exit(&ldcssp->lock); 2214 2215 ASSERT(ldcp == NULL); 2216 2217 *handle = 0; 2218 2219 /* Allocate an ldcp structure */ 2220 ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP); 2221 2222 /* 2223 * Initialize the channel and Tx lock 2224 * 2225 * The channel 'lock' protects the entire channel and 2226 * should be acquired before initializing, resetting, 2227 * destroying or reading from a channel. 2228 * 2229 * The 'tx_lock' should be acquired prior to transmitting 2230 * data over the channel. The lock should also be acquired 2231 * prior to channel reconfiguration (in order to prevent 2232 * concurrent writes). 2233 * 2234 * ORDERING: When both locks are being acquired, to prevent 2235 * deadlocks, the channel lock should be always acquired prior 2236 * to the tx_lock. 2237 */ 2238 mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL); 2239 mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL); 2240 2241 /* Initialize the channel */ 2242 ldcp->id = id; 2243 ldcp->cb = NULL; 2244 ldcp->cb_arg = NULL; 2245 ldcp->cb_inprogress = B_FALSE; 2246 ldcp->cb_enabled = B_FALSE; 2247 ldcp->next = NULL; 2248 2249 /* Read attributes */ 2250 ldcp->mode = attr->mode; 2251 ldcp->devclass = attr->devclass; 2252 ldcp->devinst = attr->instance; 2253 ldcp->mtu = (attr->mtu > 0) ? 
attr->mtu : LDC_DEFAULT_MTU; 2254 2255 D1(ldcp->id, 2256 "ldc_init: (0x%llx) channel attributes, class=0x%x, " 2257 "instance=0x%llx, mode=%d, mtu=%d\n", 2258 ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu); 2259 2260 ldcp->next_vidx = 0; 2261 ldcp->tstate = TS_IN_RESET; 2262 ldcp->hstate = 0; 2263 ldcp->last_msg_snt = LDC_INIT_SEQID; 2264 ldcp->last_ack_rcd = 0; 2265 ldcp->last_msg_rcd = 0; 2266 2267 ldcp->stream_bufferp = NULL; 2268 ldcp->exp_dring_list = NULL; 2269 ldcp->imp_dring_list = NULL; 2270 ldcp->mhdl_list = NULL; 2271 2272 ldcp->tx_intr_state = LDC_INTR_NONE; 2273 ldcp->rx_intr_state = LDC_INTR_NONE; 2274 2275 /* Initialize payload size and read/write methods based on channel mode */ 2276 switch (ldcp->mode) { 2277 case LDC_MODE_RAW: 2278 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW; 2279 ldcp->read_p = i_ldc_read_raw; 2280 ldcp->write_p = i_ldc_write_raw; 2281 break; 2282 case LDC_MODE_UNRELIABLE: 2283 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE; 2284 ldcp->read_p = i_ldc_read_packet; 2285 ldcp->write_p = i_ldc_write_packet; 2286 break; 2287 case LDC_MODE_RELIABLE: 2288 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; 2289 ldcp->read_p = i_ldc_read_packet; 2290 ldcp->write_p = i_ldc_write_packet; 2291 break; 2292 case LDC_MODE_STREAM: 2293 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; 2294 2295 ldcp->stream_remains = 0; 2296 ldcp->stream_offset = 0; 2297 ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP); 2298 ldcp->read_p = i_ldc_read_stream; 2299 ldcp->write_p = i_ldc_write_stream; 2300 break; 2301 default: 2302 exit_val = EINVAL; 2303 goto cleanup_on_exit; 2304 } 2305 2306 /* 2307 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this value 2308 * is smaller than the default queue length, ldc_queue_entries, 2309 * qlen is rounded up to ldc_queue_entries. 2310 */ 2311 qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload; 2312 ldcp->rx_q_entries = 2313 (qlen < ldc_queue_entries) ? 
ldc_queue_entries : qlen; 2314 ldcp->tx_q_entries = ldcp->rx_q_entries; 2315 2316 D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen); 2317 2318 /* Create a transmit queue */ 2319 ldcp->tx_q_va = (uint64_t) 2320 contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT); 2321 if (ldcp->tx_q_va == NULL) { 2322 cmn_err(CE_WARN, 2323 "ldc_init: (0x%lx) TX queue allocation failed\n", 2324 ldcp->id); 2325 exit_val = ENOMEM; 2326 goto cleanup_on_exit; 2327 } 2328 ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va); 2329 2330 D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n", 2331 ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries); 2332 2333 ldcp->tstate |= TS_TXQ_RDY; 2334 2335 /* Create a receive queue */ 2336 ldcp->rx_q_va = (uint64_t) 2337 contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT); 2338 if (ldcp->rx_q_va == NULL) { 2339 cmn_err(CE_WARN, 2340 "ldc_init: (0x%lx) RX queue allocation failed\n", 2341 ldcp->id); 2342 exit_val = ENOMEM; 2343 goto cleanup_on_exit; 2344 } 2345 ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va); 2346 2347 D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n", 2348 ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries); 2349 2350 ldcp->tstate |= TS_RXQ_RDY; 2351 2352 /* Init descriptor ring and memory handle list lock */ 2353 mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL); 2354 mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL); 2355 mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL); 2356 2357 /* mark status as INITialized */ 2358 ldcp->status = LDC_INIT; 2359 2360 /* Add to channel list */ 2361 mutex_enter(&ldcssp->lock); 2362 ldcp->next = ldcssp->chan_list; 2363 ldcssp->chan_list = ldcp; 2364 ldcssp->channel_count++; 2365 mutex_exit(&ldcssp->lock); 2366 2367 /* set the handle */ 2368 *handle = (ldc_handle_t)ldcp; 2369 2370 D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id); 2371 2372 return (0); 2373 2374 cleanup_on_exit: 2375 2376 if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) 2377 kmem_free(ldcp->stream_bufferp, ldcp->mtu); 2378 2379 if (ldcp->tstate & TS_TXQ_RDY) 2380 contig_mem_free((caddr_t)ldcp->tx_q_va, 2381 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 2382 2383 if (ldcp->tstate & TS_RXQ_RDY) 2384 contig_mem_free((caddr_t)ldcp->rx_q_va, 2385 (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); 2386 2387 mutex_destroy(&ldcp->tx_lock); 2388 mutex_destroy(&ldcp->lock); 2389 2390 if (ldcp) 2391 kmem_free(ldcp, sizeof (ldc_chan_t)); 2392 2393 return (exit_val); 2394 } 2395 2396 /* 2397 * Finalizes the LDC connection. It will return EBUSY if the 2398 * channel is open. A ldc_close() has to be done prior to 2399 * a ldc_fini operation. 
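* (Illustrative teardown sketch, assuming 'hdl' was returned by an earlier ldc_init(): (void) ldc_unreg_callback(hdl); (void) ldc_close(hdl); (void) ldc_fini(hdl);.)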
It frees TX/RX queues, associated 2400 * with the channel 2401 */ 2402 int 2403 ldc_fini(ldc_handle_t handle) 2404 { 2405 ldc_chan_t *ldcp; 2406 ldc_chan_t *tmp_ldcp; 2407 uint64_t id; 2408 2409 if (handle == NULL) { 2410 DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n"); 2411 return (EINVAL); 2412 } 2413 ldcp = (ldc_chan_t *)handle; 2414 id = ldcp->id; 2415 2416 mutex_enter(&ldcp->lock); 2417 2418 if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) { 2419 DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n", 2420 ldcp->id); 2421 mutex_exit(&ldcp->lock); 2422 return (EBUSY); 2423 } 2424 2425 /* Remove from the channel list */ 2426 mutex_enter(&ldcssp->lock); 2427 tmp_ldcp = ldcssp->chan_list; 2428 if (tmp_ldcp == ldcp) { 2429 ldcssp->chan_list = ldcp->next; 2430 ldcp->next = NULL; 2431 } else { 2432 while (tmp_ldcp != NULL) { 2433 if (tmp_ldcp->next == ldcp) { 2434 tmp_ldcp->next = ldcp->next; 2435 ldcp->next = NULL; 2436 break; 2437 } 2438 tmp_ldcp = tmp_ldcp->next; 2439 } 2440 if (tmp_ldcp == NULL) { 2441 DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n"); 2442 mutex_exit(&ldcssp->lock); 2443 mutex_exit(&ldcp->lock); 2444 return (EINVAL); 2445 } 2446 } 2447 2448 ldcssp->channel_count--; 2449 2450 mutex_exit(&ldcssp->lock); 2451 2452 /* Free the map table for this channel */ 2453 if (ldcp->mtbl) { 2454 (void) hv_ldc_set_map_table(ldcp->id, NULL, NULL); 2455 if (ldcp->mtbl->contigmem) 2456 contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size); 2457 else 2458 kmem_free(ldcp->mtbl->table, ldcp->mtbl->size); 2459 mutex_destroy(&ldcp->mtbl->lock); 2460 kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t)); 2461 } 2462 2463 /* Destroy descriptor ring and memory handle list lock */ 2464 mutex_destroy(&ldcp->exp_dlist_lock); 2465 mutex_destroy(&ldcp->imp_dlist_lock); 2466 mutex_destroy(&ldcp->mlist_lock); 2467 2468 /* Free the stream buffer for STREAM_MODE */ 2469 if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) 2470 kmem_free(ldcp->stream_bufferp, ldcp->mtu); 2471 2472 /* Free the RX queue */ 2473 contig_mem_free((caddr_t)ldcp->rx_q_va, 2474 (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); 2475 ldcp->tstate &= ~TS_RXQ_RDY; 2476 2477 /* Free the TX queue */ 2478 contig_mem_free((caddr_t)ldcp->tx_q_va, 2479 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 2480 ldcp->tstate &= ~TS_TXQ_RDY; 2481 2482 mutex_exit(&ldcp->lock); 2483 2484 /* Destroy mutex */ 2485 mutex_destroy(&ldcp->tx_lock); 2486 mutex_destroy(&ldcp->lock); 2487 2488 /* free channel structure */ 2489 kmem_free(ldcp, sizeof (ldc_chan_t)); 2490 2491 D1(id, "ldc_fini: (0x%llx) channel finalized\n", id); 2492 2493 return (0); 2494 } 2495 2496 /* 2497 * Open the LDC channel for use. It registers the TX/RX queues 2498 * with the Hypervisor. 
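* (The registration itself is the pair of hv_ldc_tx_qconf()/hv_ldc_rx_qconf() hypervisor calls made in the function body below.)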
It also specifies the interrupt number 2499 * and target CPU for this channel 2500 */ 2501 int 2502 ldc_open(ldc_handle_t handle) 2503 { 2504 ldc_chan_t *ldcp; 2505 int rv; 2506 2507 if (handle == NULL) { 2508 DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n"); 2509 return (EINVAL); 2510 } 2511 2512 ldcp = (ldc_chan_t *)handle; 2513 2514 mutex_enter(&ldcp->lock); 2515 2516 if (ldcp->tstate < TS_INIT) { 2517 DWARN(ldcp->id, 2518 "ldc_open: (0x%llx) channel not initialized\n", ldcp->id); 2519 mutex_exit(&ldcp->lock); 2520 return (EFAULT); 2521 } 2522 if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) { 2523 DWARN(ldcp->id, 2524 "ldc_open: (0x%llx) channel is already open\n", ldcp->id); 2525 mutex_exit(&ldcp->lock); 2526 return (EFAULT); 2527 } 2528 2529 /* 2530 * Unregister/Register the tx queue with the hypervisor 2531 */ 2532 rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2533 if (rv) { 2534 cmn_err(CE_WARN, 2535 "ldc_open: (0x%lx) channel tx queue unconf failed\n", 2536 ldcp->id); 2537 mutex_exit(&ldcp->lock); 2538 return (EIO); 2539 } 2540 2541 rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries); 2542 if (rv) { 2543 cmn_err(CE_WARN, 2544 "ldc_open: (0x%lx) channel tx queue conf failed\n", 2545 ldcp->id); 2546 mutex_exit(&ldcp->lock); 2547 return (EIO); 2548 } 2549 2550 D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n", 2551 ldcp->id); 2552 2553 /* 2554 * Unregister/Register the rx queue with the hypervisor 2555 */ 2556 rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2557 if (rv) { 2558 cmn_err(CE_WARN, 2559 "ldc_open: (0x%lx) channel rx queue unconf failed\n", 2560 ldcp->id); 2561 mutex_exit(&ldcp->lock); 2562 return (EIO); 2563 } 2564 2565 rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries); 2566 if (rv) { 2567 cmn_err(CE_WARN, 2568 "ldc_open: (0x%lx) channel rx queue conf failed\n", 2569 ldcp->id); 2570 mutex_exit(&ldcp->lock); 2571 return (EIO); 2572 } 2573 2574 D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n", 2575 ldcp->id); 2576 2577 ldcp->tstate |= TS_QCONF_RDY; 2578 2579 /* Register the channel with the channel nexus */ 2580 rv = i_ldc_register_channel(ldcp); 2581 if (rv && rv != EAGAIN) { 2582 cmn_err(CE_WARN, 2583 "ldc_open: (0x%lx) channel register failed\n", ldcp->id); 2584 (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2585 (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2586 mutex_exit(&ldcp->lock); 2587 return (EIO); 2588 } 2589 2590 /* mark channel in OPEN state */ 2591 ldcp->status = LDC_OPEN; 2592 2593 /* Read channel state */ 2594 rv = hv_ldc_tx_get_state(ldcp->id, 2595 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 2596 if (rv) { 2597 cmn_err(CE_WARN, 2598 "ldc_open: (0x%lx) cannot read channel state\n", 2599 ldcp->id); 2600 (void) i_ldc_unregister_channel(ldcp); 2601 (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2602 (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2603 mutex_exit(&ldcp->lock); 2604 return (EIO); 2605 } 2606 2607 /* 2608 * set the ACKd head to current head location for reliable & 2609 * streaming mode 2610 */ 2611 ldcp->tx_ackd_head = ldcp->tx_head; 2612 2613 /* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */ 2614 if (ldcp->link_state == LDC_CHANNEL_UP || 2615 ldcp->link_state == LDC_CHANNEL_RESET) { 2616 ldcp->tstate |= TS_LINK_READY; 2617 ldcp->status = LDC_READY; 2618 } 2619 2620 /* 2621 * if channel is being opened in RAW mode - no handshake is needed 2622 * switch the channel READY and UP state 2623 */ 2624 if (ldcp->mode == LDC_MODE_RAW) { 2625 ldcp->tstate = TS_UP; /* 
set bits associated with LDC UP */ 2626 ldcp->status = LDC_UP; 2627 } 2628 2629 mutex_exit(&ldcp->lock); 2630 2631 /* 2632 * Increment number of open channels 2633 */ 2634 mutex_enter(&ldcssp->lock); 2635 ldcssp->channels_open++; 2636 mutex_exit(&ldcssp->lock); 2637 2638 D1(ldcp->id, 2639 "ldc_open: (0x%llx) channel (0x%p) open for use " 2640 "(tstate=0x%x, status=0x%x)\n", 2641 ldcp->id, ldcp, ldcp->tstate, ldcp->status); 2642 2643 return (0); 2644 } 2645 2646 /* 2647 * Close the LDC connection. It will return EBUSY if there 2648 * are memory segments or descriptor rings either bound to or 2649 * mapped over the channel 2650 */ 2651 int 2652 ldc_close(ldc_handle_t handle) 2653 { 2654 ldc_chan_t *ldcp; 2655 int rv = 0, retries = 0; 2656 boolean_t chk_done = B_FALSE; 2657 2658 if (handle == NULL) { 2659 DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n"); 2660 return (EINVAL); 2661 } 2662 ldcp = (ldc_chan_t *)handle; 2663 2664 mutex_enter(&ldcp->lock); 2665 2666 /* return error if channel is not open */ 2667 if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) { 2668 DWARN(ldcp->id, 2669 "ldc_close: (0x%llx) channel is not open\n", ldcp->id); 2670 mutex_exit(&ldcp->lock); 2671 return (EFAULT); 2672 } 2673 2674 /* if any memory handles, drings, are bound or mapped cannot close */ 2675 if (ldcp->mhdl_list != NULL) { 2676 DWARN(ldcp->id, 2677 "ldc_close: (0x%llx) channel has bound memory handles\n", 2678 ldcp->id); 2679 mutex_exit(&ldcp->lock); 2680 return (EBUSY); 2681 } 2682 if (ldcp->exp_dring_list != NULL) { 2683 DWARN(ldcp->id, 2684 "ldc_close: (0x%llx) channel has bound descriptor rings\n", 2685 ldcp->id); 2686 mutex_exit(&ldcp->lock); 2687 return (EBUSY); 2688 } 2689 if (ldcp->imp_dring_list != NULL) { 2690 DWARN(ldcp->id, 2691 "ldc_close: (0x%llx) channel has mapped descriptor rings\n", 2692 ldcp->id); 2693 mutex_exit(&ldcp->lock); 2694 return (EBUSY); 2695 } 2696 2697 if (ldcp->cb_inprogress) { 2698 DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n", 2699 ldcp->id); 2700 mutex_exit(&ldcp->lock); 2701 return (EWOULDBLOCK); 2702 } 2703 2704 /* Obtain Tx lock */ 2705 mutex_enter(&ldcp->tx_lock); 2706 2707 /* 2708 * Wait for pending transmits to complete i.e Tx queue to drain 2709 * if there are pending pkts - wait 1 ms and retry again 2710 */ 2711 for (;;) { 2712 2713 rv = hv_ldc_tx_get_state(ldcp->id, 2714 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 2715 if (rv) { 2716 cmn_err(CE_WARN, 2717 "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id); 2718 mutex_exit(&ldcp->tx_lock); 2719 mutex_exit(&ldcp->lock); 2720 return (EIO); 2721 } 2722 2723 if (ldcp->tx_head == ldcp->tx_tail || 2724 ldcp->link_state != LDC_CHANNEL_UP) { 2725 break; 2726 } 2727 2728 if (chk_done) { 2729 DWARN(ldcp->id, 2730 "ldc_close: (0x%llx) Tx queue drain timeout\n", 2731 ldcp->id); 2732 break; 2733 } 2734 2735 /* wait for one ms and try again */ 2736 delay(drv_usectohz(1000)); 2737 chk_done = B_TRUE; 2738 } 2739 2740 /* 2741 * Drain the Tx and Rx queues as we are closing the 2742 * channel. We dont care about any pending packets. 2743 * We have to also drain the queue prior to clearing 2744 * pending interrupts, otherwise the HV will trigger 2745 * an interrupt the moment the interrupt state is 2746 * cleared. 
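* (Concretely, the i_ldc_txq_reconf() and i_ldc_rxq_drain() calls below run before the channel and its interrupts are unregistered from the nexus.)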
2747 */ 2748 (void) i_ldc_txq_reconf(ldcp); 2749 (void) i_ldc_rxq_drain(ldcp); 2750 2751 /* 2752 * Unregister the channel with the nexus 2753 */ 2754 while ((rv = i_ldc_unregister_channel(ldcp)) != 0) { 2755 2756 mutex_exit(&ldcp->tx_lock); 2757 mutex_exit(&ldcp->lock); 2758 2759 /* if any error other than EAGAIN return back */ 2760 if (rv != EAGAIN || retries >= ldc_max_retries) { 2761 cmn_err(CE_WARN, 2762 "ldc_close: (0x%lx) unregister failed, %d\n", 2763 ldcp->id, rv); 2764 return (rv); 2765 } 2766 2767 /* 2768 * As there could be pending interrupts we need 2769 * to wait and try again 2770 */ 2771 drv_usecwait(ldc_close_delay); 2772 mutex_enter(&ldcp->lock); 2773 mutex_enter(&ldcp->tx_lock); 2774 retries++; 2775 } 2776 2777 /* 2778 * Unregister queues 2779 */ 2780 rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2781 if (rv) { 2782 cmn_err(CE_WARN, 2783 "ldc_close: (0x%lx) channel TX queue unconf failed\n", 2784 ldcp->id); 2785 mutex_exit(&ldcp->tx_lock); 2786 mutex_exit(&ldcp->lock); 2787 return (EIO); 2788 } 2789 rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2790 if (rv) { 2791 cmn_err(CE_WARN, 2792 "ldc_close: (0x%lx) channel RX queue unconf failed\n", 2793 ldcp->id); 2794 mutex_exit(&ldcp->tx_lock); 2795 mutex_exit(&ldcp->lock); 2796 return (EIO); 2797 } 2798 2799 ldcp->tstate &= ~TS_QCONF_RDY; 2800 2801 /* Reset channel state information */ 2802 i_ldc_reset_state(ldcp); 2803 2804 /* Mark channel as down and in initialized state */ 2805 ldcp->tx_ackd_head = 0; 2806 ldcp->tx_head = 0; 2807 ldcp->tstate = TS_IN_RESET|TS_INIT; 2808 ldcp->status = LDC_INIT; 2809 2810 mutex_exit(&ldcp->tx_lock); 2811 mutex_exit(&ldcp->lock); 2812 2813 /* Decrement number of open channels */ 2814 mutex_enter(&ldcssp->lock); 2815 ldcssp->channels_open--; 2816 mutex_exit(&ldcssp->lock); 2817 2818 D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id); 2819 2820 return (0); 2821 } 2822 2823 /* 2824 * Register channel callback 2825 */ 2826 int 2827 ldc_reg_callback(ldc_handle_t handle, 2828 uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg) 2829 { 2830 ldc_chan_t *ldcp; 2831 2832 if (handle == NULL) { 2833 DWARN(DBG_ALL_LDCS, 2834 "ldc_reg_callback: invalid channel handle\n"); 2835 return (EINVAL); 2836 } 2837 if (((uint64_t)cb) < KERNELBASE) { 2838 DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n"); 2839 return (EINVAL); 2840 } 2841 ldcp = (ldc_chan_t *)handle; 2842 2843 mutex_enter(&ldcp->lock); 2844 2845 if (ldcp->cb) { 2846 DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n", 2847 ldcp->id); 2848 mutex_exit(&ldcp->lock); 2849 return (EIO); 2850 } 2851 if (ldcp->cb_inprogress) { 2852 DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n", 2853 ldcp->id); 2854 mutex_exit(&ldcp->lock); 2855 return (EWOULDBLOCK); 2856 } 2857 2858 ldcp->cb = cb; 2859 ldcp->cb_arg = arg; 2860 ldcp->cb_enabled = B_TRUE; 2861 2862 D1(ldcp->id, 2863 "ldc_reg_callback: (0x%llx) registered callback for channel\n", 2864 ldcp->id); 2865 2866 mutex_exit(&ldcp->lock); 2867 2868 return (0); 2869 } 2870 2871 /* 2872 * Unregister channel callback 2873 */ 2874 int 2875 ldc_unreg_callback(ldc_handle_t handle) 2876 { 2877 ldc_chan_t *ldcp; 2878 2879 if (handle == NULL) { 2880 DWARN(DBG_ALL_LDCS, 2881 "ldc_unreg_callback: invalid channel handle\n"); 2882 return (EINVAL); 2883 } 2884 ldcp = (ldc_chan_t *)handle; 2885 2886 mutex_enter(&ldcp->lock); 2887 2888 if (ldcp->cb == NULL) { 2889 DWARN(ldcp->id, 2890 "ldc_unreg_callback: (0x%llx) no callback exists\n", 2891 ldcp->id); 2892 mutex_exit(&ldcp->lock); 2893 
return (EIO); 2894 } 2895 if (ldcp->cb_inprogress) { 2896 DWARN(ldcp->id, 2897 "ldc_unreg_callback: (0x%llx) callback active\n", 2898 ldcp->id); 2899 mutex_exit(&ldcp->lock); 2900 return (EWOULDBLOCK); 2901 } 2902 2903 ldcp->cb = NULL; 2904 ldcp->cb_arg = NULL; 2905 ldcp->cb_enabled = B_FALSE; 2906 2907 D1(ldcp->id, 2908 "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n", 2909 ldcp->id); 2910 2911 mutex_exit(&ldcp->lock); 2912 2913 return (0); 2914 } 2915 2916 2917 /* 2918 * Bring a channel up by initiating a handshake with the peer. 2919 * This call is asynchronous. It will complete at a later point 2920 * in time when the peer responds with an RTR. 2921 */ 2922 int 2923 ldc_up(ldc_handle_t handle) 2924 { 2925 int rv; 2926 ldc_chan_t *ldcp; 2927 ldc_msg_t *ldcmsg; 2928 uint64_t tx_tail, tstate; 2929 2930 if (handle == NULL) { 2931 DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n"); 2932 return (EINVAL); 2933 } 2934 ldcp = (ldc_chan_t *)handle; 2935 2936 mutex_enter(&ldcp->lock); 2937 2938 D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id); 2939 2940 /* clear the reset state */ 2941 tstate = ldcp->tstate; 2942 ldcp->tstate &= ~TS_IN_RESET; 2943 2944 if (ldcp->tstate == TS_UP) { 2945 DWARN(ldcp->id, 2946 "ldc_up: (0x%llx) channel is already in UP state\n", 2947 ldcp->id); 2948 2949 /* mark channel as up */ 2950 ldcp->status = LDC_UP; 2951 2952 /* 2953 * if the channel was in reset state and there was 2954 * pending data, clear the interrupt state. This will 2955 * trigger an interrupt, causing the RX handler 2956 * to invoke the client's callback 2957 */ 2958 if ((tstate & TS_IN_RESET) && 2959 ldcp->rx_intr_state == LDC_INTR_PEND) { 2960 D1(ldcp->id, 2961 "ldc_up: (0x%llx) channel has pending data, " 2962 "clearing interrupt\n", ldcp->id); 2963 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 2964 } 2965 2966 mutex_exit(&ldcp->lock); 2967 return (0); 2968 } 2969 2970 /* if the channel is in RAW mode and READY, mark it as UP */ 2971 if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) { 2972 ldcp->tstate = TS_UP; 2973 mutex_exit(&ldcp->lock); 2974 return (0); 2975 } 2976 2977 /* Don't start another handshake if there is one in progress */ 2978 if (ldcp->hstate) { 2979 D1(ldcp->id, 2980 "ldc_up: (0x%llx) channel handshake in progress\n", 2981 ldcp->id); 2982 mutex_exit(&ldcp->lock); 2983 return (0); 2984 } 2985 2986 mutex_enter(&ldcp->tx_lock); 2987 2988 /* get the current tail for the LDC msg */ 2989 rv = i_ldc_get_tx_tail(ldcp, &tx_tail); 2990 if (rv) { 2991 D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n", 2992 ldcp->id); 2993 mutex_exit(&ldcp->tx_lock); 2994 mutex_exit(&ldcp->lock); 2995 return (ECONNREFUSED); 2996 } 2997 2998 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 2999 ZERO_PKT(ldcmsg); 3000 3001 ldcmsg->type = LDC_CTRL; 3002 ldcmsg->stype = LDC_INFO; 3003 ldcmsg->ctrl = LDC_VER; 3004 ldcp->next_vidx = 0; 3005 bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0])); 3006 3007 DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg); 3008 3009 /* initiate the send by calling into HV and set the new tail */ 3010 tx_tail = (tx_tail + LDC_PACKET_SIZE) % 3011 (ldcp->tx_q_entries << LDC_PACKET_SHIFT); 3012 3013 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 3014 if (rv) { 3015 DWARN(ldcp->id, 3016 "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n", 3017 ldcp->id, rv); 3018 mutex_exit(&ldcp->tx_lock); 3019 mutex_exit(&ldcp->lock); 3020 return (rv); 3021 } 3022 3023 ldcp->hstate |= TS_SENT_VER; 3024 ldcp->tx_tail = tx_tail; 3025 D1(ldcp->id, 
"ldc_up: (0x%llx) channel up initiated\n", ldcp->id); 3026 3027 mutex_exit(&ldcp->tx_lock); 3028 mutex_exit(&ldcp->lock); 3029 3030 return (rv); 3031 } 3032 3033 3034 /* 3035 * Bring a channel down by resetting its state and queues 3036 */ 3037 int 3038 ldc_down(ldc_handle_t handle) 3039 { 3040 ldc_chan_t *ldcp; 3041 3042 if (handle == NULL) { 3043 DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n"); 3044 return (EINVAL); 3045 } 3046 ldcp = (ldc_chan_t *)handle; 3047 mutex_enter(&ldcp->lock); 3048 mutex_enter(&ldcp->tx_lock); 3049 i_ldc_reset(ldcp, B_TRUE); 3050 mutex_exit(&ldcp->tx_lock); 3051 mutex_exit(&ldcp->lock); 3052 3053 return (0); 3054 } 3055 3056 /* 3057 * Get the current channel status 3058 */ 3059 int 3060 ldc_status(ldc_handle_t handle, ldc_status_t *status) 3061 { 3062 ldc_chan_t *ldcp; 3063 3064 if (handle == NULL || status == NULL) { 3065 DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n"); 3066 return (EINVAL); 3067 } 3068 ldcp = (ldc_chan_t *)handle; 3069 3070 *status = ((ldc_chan_t *)handle)->status; 3071 3072 D1(ldcp->id, 3073 "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status); 3074 return (0); 3075 } 3076 3077 3078 /* 3079 * Set the channel's callback mode - enable/disable callbacks 3080 */ 3081 int 3082 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode) 3083 { 3084 ldc_chan_t *ldcp; 3085 3086 if (handle == NULL) { 3087 DWARN(DBG_ALL_LDCS, 3088 "ldc_set_intr_mode: invalid channel handle\n"); 3089 return (EINVAL); 3090 } 3091 ldcp = (ldc_chan_t *)handle; 3092 3093 /* 3094 * Record no callbacks should be invoked 3095 */ 3096 mutex_enter(&ldcp->lock); 3097 3098 switch (cmode) { 3099 case LDC_CB_DISABLE: 3100 if (!ldcp->cb_enabled) { 3101 DWARN(ldcp->id, 3102 "ldc_set_cb_mode: (0x%llx) callbacks disabled\n", 3103 ldcp->id); 3104 break; 3105 } 3106 ldcp->cb_enabled = B_FALSE; 3107 3108 D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n", 3109 ldcp->id); 3110 break; 3111 3112 case LDC_CB_ENABLE: 3113 if (ldcp->cb_enabled) { 3114 DWARN(ldcp->id, 3115 "ldc_set_cb_mode: (0x%llx) callbacks enabled\n", 3116 ldcp->id); 3117 break; 3118 } 3119 ldcp->cb_enabled = B_TRUE; 3120 3121 D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n", 3122 ldcp->id); 3123 break; 3124 } 3125 3126 mutex_exit(&ldcp->lock); 3127 3128 return (0); 3129 } 3130 3131 /* 3132 * Check to see if there are packets on the incoming queue 3133 * Will return hasdata = B_FALSE if there are no packets 3134 */ 3135 int 3136 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata) 3137 { 3138 int rv; 3139 uint64_t rx_head, rx_tail; 3140 ldc_chan_t *ldcp; 3141 3142 if (handle == NULL) { 3143 DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n"); 3144 return (EINVAL); 3145 } 3146 ldcp = (ldc_chan_t *)handle; 3147 3148 *hasdata = B_FALSE; 3149 3150 mutex_enter(&ldcp->lock); 3151 3152 if (ldcp->tstate != TS_UP) { 3153 D1(ldcp->id, 3154 "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id); 3155 mutex_exit(&ldcp->lock); 3156 return (ECONNRESET); 3157 } 3158 3159 /* Read packet(s) from the queue */ 3160 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3161 &ldcp->link_state); 3162 if (rv != 0) { 3163 cmn_err(CE_WARN, 3164 "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id); 3165 mutex_exit(&ldcp->lock); 3166 return (EIO); 3167 } 3168 /* reset the channel state if the channel went down */ 3169 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3170 ldcp->link_state == LDC_CHANNEL_RESET) { 3171 mutex_enter(&ldcp->tx_lock); 3172 i_ldc_reset(ldcp, B_FALSE); 3173 
mutex_exit(&ldcp->tx_lock); 3174 mutex_exit(&ldcp->lock); 3175 return (ECONNRESET); 3176 } 3177 3178 if ((rx_head != rx_tail) || 3179 (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) { 3180 D1(ldcp->id, 3181 "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n", 3182 ldcp->id); 3183 *hasdata = B_TRUE; 3184 } 3185 3186 mutex_exit(&ldcp->lock); 3187 3188 return (0); 3189 } 3190 3191 3192 /* 3193 * Read 'size' amount of bytes or less. If incoming buffer 3194 * is more than 'size', ENOBUFS is returned. 3195 * 3196 * On return, size contains the number of bytes read. 3197 */ 3198 int 3199 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep) 3200 { 3201 ldc_chan_t *ldcp; 3202 uint64_t rx_head = 0, rx_tail = 0; 3203 int rv = 0, exit_val; 3204 3205 if (handle == NULL) { 3206 DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n"); 3207 return (EINVAL); 3208 } 3209 3210 ldcp = (ldc_chan_t *)handle; 3211 3212 /* channel lock */ 3213 mutex_enter(&ldcp->lock); 3214 3215 if (ldcp->tstate != TS_UP) { 3216 DWARN(ldcp->id, 3217 "ldc_read: (0x%llx) channel is not in UP state\n", 3218 ldcp->id); 3219 exit_val = ECONNRESET; 3220 } else { 3221 exit_val = ldcp->read_p(ldcp, bufp, sizep); 3222 } 3223 3224 /* 3225 * if queue has been drained - clear interrupt 3226 */ 3227 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3228 &ldcp->link_state); 3229 if (rv != 0) { 3230 cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", 3231 ldcp->id); 3232 mutex_enter(&ldcp->tx_lock); 3233 i_ldc_reset(ldcp, B_TRUE); 3234 mutex_exit(&ldcp->tx_lock); 3235 mutex_exit(&ldcp->lock); 3236 return (ECONNRESET); 3237 } 3238 3239 if (exit_val == 0) { 3240 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3241 ldcp->link_state == LDC_CHANNEL_RESET) { 3242 mutex_enter(&ldcp->tx_lock); 3243 i_ldc_reset(ldcp, B_FALSE); 3244 exit_val = ECONNRESET; 3245 mutex_exit(&ldcp->tx_lock); 3246 } 3247 if ((rv == 0) && 3248 (ldcp->rx_intr_state == LDC_INTR_PEND) && 3249 (rx_head == rx_tail)) { 3250 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 3251 } 3252 } 3253 3254 mutex_exit(&ldcp->lock); 3255 return (exit_val); 3256 } 3257 3258 /* 3259 * Basic raw mondo read - 3260 * no interpretation of mondo contents at all. 
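* (Note: a raw read transfers exactly LDC_PAYLOAD_SIZE_RAW bytes; the caller's buffer must be at least that large, or ENOBUFS is returned below.)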
3261 * 3262 * Enter and exit with ldcp->lock held by caller 3263 */ 3264 static int 3265 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3266 { 3267 uint64_t q_size_mask; 3268 ldc_msg_t *msgp; 3269 uint8_t *msgbufp; 3270 int rv = 0, space; 3271 uint64_t rx_head, rx_tail; 3272 3273 space = *sizep; 3274 3275 if (space < LDC_PAYLOAD_SIZE_RAW) 3276 return (ENOBUFS); 3277 3278 ASSERT(mutex_owned(&ldcp->lock)); 3279 3280 /* compute mask for increment */ 3281 q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT; 3282 3283 /* 3284 * Read packet(s) from the queue 3285 */ 3286 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3287 &ldcp->link_state); 3288 if (rv != 0) { 3289 cmn_err(CE_WARN, 3290 "ldc_read_raw: (0x%lx) unable to read queue ptrs", 3291 ldcp->id); 3292 return (EIO); 3293 } 3294 D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx," 3295 " rxt=0x%llx, st=0x%llx\n", 3296 ldcp->id, rx_head, rx_tail, ldcp->link_state); 3297 3298 /* reset the channel state if the channel went down */ 3299 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3300 ldcp->link_state == LDC_CHANNEL_RESET) { 3301 mutex_enter(&ldcp->tx_lock); 3302 i_ldc_reset(ldcp, B_FALSE); 3303 mutex_exit(&ldcp->tx_lock); 3304 return (ECONNRESET); 3305 } 3306 3307 /* 3308 * Check for empty queue 3309 */ 3310 if (rx_head == rx_tail) { 3311 *sizep = 0; 3312 return (0); 3313 } 3314 3315 /* get the message */ 3316 msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head); 3317 3318 /* if channel is in RAW mode, copy data and return */ 3319 msgbufp = (uint8_t *)&(msgp->raw[0]); 3320 3321 bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW); 3322 3323 DUMP_PAYLOAD(ldcp->id, msgbufp); 3324 3325 *sizep = LDC_PAYLOAD_SIZE_RAW; 3326 3327 rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask; 3328 rv = i_ldc_set_rx_head(ldcp, rx_head); 3329 3330 return (rv); 3331 } 3332 3333 /* 3334 * Process LDC mondos to build larger packets 3335 * with either un-reliable or reliable delivery. 
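* (Fragments are delimited by the LDC_FRAG_START/LDC_FRAG_STOP bits in the packet envelope; the seqid of the first fragment is remembered so that last_msg_rcd can be rewound if the transfer fails part-way.)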
3336 * 3337 * Enter and exit with ldcp->lock held by caller 3338 */ 3339 static int 3340 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3341 { 3342 int rv = 0; 3343 uint64_t rx_head = 0, rx_tail = 0; 3344 uint64_t curr_head = 0; 3345 ldc_msg_t *msg; 3346 caddr_t target; 3347 size_t len = 0, bytes_read = 0; 3348 int retries = 0; 3349 uint64_t q_size_mask; 3350 uint64_t first_fragment = 0; 3351 3352 target = target_bufp; 3353 3354 ASSERT(mutex_owned(&ldcp->lock)); 3355 3356 /* check if the buffer and size are valid */ 3357 if (target_bufp == NULL || *sizep == 0) { 3358 DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n", 3359 ldcp->id); 3360 return (EINVAL); 3361 } 3362 3363 /* compute mask for increment */ 3364 q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT; 3365 3366 /* 3367 * Read packet(s) from the queue 3368 */ 3369 rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail, 3370 &ldcp->link_state); 3371 if (rv != 0) { 3372 cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", 3373 ldcp->id); 3374 mutex_enter(&ldcp->tx_lock); 3375 i_ldc_reset(ldcp, B_TRUE); 3376 mutex_exit(&ldcp->tx_lock); 3377 return (ECONNRESET); 3378 } 3379 D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n", 3380 ldcp->id, curr_head, rx_tail, ldcp->link_state); 3381 3382 /* reset the channel state if the channel went down */ 3383 if (ldcp->link_state != LDC_CHANNEL_UP) 3384 goto channel_is_reset; 3385 3386 for (;;) { 3387 3388 if (curr_head == rx_tail) { 3389 rv = hv_ldc_rx_get_state(ldcp->id, 3390 &rx_head, &rx_tail, &ldcp->link_state); 3391 if (rv != 0) { 3392 cmn_err(CE_WARN, 3393 "ldc_read: (0x%lx) cannot read queue ptrs", 3394 ldcp->id); 3395 mutex_enter(&ldcp->tx_lock); 3396 i_ldc_reset(ldcp, B_TRUE); 3397 mutex_exit(&ldcp->tx_lock); 3398 return (ECONNRESET); 3399 } 3400 if (ldcp->link_state != LDC_CHANNEL_UP) 3401 goto channel_is_reset; 3402 3403 if (curr_head == rx_tail) { 3404 3405 /* If in the middle of a fragmented xfer */ 3406 if (first_fragment != 0) { 3407 3408 /* wait for ldc_delay usecs */ 3409 drv_usecwait(ldc_delay); 3410 3411 if (++retries < ldc_max_retries) 3412 continue; 3413 3414 *sizep = 0; 3415 ldcp->last_msg_rcd = first_fragment - 1; 3416 DWARN(DBG_ALL_LDCS, "ldc_read: " 3417 "(0x%llx) read timeout", 3418 ldcp->id); 3419 return (EAGAIN); 3420 } 3421 *sizep = 0; 3422 break; 3423 } 3424 } 3425 retries = 0; 3426 3427 D2(ldcp->id, 3428 "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n", 3429 ldcp->id, curr_head, rx_head, rx_tail); 3430 3431 /* get the message */ 3432 msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head); 3433 3434 DUMP_LDC_PKT(ldcp, "ldc_read received pkt", 3435 ldcp->rx_q_va + curr_head); 3436 3437 /* Check the message ID for the message received */ 3438 if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) { 3439 3440 DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, " 3441 "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail); 3442 3443 /* throw away data */ 3444 bytes_read = 0; 3445 3446 /* Reset last_msg_rcd to start of message */ 3447 if (first_fragment != 0) { 3448 ldcp->last_msg_rcd = first_fragment - 1; 3449 first_fragment = 0; 3450 } 3451 /* 3452 * Send a NACK -- invalid seqid 3453 * get the current tail for the response 3454 */ 3455 rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK, 3456 (msg->ctrl & LDC_CTRL_MASK)); 3457 if (rv) { 3458 cmn_err(CE_NOTE, 3459 "ldc_read: (0x%lx) err sending " 3460 "NACK msg\n", ldcp->id); 3461 3462 /* if cannot send NACK - reset channel */ 3463 mutex_enter(&ldcp->tx_lock); 3464 
i_ldc_reset(ldcp, B_FALSE); 3465 mutex_exit(&ldcp->tx_lock); 3466 rv = ECONNRESET; 3467 break; 3468 } 3469 3470 /* purge receive queue */ 3471 rv = i_ldc_set_rx_head(ldcp, rx_tail); 3472 3473 break; 3474 } 3475 3476 /* 3477 * Process any messages of type CTRL messages 3478 * Future implementations should try to pass these 3479 * to LDC link by resetting the intr state. 3480 * 3481 * NOTE: not done as a switch() as type can be both ctrl+data 3482 */ 3483 if (msg->type & LDC_CTRL) { 3484 if (rv = i_ldc_ctrlmsg(ldcp, msg)) { 3485 if (rv == EAGAIN) 3486 continue; 3487 rv = i_ldc_set_rx_head(ldcp, rx_tail); 3488 *sizep = 0; 3489 bytes_read = 0; 3490 break; 3491 } 3492 } 3493 3494 /* process data ACKs */ 3495 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 3496 if (rv = i_ldc_process_data_ACK(ldcp, msg)) { 3497 *sizep = 0; 3498 bytes_read = 0; 3499 break; 3500 } 3501 } 3502 3503 /* process data NACKs */ 3504 if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) { 3505 DWARN(ldcp->id, 3506 "ldc_read: (0x%llx) received DATA/NACK", ldcp->id); 3507 mutex_enter(&ldcp->tx_lock); 3508 i_ldc_reset(ldcp, B_TRUE); 3509 mutex_exit(&ldcp->tx_lock); 3510 return (ECONNRESET); 3511 } 3512 3513 /* process data messages */ 3514 if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { 3515 3516 uint8_t *msgbuf = (uint8_t *)( 3517 (ldcp->mode == LDC_MODE_RELIABLE || 3518 ldcp->mode == LDC_MODE_STREAM) 3519 ? msg->rdata : msg->udata); 3520 3521 D2(ldcp->id, 3522 "ldc_read: (0x%llx) received data msg\n", ldcp->id); 3523 3524 /* get the packet length */ 3525 len = (msg->env & LDC_LEN_MASK); 3526 3527 /* 3528 * FUTURE OPTIMIZATION: 3529 * dont need to set q head for every 3530 * packet we read just need to do this when 3531 * we are done or need to wait for more 3532 * mondos to make a full packet - this is 3533 * currently expensive. 3534 */ 3535 3536 if (first_fragment == 0) { 3537 3538 /* 3539 * first packets should always have the start 3540 * bit set (even for a single packet). If not 3541 * throw away the packet 3542 */ 3543 if (!(msg->env & LDC_FRAG_START)) { 3544 3545 DWARN(DBG_ALL_LDCS, 3546 "ldc_read: (0x%llx) not start - " 3547 "frag=%x\n", ldcp->id, 3548 (msg->env) & LDC_FRAG_MASK); 3549 3550 /* toss pkt, inc head, cont reading */ 3551 bytes_read = 0; 3552 target = target_bufp; 3553 curr_head = 3554 (curr_head + LDC_PACKET_SIZE) 3555 & q_size_mask; 3556 if (rv = i_ldc_set_rx_head(ldcp, 3557 curr_head)) 3558 break; 3559 3560 continue; 3561 } 3562 3563 first_fragment = msg->seqid; 3564 } else { 3565 /* check to see if this is a pkt w/ START bit */ 3566 if (msg->env & LDC_FRAG_START) { 3567 DWARN(DBG_ALL_LDCS, 3568 "ldc_read:(0x%llx) unexpected pkt" 3569 " env=0x%x discarding %d bytes," 3570 " lastmsg=%d, currentmsg=%d\n", 3571 ldcp->id, msg->env&LDC_FRAG_MASK, 3572 bytes_read, ldcp->last_msg_rcd, 3573 msg->seqid); 3574 3575 /* throw data we have read so far */ 3576 bytes_read = 0; 3577 target = target_bufp; 3578 first_fragment = msg->seqid; 3579 3580 if (rv = i_ldc_set_rx_head(ldcp, 3581 curr_head)) 3582 break; 3583 } 3584 } 3585 3586 /* copy (next) pkt into buffer */ 3587 if (len <= (*sizep - bytes_read)) { 3588 bcopy(msgbuf, target, len); 3589 target += len; 3590 bytes_read += len; 3591 } else { 3592 /* 3593 * there is not enough space in the buffer to 3594 * read this pkt. 
throw message away & continue 3595 * reading data from queue 3596 */ 3597 DWARN(DBG_ALL_LDCS, 3598 "ldc_read: (0x%llx) buffer too small, " 3599 "head=0x%lx, expect=%d, got=%d\n", ldcp->id, 3600 curr_head, *sizep, bytes_read+len); 3601 3602 first_fragment = 0; 3603 target = target_bufp; 3604 bytes_read = 0; 3605 3606 /* throw away everything received so far */ 3607 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) 3608 break; 3609 3610 /* continue reading remaining pkts */ 3611 continue; 3612 } 3613 } 3614 3615 /* set the message id */ 3616 ldcp->last_msg_rcd = msg->seqid; 3617 3618 /* move the head one position */ 3619 curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask; 3620 3621 if (msg->env & LDC_FRAG_STOP) { 3622 3623 /* 3624 * All pkts that are part of this fragmented transfer 3625 * have been read or this was a single pkt read 3626 * or there was an error 3627 */ 3628 3629 /* set the queue head */ 3630 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) 3631 bytes_read = 0; 3632 3633 *sizep = bytes_read; 3634 3635 break; 3636 } 3637 3638 /* advance head if it is a DATA ACK */ 3639 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 3640 3641 /* set the queue head */ 3642 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) { 3643 bytes_read = 0; 3644 break; 3645 } 3646 3647 D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx", 3648 ldcp->id, curr_head); 3649 } 3650 3651 } /* for (;;) */ 3652 3653 3654 /* 3655 * If useful data was read - Send msg ACK 3656 * OPTIMIZE: do not send ACK for all msgs - use some frequency 3657 */ 3658 if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE || 3659 ldcp->mode == LDC_MODE_STREAM)) { 3660 3661 rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0); 3662 if (rv && rv != EWOULDBLOCK) { 3663 cmn_err(CE_NOTE, 3664 "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id); 3665 3666 /* if cannot send ACK - reset channel */ 3667 goto channel_is_reset; 3668 } 3669 } 3670 3671 D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep); 3672 3673 return (rv); 3674 3675 channel_is_reset: 3676 mutex_enter(&ldcp->tx_lock); 3677 i_ldc_reset(ldcp, B_FALSE); 3678 mutex_exit(&ldcp->tx_lock); 3679 return (ECONNRESET); 3680 } 3681 3682 /* 3683 * Use underlying reliable packet mechanism to fetch 3684 * and buffer incoming packets so we can hand them back as 3685 * a basic byte stream. 3686 * 3687 * Enter and exit with ldcp->lock held by caller 3688 */ 3689 static int 3690 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3691 { 3692 int rv; 3693 size_t size; 3694 3695 ASSERT(mutex_owned(&ldcp->lock)); 3696 3697 D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d", 3698 ldcp->id, *sizep); 3699 3700 if (ldcp->stream_remains == 0) { 3701 size = ldcp->mtu; 3702 rv = i_ldc_read_packet(ldcp, 3703 (caddr_t)ldcp->stream_bufferp, &size); 3704 D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d", 3705 ldcp->id, size); 3706 3707 if (rv != 0) 3708 return (rv); 3709 3710 ldcp->stream_remains = size; 3711 ldcp->stream_offset = 0; 3712 } 3713 3714 size = MIN(ldcp->stream_remains, *sizep); 3715 3716 bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size); 3717 ldcp->stream_offset += size; 3718 ldcp->stream_remains -= size; 3719 3720 D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d", 3721 ldcp->id, size); 3722 3723 *sizep = size; 3724 return (0); 3725 } 3726 3727 /* 3728 * Write specified amount of bytes to the channel 3729 * in multiple pkts of pkt_payload size. 
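* (See i_ldc_write_packet() below for the fragmentation mechanics.)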
Each 3730 * packet is tagged with an unique packet ID in 3731 * the case of a reliable link. 3732 * 3733 * On return, size contains the number of bytes written. 3734 */ 3735 int 3736 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep) 3737 { 3738 ldc_chan_t *ldcp; 3739 int rv = 0; 3740 3741 if (handle == NULL) { 3742 DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n"); 3743 return (EINVAL); 3744 } 3745 ldcp = (ldc_chan_t *)handle; 3746 3747 /* check if writes can occur */ 3748 if (!mutex_tryenter(&ldcp->tx_lock)) { 3749 /* 3750 * Could not get the lock - channel could 3751 * be in the process of being unconfigured 3752 * or reader has encountered an error 3753 */ 3754 return (EAGAIN); 3755 } 3756 3757 /* check if non-zero data to write */ 3758 if (buf == NULL || sizep == NULL) { 3759 DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n", 3760 ldcp->id); 3761 mutex_exit(&ldcp->tx_lock); 3762 return (EINVAL); 3763 } 3764 3765 if (*sizep == 0) { 3766 DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n", 3767 ldcp->id); 3768 mutex_exit(&ldcp->tx_lock); 3769 return (0); 3770 } 3771 3772 /* Check if channel is UP for data exchange */ 3773 if (ldcp->tstate != TS_UP) { 3774 DWARN(ldcp->id, 3775 "ldc_write: (0x%llx) channel is not in UP state\n", 3776 ldcp->id); 3777 *sizep = 0; 3778 rv = ECONNRESET; 3779 } else { 3780 rv = ldcp->write_p(ldcp, buf, sizep); 3781 } 3782 3783 mutex_exit(&ldcp->tx_lock); 3784 3785 return (rv); 3786 } 3787 3788 /* 3789 * Write a raw packet to the channel 3790 * On return, size contains the number of bytes written. 3791 */ 3792 static int 3793 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) 3794 { 3795 ldc_msg_t *ldcmsg; 3796 uint64_t tx_head, tx_tail, new_tail; 3797 int rv = 0; 3798 size_t size; 3799 3800 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 3801 ASSERT(ldcp->mode == LDC_MODE_RAW); 3802 3803 size = *sizep; 3804 3805 /* 3806 * Check to see if the packet size is less than or 3807 * equal to packet size support in raw mode 3808 */ 3809 if (size > ldcp->pkt_payload) { 3810 DWARN(ldcp->id, 3811 "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n", 3812 ldcp->id, *sizep); 3813 *sizep = 0; 3814 return (EMSGSIZE); 3815 } 3816 3817 /* get the qptrs for the tx queue */ 3818 rv = hv_ldc_tx_get_state(ldcp->id, 3819 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 3820 if (rv != 0) { 3821 cmn_err(CE_WARN, 3822 "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); 3823 *sizep = 0; 3824 return (EIO); 3825 } 3826 3827 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3828 ldcp->link_state == LDC_CHANNEL_RESET) { 3829 DWARN(ldcp->id, 3830 "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); 3831 3832 *sizep = 0; 3833 if (mutex_tryenter(&ldcp->lock)) { 3834 i_ldc_reset(ldcp, B_FALSE); 3835 mutex_exit(&ldcp->lock); 3836 } else { 3837 /* 3838 * Release Tx lock, and then reacquire channel 3839 * and Tx lock in correct order 3840 */ 3841 mutex_exit(&ldcp->tx_lock); 3842 mutex_enter(&ldcp->lock); 3843 mutex_enter(&ldcp->tx_lock); 3844 i_ldc_reset(ldcp, B_FALSE); 3845 mutex_exit(&ldcp->lock); 3846 } 3847 return (ECONNRESET); 3848 } 3849 3850 tx_tail = ldcp->tx_tail; 3851 tx_head = ldcp->tx_head; 3852 new_tail = (tx_tail + LDC_PACKET_SIZE) & 3853 ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT); 3854 3855 if (new_tail == tx_head) { 3856 DWARN(DBG_ALL_LDCS, 3857 "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); 3858 *sizep = 0; 3859 return (EWOULDBLOCK); 3860 } 3861 3862 D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", 3863 ldcp->id, size); 
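/* (raw mode sketch: the request becomes exactly one packet; no envelope bits, seqid or fragmentation are applied) */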
3864 3865 /* Send the data now */ 3866 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 3867 3868 /* copy the data into pkt */ 3869 bcopy((uint8_t *)buf, ldcmsg, size); 3870 3871 /* increment tail */ 3872 tx_tail = new_tail; 3873 3874 /* 3875 * All packets have been copied into the TX queue 3876 * update the tail ptr in the HV 3877 */ 3878 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 3879 if (rv) { 3880 if (rv == EWOULDBLOCK) { 3881 DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n", 3882 ldcp->id); 3883 *sizep = 0; 3884 return (EWOULDBLOCK); 3885 } 3886 3887 *sizep = 0; 3888 if (mutex_tryenter(&ldcp->lock)) { 3889 i_ldc_reset(ldcp, B_FALSE); 3890 mutex_exit(&ldcp->lock); 3891 } else { 3892 /* 3893 * Release Tx lock, and then reacquire channel 3894 * and Tx lock in correct order 3895 */ 3896 mutex_exit(&ldcp->tx_lock); 3897 mutex_enter(&ldcp->lock); 3898 mutex_enter(&ldcp->tx_lock); 3899 i_ldc_reset(ldcp, B_FALSE); 3900 mutex_exit(&ldcp->lock); 3901 } 3902 return (ECONNRESET); 3903 } 3904 3905 ldcp->tx_tail = tx_tail; 3906 *sizep = size; 3907 3908 D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size); 3909 3910 return (rv); 3911 } 3912 3913 3914 /* 3915 * Write specified amount of bytes to the channel 3916 * in multiple pkts of pkt_payload size. Each 3917 * packet is tagged with an unique packet ID in 3918 * the case of a reliable link. 3919 * 3920 * On return, size contains the number of bytes written. 3921 * This function needs to ensure that the write size is < MTU size 3922 */ 3923 static int 3924 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size) 3925 { 3926 ldc_msg_t *ldcmsg; 3927 uint64_t tx_head, tx_tail, new_tail, start; 3928 uint64_t txq_size_mask, numavail; 3929 uint8_t *msgbuf, *source = (uint8_t *)buf; 3930 size_t len, bytes_written = 0, remaining; 3931 int rv; 3932 uint32_t curr_seqid; 3933 3934 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 3935 3936 ASSERT(ldcp->mode == LDC_MODE_RELIABLE || 3937 ldcp->mode == LDC_MODE_UNRELIABLE || 3938 ldcp->mode == LDC_MODE_STREAM); 3939 3940 /* compute mask for increment */ 3941 txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT; 3942 3943 /* get the qptrs for the tx queue */ 3944 rv = hv_ldc_tx_get_state(ldcp->id, 3945 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 3946 if (rv != 0) { 3947 cmn_err(CE_WARN, 3948 "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); 3949 *size = 0; 3950 return (EIO); 3951 } 3952 3953 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3954 ldcp->link_state == LDC_CHANNEL_RESET) { 3955 DWARN(ldcp->id, 3956 "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); 3957 *size = 0; 3958 if (mutex_tryenter(&ldcp->lock)) { 3959 i_ldc_reset(ldcp, B_FALSE); 3960 mutex_exit(&ldcp->lock); 3961 } else { 3962 /* 3963 * Release Tx lock, and then reacquire channel 3964 * and Tx lock in correct order 3965 */ 3966 mutex_exit(&ldcp->tx_lock); 3967 mutex_enter(&ldcp->lock); 3968 mutex_enter(&ldcp->tx_lock); 3969 i_ldc_reset(ldcp, B_FALSE); 3970 mutex_exit(&ldcp->lock); 3971 } 3972 return (ECONNRESET); 3973 } 3974 3975 tx_tail = ldcp->tx_tail; 3976 new_tail = (tx_tail + LDC_PACKET_SIZE) % 3977 (ldcp->tx_q_entries << LDC_PACKET_SHIFT); 3978 3979 /* 3980 * Link mode determines whether we use HV Tx head or the 3981 * private protocol head (corresponding to last ACKd pkt) for 3982 * determining how much we can write 3983 */ 3984 tx_head = (ldcp->mode == LDC_MODE_RELIABLE || 3985 ldcp->mode == LDC_MODE_STREAM) 3986 ? 
ldcp->tx_ackd_head : ldcp->tx_head; 3987 if (new_tail == tx_head) { 3988 DWARN(DBG_ALL_LDCS, 3989 "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); 3990 *size = 0; 3991 return (EWOULDBLOCK); 3992 } 3993 3994 /* 3995 * Make sure that the LDC Tx queue has enough space 3996 */ 3997 numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT) 3998 + ldcp->tx_q_entries - 1; 3999 numavail %= ldcp->tx_q_entries; 4000 4001 if (*size > (numavail * ldcp->pkt_payload)) { 4002 DWARN(DBG_ALL_LDCS, 4003 "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id); 4004 return (EWOULDBLOCK); 4005 } 4006 4007 D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", 4008 ldcp->id, *size); 4009 4010 /* Send the data now */ 4011 bytes_written = 0; 4012 curr_seqid = ldcp->last_msg_snt; 4013 start = tx_tail; 4014 4015 while (*size > bytes_written) { 4016 4017 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 4018 4019 msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE || 4020 ldcp->mode == LDC_MODE_STREAM) 4021 ? ldcmsg->rdata : ldcmsg->udata); 4022 4023 ldcmsg->type = LDC_DATA; 4024 ldcmsg->stype = LDC_INFO; 4025 ldcmsg->ctrl = 0; 4026 4027 remaining = *size - bytes_written; 4028 len = min(ldcp->pkt_payload, remaining); 4029 ldcmsg->env = (uint8_t)len; 4030 4031 curr_seqid++; 4032 ldcmsg->seqid = curr_seqid; 4033 4034 /* copy the data into pkt */ 4035 bcopy(source, msgbuf, len); 4036 4037 source += len; 4038 bytes_written += len; 4039 4040 /* increment tail */ 4041 tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask; 4042 4043 ASSERT(tx_tail != tx_head); 4044 } 4045 4046 /* Set the start and stop bits */ 4047 ldcmsg->env |= LDC_FRAG_STOP; 4048 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start); 4049 ldcmsg->env |= LDC_FRAG_START; 4050 4051 /* 4052 * All packets have been copied into the TX queue 4053 * update the tail ptr in the HV 4054 */ 4055 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 4056 if (rv == 0) { 4057 ldcp->tx_tail = tx_tail; 4058 ldcp->last_msg_snt = curr_seqid; 4059 *size = bytes_written; 4060 } else { 4061 int rv2; 4062 4063 if (rv != EWOULDBLOCK) { 4064 *size = 0; 4065 if (mutex_tryenter(&ldcp->lock)) { 4066 i_ldc_reset(ldcp, B_FALSE); 4067 mutex_exit(&ldcp->lock); 4068 } else { 4069 /* 4070 * Release Tx lock, and then reacquire channel 4071 * and Tx lock in correct order 4072 */ 4073 mutex_exit(&ldcp->tx_lock); 4074 mutex_enter(&ldcp->lock); 4075 mutex_enter(&ldcp->tx_lock); 4076 i_ldc_reset(ldcp, B_FALSE); 4077 mutex_exit(&ldcp->lock); 4078 } 4079 return (ECONNRESET); 4080 } 4081 4082 D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, " 4083 "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n", 4084 rv, ldcp->tx_head, ldcp->tx_tail, tx_tail, 4085 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 4086 4087 rv2 = hv_ldc_tx_get_state(ldcp->id, 4088 &tx_head, &tx_tail, &ldcp->link_state); 4089 4090 D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x " 4091 "(head 0x%x, tail 0x%x state 0x%x)\n", 4092 rv2, tx_head, tx_tail, ldcp->link_state); 4093 4094 *size = 0; 4095 } 4096 4097 D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size); 4098 4099 return (rv); 4100 } 4101 4102 /* 4103 * Write specified amount of bytes to the channel 4104 * in multiple pkts of pkt_payload size. Each 4105 * packet is tagged with an unique packet ID in 4106 * the case of a reliable link. 4107 * 4108 * On return, size contains the number of bytes written. 
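* (Requests larger than the channel MTU are silently truncated to ldcp->mtu before being handed to i_ldc_write_packet(); see below.)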
4109 * This function needs to ensure that the write size is < MTU size 4110 */ 4111 static int 4112 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) 4113 { 4114 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 4115 ASSERT(ldcp->mode == LDC_MODE_STREAM); 4116 4117 /* Truncate packet to max of MTU size */ 4118 if (*sizep > ldcp->mtu) *sizep = ldcp->mtu; 4119 return (i_ldc_write_packet(ldcp, buf, sizep)); 4120 } 4121 4122 4123 /* 4124 * Interfaces for channel nexus to register/unregister with LDC module 4125 * The nexus will register functions to be used to register individual 4126 * channels with the nexus and enable interrupts for the channels 4127 */ 4128 int 4129 ldc_register(ldc_cnex_t *cinfo) 4130 { 4131 ldc_chan_t *ldcp; 4132 4133 if (cinfo == NULL || cinfo->dip == NULL || 4134 cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL || 4135 cinfo->add_intr == NULL || cinfo->rem_intr == NULL || 4136 cinfo->clr_intr == NULL) { 4137 4138 DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n"); 4139 return (EINVAL); 4140 } 4141 4142 mutex_enter(&ldcssp->lock); 4143 4144 /* nexus registration */ 4145 ldcssp->cinfo.dip = cinfo->dip; 4146 ldcssp->cinfo.reg_chan = cinfo->reg_chan; 4147 ldcssp->cinfo.unreg_chan = cinfo->unreg_chan; 4148 ldcssp->cinfo.add_intr = cinfo->add_intr; 4149 ldcssp->cinfo.rem_intr = cinfo->rem_intr; 4150 ldcssp->cinfo.clr_intr = cinfo->clr_intr; 4151 4152 /* register any channels that might have been previously initialized */ 4153 ldcp = ldcssp->chan_list; 4154 while (ldcp) { 4155 if ((ldcp->tstate & TS_QCONF_RDY) && 4156 (ldcp->tstate & TS_CNEX_RDY) == 0) 4157 (void) i_ldc_register_channel(ldcp); 4158 4159 ldcp = ldcp->next; 4160 } 4161 4162 mutex_exit(&ldcssp->lock); 4163 4164 return (0); 4165 } 4166 4167 int 4168 ldc_unregister(ldc_cnex_t *cinfo) 4169 { 4170 if (cinfo == NULL || cinfo->dip == NULL) { 4171 DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n"); 4172 return (EINVAL); 4173 } 4174 4175 mutex_enter(&ldcssp->lock); 4176 4177 if (cinfo->dip != ldcssp->cinfo.dip) { 4178 DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n"); 4179 mutex_exit(&ldcssp->lock); 4180 return (EINVAL); 4181 } 4182 4183 /* nexus unregister */ 4184 ldcssp->cinfo.dip = NULL; 4185 ldcssp->cinfo.reg_chan = NULL; 4186 ldcssp->cinfo.unreg_chan = NULL; 4187 ldcssp->cinfo.add_intr = NULL; 4188 ldcssp->cinfo.rem_intr = NULL; 4189 ldcssp->cinfo.clr_intr = NULL; 4190 4191 mutex_exit(&ldcssp->lock); 4192 4193 return (0); 4194 } 4195 4196 4197 /* ------------------------------------------------------------------------- */ 4198 4199 /* 4200 * Allocate a memory handle for the channel and link it into the list 4201 * Also choose which memory table to use if this is the first handle 4202 * being assigned to this channel 4203 */ 4204 int 4205 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle) 4206 { 4207 ldc_chan_t *ldcp; 4208 ldc_mhdl_t *mhdl; 4209 4210 if (handle == NULL) { 4211 DWARN(DBG_ALL_LDCS, 4212 "ldc_mem_alloc_handle: invalid channel handle\n"); 4213 return (EINVAL); 4214 } 4215 ldcp = (ldc_chan_t *)handle; 4216 4217 mutex_enter(&ldcp->lock); 4218 4219 /* check to see if channel is initalized */ 4220 if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) { 4221 DWARN(ldcp->id, 4222 "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n", 4223 ldcp->id); 4224 mutex_exit(&ldcp->lock); 4225 return (EINVAL); 4226 } 4227 4228 /* allocate handle for channel */ 4229 mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP); 4230 4231 /* initialize the lock */ 4232 
mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL); 4233 4234 mhdl->myshadow = B_FALSE; 4235 mhdl->memseg = NULL; 4236 mhdl->ldcp = ldcp; 4237 mhdl->status = LDC_UNBOUND; 4238 4239 /* insert memory handle (@ head) into list */ 4240 if (ldcp->mhdl_list == NULL) { 4241 ldcp->mhdl_list = mhdl; 4242 mhdl->next = NULL; 4243 } else { 4244 /* insert @ head */ 4245 mhdl->next = ldcp->mhdl_list; 4246 ldcp->mhdl_list = mhdl; 4247 } 4248 4249 /* return the handle */ 4250 *mhandle = (ldc_mem_handle_t)mhdl; 4251 4252 mutex_exit(&ldcp->lock); 4253 4254 D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n", 4255 ldcp->id, mhdl); 4256 4257 return (0); 4258 } 4259 4260 /* 4261 * Free memory handle for the channel and unlink it from the list 4262 */ 4263 int 4264 ldc_mem_free_handle(ldc_mem_handle_t mhandle) 4265 { 4266 ldc_mhdl_t *mhdl, *phdl; 4267 ldc_chan_t *ldcp; 4268 4269 if (mhandle == NULL) { 4270 DWARN(DBG_ALL_LDCS, 4271 "ldc_mem_free_handle: invalid memory handle\n"); 4272 return (EINVAL); 4273 } 4274 mhdl = (ldc_mhdl_t *)mhandle; 4275 4276 mutex_enter(&mhdl->lock); 4277 4278 ldcp = mhdl->ldcp; 4279 4280 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { 4281 DWARN(ldcp->id, 4282 "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n", 4283 mhdl); 4284 mutex_exit(&mhdl->lock); 4285 return (EINVAL); 4286 } 4287 mutex_exit(&mhdl->lock); 4288 4289 mutex_enter(&ldcp->mlist_lock); 4290 4291 phdl = ldcp->mhdl_list; 4292 4293 /* first handle */ 4294 if (phdl == mhdl) { 4295 ldcp->mhdl_list = mhdl->next; 4296 mutex_destroy(&mhdl->lock); 4297 kmem_cache_free(ldcssp->memhdl_cache, mhdl); 4298 4299 D1(ldcp->id, 4300 "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n", 4301 ldcp->id, mhdl); 4302 } else { 4303 /* walk the list - unlink and free */ 4304 while (phdl != NULL) { 4305 if (phdl->next == mhdl) { 4306 phdl->next = mhdl->next; 4307 mutex_destroy(&mhdl->lock); 4308 kmem_cache_free(ldcssp->memhdl_cache, mhdl); 4309 D1(ldcp->id, 4310 "ldc_mem_free_handle: (0x%llx) freed " 4311 "handle 0x%llx\n", ldcp->id, mhdl); 4312 break; 4313 } 4314 phdl = phdl->next; 4315 } 4316 } 4317 4318 if (phdl == NULL) { 4319 DWARN(ldcp->id, 4320 "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl); 4321 mutex_exit(&ldcp->mlist_lock); 4322 return (EINVAL); 4323 } 4324 4325 mutex_exit(&ldcp->mlist_lock); 4326 4327 return (0); 4328 } 4329 4330 /* 4331 * Bind a memory handle to a virtual address. 4332 * The virtual address is converted to the corresponding real addresses. 4333 * Returns pointer to the first ldc_mem_cookie and the total number 4334 * of cookies for this virtual address. Other cookies can be obtained 4335 * using the ldc_mem_nextcookie() call. If the pages are stored in 4336 * consecutive locations in the table, a single cookie corresponding to 4337 * the first location is returned. The cookie size spans all the entries. 4338 * 4339 * If the VA corresponds to a page that is already being exported, reuse 4340 * the page and do not export it again. Bump the page's use count. 
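* A minimal export sketch (hypothetical client code; 'chan' is an open channel handle and 'buf'/'len' are assumed 8-byte aligned, per the check below): ldc_mem_handle_t mh; ldc_mem_cookie_t cookie; uint32_t ccount; (void) ldc_mem_alloc_handle(chan, &mh); (void) ldc_mem_bind_handle(mh, buf, len, LDC_SHADOW_MAP, LDC_MEM_R | LDC_MEM_W, &cookie, &ccount);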
4341 */ 4342 int 4343 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, 4344 uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) 4345 { 4346 ldc_mhdl_t *mhdl; 4347 ldc_chan_t *ldcp; 4348 ldc_mtbl_t *mtbl; 4349 ldc_memseg_t *memseg; 4350 ldc_mte_t tmp_mte; 4351 uint64_t index, prev_index = 0; 4352 int64_t cookie_idx; 4353 uintptr_t raddr, ra_aligned; 4354 uint64_t psize, poffset, v_offset; 4355 uint64_t pg_shift, pg_size, pg_size_code, pg_mask; 4356 pgcnt_t npages; 4357 caddr_t v_align, addr; 4358 int i, rv; 4359 4360 if (mhandle == NULL) { 4361 DWARN(DBG_ALL_LDCS, 4362 "ldc_mem_bind_handle: invalid memory handle\n"); 4363 return (EINVAL); 4364 } 4365 mhdl = (ldc_mhdl_t *)mhandle; 4366 ldcp = mhdl->ldcp; 4367 4368 /* clear count */ 4369 *ccount = 0; 4370 4371 mutex_enter(&mhdl->lock); 4372 4373 if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) { 4374 DWARN(ldcp->id, 4375 "ldc_mem_bind_handle: (0x%x) handle already bound\n", 4376 mhandle); 4377 mutex_exit(&mhdl->lock); 4378 return (EINVAL); 4379 } 4380 4381 /* Force address and size to be 8-byte aligned */ 4382 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 4383 DWARN(ldcp->id, 4384 "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n"); 4385 mutex_exit(&mhdl->lock); 4386 return (EINVAL); 4387 } 4388 4389 /* 4390 * If this channel is binding a memory handle for the 4391 * first time allocate it a memory map table and initialize it 4392 */ 4393 if ((mtbl = ldcp->mtbl) == NULL) { 4394 4395 mutex_enter(&ldcp->lock); 4396 4397 /* Allocate and initialize the map table structure */ 4398 mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP); 4399 mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries; 4400 mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t); 4401 mtbl->next_entry = NULL; 4402 mtbl->contigmem = B_TRUE; 4403 4404 /* Allocate the table itself */ 4405 mtbl->table = (ldc_mte_slot_t *) 4406 contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE); 4407 if (mtbl->table == NULL) { 4408 4409 /* allocate a page of memory using kmem_alloc */ 4410 mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP); 4411 mtbl->size = MMU_PAGESIZE; 4412 mtbl->contigmem = B_FALSE; 4413 mtbl->num_entries = mtbl->num_avail = 4414 mtbl->size / sizeof (ldc_mte_slot_t); 4415 DWARN(ldcp->id, 4416 "ldc_mem_bind_handle: (0x%llx) reduced tbl size " 4417 "to %lx entries\n", ldcp->id, mtbl->num_entries); 4418 } 4419 4420 /* zero out the memory */ 4421 bzero(mtbl->table, mtbl->size); 4422 4423 /* initialize the lock */ 4424 mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL); 4425 4426 /* register table for this channel */ 4427 rv = hv_ldc_set_map_table(ldcp->id, 4428 va_to_pa(mtbl->table), mtbl->num_entries); 4429 if (rv != 0) { 4430 cmn_err(CE_WARN, 4431 "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl", 4432 ldcp->id, rv); 4433 if (mtbl->contigmem) 4434 contig_mem_free(mtbl->table, mtbl->size); 4435 else 4436 kmem_free(mtbl->table, mtbl->size); 4437 mutex_destroy(&mtbl->lock); 4438 kmem_free(mtbl, sizeof (ldc_mtbl_t)); 4439 mutex_exit(&ldcp->lock); 4440 mutex_exit(&mhdl->lock); 4441 return (EIO); 4442 } 4443 4444 ldcp->mtbl = mtbl; 4445 mutex_exit(&ldcp->lock); 4446 4447 D1(ldcp->id, 4448 "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n", 4449 ldcp->id, ldcp->mtbl->table); 4450 } 4451 4452 /* FUTURE: get the page size, pgsz code, and shift */ 4453 pg_size = MMU_PAGESIZE; 4454 pg_size_code = page_szc(pg_size); 4455 pg_shift = page_get_shift(pg_size_code); 4456 pg_mask = ~(pg_size - 1); 4457 4458 D1(ldcp->id, 
"ldc_mem_bind_handle: (0x%llx) binding " 4459 "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 4460 ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); 4461 4462 /* aligned VA and its offset */ 4463 v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1)); 4464 v_offset = ((uintptr_t)vaddr) & (pg_size - 1); 4465 4466 npages = (len+v_offset)/pg_size; 4467 npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1; 4468 4469 D1(ldcp->id, "ldc_mem_bind_handle: binding " 4470 "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", 4471 ldcp->id, vaddr, v_align, v_offset, npages); 4472 4473 /* lock the memory table - exclusive access to channel */ 4474 mutex_enter(&mtbl->lock); 4475 4476 if (npages > mtbl->num_avail) { 4477 D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n", 4478 ldcp->id); 4479 mutex_exit(&mtbl->lock); 4480 mutex_exit(&mhdl->lock); 4481 return (ENOMEM); 4482 } 4483 4484 /* Allocate a memseg structure */ 4485 memseg = mhdl->memseg = 4486 kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); 4487 4488 /* Allocate memory to store all pages and cookies */ 4489 memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); 4490 memseg->cookies = 4491 kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP); 4492 4493 D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n", 4494 ldcp->id, npages); 4495 4496 addr = v_align; 4497 4498 /* 4499 * Check if direct shared memory map is enabled, if not change 4500 * the mapping type to include SHADOW_MAP. 4501 */ 4502 if (ldc_shmem_enabled == 0) 4503 mtype = LDC_SHADOW_MAP; 4504 4505 /* 4506 * Table slots are used in a round-robin manner. The algorithm permits 4507 * inserting duplicate entries. Slots allocated earlier will typically 4508 * get freed before we get back to reusing the slot.Inserting duplicate 4509 * entries should be OK as we only lookup entries using the cookie addr 4510 * i.e. tbl index, during export, unexport and copy operation. 4511 * 4512 * One implementation what was tried was to search for a duplicate 4513 * page entry first and reuse it. The search overhead is very high and 4514 * in the vnet case dropped the perf by almost half, 50 to 24 mbps. 4515 * So it does make sense to avoid searching for duplicates. 4516 * 4517 * But during the process of searching for a free slot, if we find a 4518 * duplicate entry we will go ahead and use it, and bump its use count. 4519 */ 4520 4521 /* index to start searching from */ 4522 index = mtbl->next_entry; 4523 cookie_idx = -1; 4524 4525 tmp_mte.ll = 0; /* initialise fields to 0 */ 4526 4527 if (mtype & LDC_DIRECT_MAP) { 4528 tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0; 4529 tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0; 4530 tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0; 4531 } 4532 4533 if (mtype & LDC_SHADOW_MAP) { 4534 tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0; 4535 tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0; 4536 } 4537 4538 if (mtype & LDC_IO_MAP) { 4539 tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0; 4540 tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 
1 : 0;
4541 }
4542
4543 D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4544
4545 tmp_mte.mte_pgszc = pg_size_code;
4546
4547 /* initialize each mem table entry */
4548 for (i = 0; i < npages; i++) {
4549
4550 /* check if slot is available in the table */
4551 while (mtbl->table[index].entry.ll != 0) {
4552
4553 index = (index + 1) % mtbl->num_entries;
4554
4555 if (index == mtbl->next_entry) {
4556 /* we have looped around */
4557 DWARN(DBG_ALL_LDCS,
4558 "ldc_mem_bind_handle: (0x%llx) cannot find "
4559 "entry\n", ldcp->id);
4560 *ccount = 0;
4561
4562 /* NOTE: free memory, remove previous entries */
4563 /* this shouldn't happen as num_avail was ok */
4564
4565 mutex_exit(&mtbl->lock);
4566 mutex_exit(&mhdl->lock);
4567 return (ENOMEM);
4568 }
4569 }
4570
4571 /* get the real address */
4572 raddr = va_to_pa((void *)addr);
4573 ra_aligned = ((uintptr_t)raddr & pg_mask);
4574
4575 /* build the mte */
4576 tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4577
4578 D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4579
4580 /* update entry in table */
4581 mtbl->table[index].entry = tmp_mte;
4582
4583 D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4584 " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4585
4586 /* calculate the size and offset for this export range */
4587 if (i == 0) {
4588 /* first page */
4589 psize = min((pg_size - v_offset), len);
4590 poffset = v_offset;
4591
4592 } else if (i == (npages - 1)) {
4593 /* last page */
4594 psize = (((uintptr_t)(vaddr + len)) &
4595 ((uint64_t)(pg_size-1)));
4596 if (psize == 0)
4597 psize = pg_size;
4598 poffset = 0;
4599
4600 } else {
4601 /* middle pages */
4602 psize = pg_size;
4603 poffset = 0;
4604 }
4605
4606 /* store entry for this page */
4607 memseg->pages[i].index = index;
4608 memseg->pages[i].raddr = raddr;
4609 memseg->pages[i].offset = poffset;
4610 memseg->pages[i].size = psize;
4611 memseg->pages[i].mte = &(mtbl->table[index]);
4612
4613 /* create the cookie */
4614 if (i == 0 || (index != prev_index + 1)) {
4615 cookie_idx++;
4616 memseg->cookies[cookie_idx].addr =
4617 IDX2COOKIE(index, pg_size_code, pg_shift);
4618 memseg->cookies[cookie_idx].addr |= poffset;
4619 memseg->cookies[cookie_idx].size = psize;
4620
4621 } else {
4622 memseg->cookies[cookie_idx].size += psize;
4623 }
4624
4625 D1(ldcp->id, "ldc_mem_bind_handle: bound "
4626 "(0x%llx) va=0x%llx, idx=0x%llx, "
4627 "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4628 ldcp->id, addr, index, raddr, psize, poffset);
4629
4630 /* decrement number of available entries */
4631 mtbl->num_avail--;
4632
4633 /* increment va by page size */
4634 addr += pg_size;
4635
4636 /* increment index */
4637 prev_index = index;
4638 index = (index + 1) % mtbl->num_entries;
4639
4640 /* save the next slot */
4641 mtbl->next_entry = index;
4642 }
4643
4644 mutex_exit(&mtbl->lock);
4645
4646 /* memory handle = bound */
4647 mhdl->mtype = mtype;
4648 mhdl->perm = perm;
4649 mhdl->status = LDC_BOUND;
4650
4651 /* update memseg_t */
4652 memseg->vaddr = vaddr;
4653 memseg->raddr = memseg->pages[0].raddr;
4654 memseg->size = len;
4655 memseg->npages = npages;
4656 memseg->ncookies = cookie_idx + 1;
4657 memseg->next_cookie = (memseg->ncookies > 1) ?
1 : 0;
4658
4659 /* return count and first cookie */
4660 *ccount = memseg->ncookies;
4661 cookie->addr = memseg->cookies[0].addr;
4662 cookie->size = memseg->cookies[0].size;
4663
4664 D1(ldcp->id,
4665 "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4666 "pgs=0x%llx cookies=0x%llx\n",
4667 ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4668
4669 mutex_exit(&mhdl->lock);
4670 return (0);
4671 }
4672
4673 /*
4674 * Return the next cookie associated with the specified memory handle
4675 */
4676 int
4677 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4678 {
4679 ldc_mhdl_t *mhdl;
4680 ldc_chan_t *ldcp;
4681 ldc_memseg_t *memseg;
4682
4683 if (mhandle == NULL) {
4684 DWARN(DBG_ALL_LDCS,
4685 "ldc_mem_nextcookie: invalid memory handle\n");
4686 return (EINVAL);
4687 }
4688 mhdl = (ldc_mhdl_t *)mhandle;
4689
4690 mutex_enter(&mhdl->lock);
4691
4692 ldcp = mhdl->ldcp;
4693 memseg = mhdl->memseg;
4694
4695 if (cookie == 0) {
4696 DWARN(ldcp->id,
4697 "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4698 ldcp->id);
4699 mutex_exit(&mhdl->lock);
4700 return (EINVAL);
4701 }
4702
4703 if (memseg->next_cookie != 0) {
4704 cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4705 cookie->size = memseg->cookies[memseg->next_cookie].size;
4706 memseg->next_cookie++;
4707 if (memseg->next_cookie == memseg->ncookies)
4708 memseg->next_cookie = 0;
4709
4710 } else {
4711 DWARN(ldcp->id,
4712 "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4713 cookie->addr = 0;
4714 cookie->size = 0;
4715 mutex_exit(&mhdl->lock);
4716 return (EINVAL);
4717 }
4718
4719 D1(ldcp->id,
4720 "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4721 ldcp->id, cookie->addr, cookie->size);
4722
4723 mutex_exit(&mhdl->lock);
4724 return (0);
4725 }
4726
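/*
 * Usage sketch (illustrative only, guarded out of the build): a typical
 * exporter-side sequence that binds a local 8-byte aligned buffer and
 * collects all of its cookies for transmission to the peer. The names
 * 'export_buf' and 'max' are hypothetical; error handling is abbreviated.
 */
#ifdef LDC_USAGE_EXAMPLE
static int
export_buf(ldc_handle_t chan, caddr_t buf, size_t len,
	ldc_mem_cookie_t *cookies, uint32_t max, uint32_t *ncookies)
{
	ldc_mem_handle_t mh;
	uint32_t ccount, i;
	int rv;

	if ((rv = ldc_mem_alloc_handle(chan, &mh)) != 0)
		return (rv);

	/* bind the buffer; the first cookie and total count are returned */
	rv = ldc_mem_bind_handle(mh, buf, len, LDC_SHADOW_MAP,
	    LDC_MEM_RW, &cookies[0], &ccount);
	if (rv != 0) {
		(void) ldc_mem_free_handle(mh);
		return (rv);
	}

	if (ccount > max) {
		(void) ldc_mem_unbind_handle(mh);
		(void) ldc_mem_free_handle(mh);
		return (EAGAIN);
	}

	/* fetch the remaining cookies, one at a time */
	for (i = 1; i < ccount; i++)
		(void) ldc_mem_nextcookie(mh, &cookies[i]);

	*ncookies = ccount;
	return (0);
}
#endif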
4727 /*
4728 * Unbind the virtual memory region associated with the specified
4729 * memory handle. All associated cookies are freed and the corresponding
4730 * RA space is no longer exported.
4731 */
4732 int
4733 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4734 {
4735 ldc_mhdl_t *mhdl;
4736 ldc_chan_t *ldcp;
4737 ldc_mtbl_t *mtbl;
4738 ldc_memseg_t *memseg;
4739 uint64_t cookie_addr;
4740 uint64_t pg_shift, pg_size_code;
4741 int i, rv;
4742
4743 if (mhandle == NULL) {
4744 DWARN(DBG_ALL_LDCS,
4745 "ldc_mem_unbind_handle: invalid memory handle\n");
4746 return (EINVAL);
4747 }
4748 mhdl = (ldc_mhdl_t *)mhandle;
4749
4750 mutex_enter(&mhdl->lock);
4751
4752 if (mhdl->status == LDC_UNBOUND) {
4753 DWARN(DBG_ALL_LDCS,
4754 "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4755 mhandle);
4756 mutex_exit(&mhdl->lock);
4757 return (EINVAL);
4758 }
4759
4760 ldcp = mhdl->ldcp;
4761 mtbl = ldcp->mtbl;
4762
4763 memseg = mhdl->memseg;
4764
4765 /* lock the memory table - exclusive access to channel */
4766 mutex_enter(&mtbl->lock);
4767
4768 /* undo the pages exported */
4769 for (i = 0; i < memseg->npages; i++) {
4770
4771 /* check for mapped pages, revocation cookie != 0 */
4772 if (memseg->pages[i].mte->cookie) {
4773
4774 pg_size_code = page_szc(memseg->pages[i].size);
4775 pg_shift = page_get_shift(memseg->pages[i].size);
4776 cookie_addr = IDX2COOKIE(memseg->pages[i].index,
4777 pg_size_code, pg_shift);
4778
4779 D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
4780 "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
4781 cookie_addr, memseg->pages[i].mte->cookie);
4782 rv = hv_ldc_revoke(ldcp->id, cookie_addr,
4783 memseg->pages[i].mte->cookie);
4784 if (rv) {
4785 DWARN(ldcp->id,
4786 "ldc_mem_unbind_handle: (0x%llx) cannot "
4787 "revoke mapping, cookie %llx\n", ldcp->id,
4788 cookie_addr);
4789 }
4790 }
4791
4792 /* clear the entry from the table */
4793 memseg->pages[i].mte->entry.ll = 0;
4794 mtbl->num_avail++;
4795 }
4796 mutex_exit(&mtbl->lock);
4797
4798 /* free the allocated memseg and page structures */
4799 kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4800 kmem_free(memseg->cookies,
4801 (sizeof (ldc_mem_cookie_t) * memseg->npages));
4802 kmem_cache_free(ldcssp->memseg_cache, memseg);
4803
4804 /* uninitialize the memory handle */
4805 mhdl->memseg = NULL;
4806 mhdl->status = LDC_UNBOUND;
4807
4808 D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4809 ldcp->id, mhdl);
4810
4811 mutex_exit(&mhdl->lock);
4812 return (0);
4813 }
4814
4815 /*
4816 * Get information about a memory handle. The status is returned along
4817 * with the base addresses, map type and permissions if bound or mapped.
4818 */
4819 int
4820 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4821 {
4822 ldc_mhdl_t *mhdl;
4823
4824 if (mhandle == NULL) {
4825 DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4826 return (EINVAL);
4827 }
4828 mhdl = (ldc_mhdl_t *)mhandle;
4829
4830 if (minfo == NULL) {
4831 DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4832 return (EINVAL);
4833 }
4834
4835 mutex_enter(&mhdl->lock);
4836
4837 minfo->status = mhdl->status;
4838 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4839 minfo->vaddr = mhdl->memseg->vaddr;
4840 minfo->raddr = mhdl->memseg->raddr;
4841 minfo->mtype = mhdl->mtype;
4842 minfo->perm = mhdl->perm;
4843 }
4844 mutex_exit(&mhdl->lock);
4845
4846 return (0);
4847 }
4848
4849 /*
4850 * Copy data either from or to the client specified virtual address
4851 * space to or from the exported memory associated with the cookies.
4852 * The direction argument determines whether the data is read from or
4853 * written to exported memory.
4854 */ 4855 int 4856 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size, 4857 ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction) 4858 { 4859 ldc_chan_t *ldcp; 4860 uint64_t local_voff, local_valign; 4861 uint64_t cookie_addr, cookie_size; 4862 uint64_t pg_shift, pg_size, pg_size_code; 4863 uint64_t export_caddr, export_poff, export_psize, export_size; 4864 uint64_t local_ra, local_poff, local_psize; 4865 uint64_t copy_size, copied_len = 0, total_bal = 0, idx = 0; 4866 pgcnt_t npages; 4867 size_t len = *size; 4868 int i, rv = 0; 4869 4870 uint64_t chid; 4871 4872 if (handle == NULL) { 4873 DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n"); 4874 return (EINVAL); 4875 } 4876 ldcp = (ldc_chan_t *)handle; 4877 chid = ldcp->id; 4878 4879 /* check to see if channel is UP */ 4880 if (ldcp->tstate != TS_UP) { 4881 DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n", 4882 chid); 4883 return (ECONNRESET); 4884 } 4885 4886 /* Force address and size to be 8-byte aligned */ 4887 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 4888 DWARN(chid, 4889 "ldc_mem_copy: addr/sz is not 8-byte aligned\n"); 4890 return (EINVAL); 4891 } 4892 4893 /* Find the size of the exported memory */ 4894 export_size = 0; 4895 for (i = 0; i < ccount; i++) 4896 export_size += cookies[i].size; 4897 4898 /* check to see if offset is valid */ 4899 if (off > export_size) { 4900 DWARN(chid, 4901 "ldc_mem_copy: (0x%llx) start offset > export mem size\n", 4902 chid); 4903 return (EINVAL); 4904 } 4905 4906 /* 4907 * Check to see if the export size is smaller than the size we 4908 * are requesting to copy - if so flag an error 4909 */ 4910 if ((export_size - off) < *size) { 4911 DWARN(chid, 4912 "ldc_mem_copy: (0x%llx) copy size > export mem size\n", 4913 chid); 4914 return (EINVAL); 4915 } 4916 4917 total_bal = min(export_size, *size); 4918 4919 /* FUTURE: get the page size, pgsz code, and shift */ 4920 pg_size = MMU_PAGESIZE; 4921 pg_size_code = page_szc(pg_size); 4922 pg_shift = page_get_shift(pg_size_code); 4923 4924 D1(chid, "ldc_mem_copy: copying data " 4925 "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 4926 chid, vaddr, pg_size, pg_size_code, pg_shift); 4927 4928 /* aligned VA and its offset */ 4929 local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1)); 4930 local_voff = ((uintptr_t)vaddr) & (pg_size - 1); 4931 4932 npages = (len+local_voff)/pg_size; 4933 npages = ((len+local_voff)%pg_size == 0) ? 
npages : npages+1; 4934 4935 D1(chid, 4936 "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", 4937 chid, vaddr, local_valign, local_voff, npages); 4938 4939 local_ra = va_to_pa((void *)local_valign); 4940 local_poff = local_voff; 4941 local_psize = min(len, (pg_size - local_voff)); 4942 4943 len -= local_psize; 4944 4945 /* 4946 * find the first cookie in the list of cookies 4947 * if the offset passed in is not zero 4948 */ 4949 for (idx = 0; idx < ccount; idx++) { 4950 cookie_size = cookies[idx].size; 4951 if (off < cookie_size) 4952 break; 4953 off -= cookie_size; 4954 } 4955 4956 cookie_addr = cookies[idx].addr + off; 4957 cookie_size = cookies[idx].size - off; 4958 4959 export_caddr = cookie_addr & ~(pg_size - 1); 4960 export_poff = cookie_addr & (pg_size - 1); 4961 export_psize = min(cookie_size, (pg_size - export_poff)); 4962 4963 for (;;) { 4964 4965 copy_size = min(export_psize, local_psize); 4966 4967 D1(chid, 4968 "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx," 4969 " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx," 4970 " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," 4971 " total_bal=0x%llx\n", 4972 chid, direction, export_caddr, local_ra, export_poff, 4973 local_poff, export_psize, local_psize, copy_size, 4974 total_bal); 4975 4976 rv = hv_ldc_copy(chid, direction, 4977 (export_caddr + export_poff), (local_ra + local_poff), 4978 copy_size, &copied_len); 4979 4980 if (rv != 0) { 4981 int error = EIO; 4982 uint64_t rx_hd, rx_tl; 4983 4984 DWARN(chid, 4985 "ldc_mem_copy: (0x%llx) err %d during copy\n", 4986 (unsigned long long)chid, rv); 4987 DWARN(chid, 4988 "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, " 4989 "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx," 4990 " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx," 4991 " copied_len=0x%lx, total_bal=0x%lx\n", 4992 chid, direction, export_caddr, local_ra, 4993 export_poff, local_poff, export_psize, local_psize, 4994 copy_size, copied_len, total_bal); 4995 4996 *size = *size - total_bal; 4997 4998 /* 4999 * check if reason for copy error was due to 5000 * a channel reset. we need to grab the lock 5001 * just in case we have to do a reset. 
5002 */
5003 mutex_enter(&ldcp->lock);
5004 mutex_enter(&ldcp->tx_lock);
5005
5006 rv = hv_ldc_rx_get_state(ldcp->id,
5007 &rx_hd, &rx_tl, &(ldcp->link_state));
5008 if (ldcp->link_state == LDC_CHANNEL_DOWN ||
5009 ldcp->link_state == LDC_CHANNEL_RESET) {
5010 i_ldc_reset(ldcp, B_FALSE);
5011 error = ECONNRESET;
5012 }
5013
5014 mutex_exit(&ldcp->tx_lock);
5015 mutex_exit(&ldcp->lock);
5016
5017 return (error);
5018 }
5019
5020 ASSERT(copied_len <= copy_size);
5021
5022 D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
5023 export_poff += copied_len;
5024 local_poff += copied_len;
5025 export_psize -= copied_len;
5026 local_psize -= copied_len;
5027 cookie_size -= copied_len;
5028
5029 total_bal -= copied_len;
5030
5031 if (copy_size != copied_len)
5032 continue;
5033
5034 if (export_psize == 0 && total_bal != 0) {
5035
5036 if (cookie_size == 0) {
5037 idx++;
5038 cookie_addr = cookies[idx].addr;
5039 cookie_size = cookies[idx].size;
5040
5041 export_caddr = cookie_addr & ~(pg_size - 1);
5042 export_poff = cookie_addr & (pg_size - 1);
5043 export_psize =
5044 min(cookie_size, (pg_size-export_poff));
5045 } else {
5046 export_caddr += pg_size;
5047 export_poff = 0;
5048 export_psize = min(cookie_size, pg_size);
5049 }
5050 }
5051
5052 if (local_psize == 0 && total_bal != 0) {
5053 local_valign += pg_size;
5054 local_ra = va_to_pa((void *)local_valign);
5055 local_poff = 0;
5056 local_psize = min(pg_size, len);
5057 len -= local_psize;
5058 }
5059
5060 /* check if we are all done */
5061 if (total_bal == 0)
5062 break;
5063 }
5064
5065
5066 D1(chid,
5067 "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
5068 chid, *size);
5069
5070 return (0);
5071 }
5072
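/*
 * Usage sketch (illustrative only, guarded out of the build): moving data
 * into a peer's exported segment once its cookies have arrived, e.g. in a
 * descriptor. LDC_COPY_OUT writes local data to the exported memory and
 * LDC_COPY_IN reads from it; the buffer, length and offset must be 8-byte
 * aligned and the channel must be UP. 'copy_to_peer' is a hypothetical name.
 */
#ifdef LDC_USAGE_EXAMPLE
static int
copy_to_peer(ldc_handle_t chan, caddr_t buf, size_t len,
	ldc_mem_cookie_t *cookies, uint32_t ccount)
{
	size_t size = len;	/* on error, updated to the bytes copied */
	int rv;

	rv = ldc_mem_copy(chan, buf, 0, &size, cookies, ccount,
	    LDC_COPY_OUT);
	if (rv != 0)
		return (rv);	/* ECONNRESET indicates a channel reset */

	return (0);
}
#endif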
5073 /*
5074 * Copy data either from or to the client specified virtual address
5075 * space to or from HV physical memory.
5076 *
5077 * The direction argument determines whether the data is read from or
5078 * written to HV memory. direction values are LDC_COPY_IN/OUT, similar
5079 * to the ldc_mem_copy interface
5080 */
5081 int
5082 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size,
5083 caddr_t paddr, uint8_t direction)
5084 {
5085 ldc_chan_t *ldcp;
5086 uint64_t local_voff, local_valign;
5087 uint64_t pg_shift, pg_size, pg_size_code;
5088 uint64_t target_pa, target_poff, target_psize, target_size;
5089 uint64_t local_ra, local_poff, local_psize;
5090 uint64_t copy_size, copied_len = 0;
5091 pgcnt_t npages;
5092 size_t len = *size;
5093 int rv = 0;
5094
5095 if (handle == NULL) {
5096 DWARN(DBG_ALL_LDCS,
5097 "ldc_mem_rdwr_cookie: invalid channel handle\n");
5098 return (EINVAL);
5099 }
5100 ldcp = (ldc_chan_t *)handle;
5101
5102 mutex_enter(&ldcp->lock);
5103
5104 /* check to see if channel is UP */
5105 if (ldcp->tstate != TS_UP) {
5106 DWARN(ldcp->id,
5107 "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
5108 ldcp->id);
5109 mutex_exit(&ldcp->lock);
5110 return (ECONNRESET);
5111 }
5112
5113 /* Force address and size to be 8-byte aligned */
5114 if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
5115 DWARN(ldcp->id,
5116 "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n");
5117 mutex_exit(&ldcp->lock);
5118 return (EINVAL);
5119 }
5120
5121 target_size = *size;
5122
5123 /* FUTURE: get the page size, pgsz code, and shift */
5124 pg_size = MMU_PAGESIZE;
5125 pg_size_code = page_szc(pg_size);
5126 pg_shift = page_get_shift(pg_size_code);
5127
5128 D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data "
5129 "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
5130 ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
5131
5132 /* aligned VA and its offset */
5133 local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
5134 local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
5135
5136 npages = (len + local_voff) / pg_size;
5137 npages = ((len + local_voff) % pg_size == 0) ?
npages : npages+1; 5138 5139 D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, " 5140 "val=0x%llx,off=0x%x,pgs=0x%x\n", 5141 ldcp->id, vaddr, local_valign, local_voff, npages); 5142 5143 local_ra = va_to_pa((void *)local_valign); 5144 local_poff = local_voff; 5145 local_psize = min(len, (pg_size - local_voff)); 5146 5147 len -= local_psize; 5148 5149 target_pa = ((uintptr_t)paddr) & ~(pg_size - 1); 5150 target_poff = ((uintptr_t)paddr) & (pg_size - 1); 5151 target_psize = pg_size - target_poff; 5152 5153 for (;;) { 5154 5155 copy_size = min(target_psize, local_psize); 5156 5157 D1(ldcp->id, 5158 "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx," 5159 " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx," 5160 " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," 5161 " total_bal=0x%llx\n", 5162 ldcp->id, direction, target_pa, local_ra, target_poff, 5163 local_poff, target_psize, local_psize, copy_size, 5164 target_size); 5165 5166 rv = hv_ldc_copy(ldcp->id, direction, 5167 (target_pa + target_poff), (local_ra + local_poff), 5168 copy_size, &copied_len); 5169 5170 if (rv != 0) { 5171 DWARN(DBG_ALL_LDCS, 5172 "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n", 5173 ldcp->id, rv); 5174 DWARN(DBG_ALL_LDCS, 5175 "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, " 5176 "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, " 5177 "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, " 5178 "copy_sz=0x%llx, total_bal=0x%llx\n", 5179 ldcp->id, direction, target_pa, local_ra, 5180 target_poff, local_poff, target_psize, local_psize, 5181 copy_size, target_size); 5182 5183 *size = *size - target_size; 5184 mutex_exit(&ldcp->lock); 5185 return (i_ldc_h2v_error(rv)); 5186 } 5187 5188 D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n", 5189 copied_len); 5190 target_poff += copied_len; 5191 local_poff += copied_len; 5192 target_psize -= copied_len; 5193 local_psize -= copied_len; 5194 5195 target_size -= copied_len; 5196 5197 if (copy_size != copied_len) 5198 continue; 5199 5200 if (target_psize == 0 && target_size != 0) { 5201 target_pa += pg_size; 5202 target_poff = 0; 5203 target_psize = min(pg_size, target_size); 5204 } 5205 5206 if (local_psize == 0 && target_size != 0) { 5207 local_valign += pg_size; 5208 local_ra = va_to_pa((void *)local_valign); 5209 local_poff = 0; 5210 local_psize = min(pg_size, len); 5211 len -= local_psize; 5212 } 5213 5214 /* check if we are all done */ 5215 if (target_size == 0) 5216 break; 5217 } 5218 5219 mutex_exit(&ldcp->lock); 5220 5221 D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n", 5222 ldcp->id, *size); 5223 5224 return (0); 5225 } 5226 5227 /* 5228 * Map an exported memory segment into the local address space. If the 5229 * memory range was exported for direct map access, a HV call is made 5230 * to allocate a RA range. If the map is done via a shadow copy, local 5231 * shadow memory is allocated and the base VA is returned in 'vaddr'. If 5232 * the mapping is a direct map then the RA is returned in 'raddr'. 
5233 */
5234 int
5235 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
5236 uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
5237 {
5238 int i, j, idx, rv, retries;
5239 ldc_chan_t *ldcp;
5240 ldc_mhdl_t *mhdl;
5241 ldc_memseg_t *memseg;
5242 caddr_t tmpaddr;
5243 uint64_t map_perm = perm;
5244 uint64_t pg_size, pg_shift, pg_size_code, pg_mask;
5245 uint64_t exp_size = 0, base_off, map_size, npages;
5246 uint64_t cookie_addr, cookie_off, cookie_size;
5247 tte_t ldc_tte;
5248
5249 if (mhandle == NULL) {
5250 DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
5251 return (EINVAL);
5252 }
5253 mhdl = (ldc_mhdl_t *)mhandle;
5254
5255 mutex_enter(&mhdl->lock);
5256
5257 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
5258 mhdl->memseg != NULL) {
5259 DWARN(DBG_ALL_LDCS,
5260 "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
5261 mutex_exit(&mhdl->lock);
5262 return (EINVAL);
5263 }
5264
5265 ldcp = mhdl->ldcp;
5266
5267 mutex_enter(&ldcp->lock);
5268
5269 if (ldcp->tstate != TS_UP) {
5270 DWARN(ldcp->id,
5271 "ldc_mem_map: (0x%llx) channel is not UP\n",
5272 ldcp->id);
5273 mutex_exit(&ldcp->lock);
5274 mutex_exit(&mhdl->lock);
5275 return (ECONNRESET);
5276 }
5277
5278 if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5279 DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5280 mutex_exit(&ldcp->lock);
5281 mutex_exit(&mhdl->lock);
5282 return (EINVAL);
5283 }
5284
5285 D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5286 ldcp->id, cookie->addr, cookie->size);
5287
5288 /* FUTURE: get the page size, pgsz code, and shift */
5289 pg_size = MMU_PAGESIZE;
5290 pg_size_code = page_szc(pg_size);
5291 pg_shift = page_get_shift(pg_size_code);
5292 pg_mask = ~(pg_size - 1);
5293
5294 /* calculate the number of pages in the exported cookie */
5295 base_off = cookie[0].addr & (pg_size - 1);
5296 for (idx = 0; idx < ccount; idx++)
5297 exp_size += cookie[idx].size;
5298 map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5299 npages = (map_size >> pg_shift);
5300
5301 /* Allocate memseg structure */
5302 memseg = mhdl->memseg =
5303 kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5304
5305 /* Allocate memory to store all pages and cookies */
5306 memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5307 memseg->cookies =
5308 kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5309
5310 D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5311 "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5312
5313 /*
5314 * Check if direct map over shared memory is enabled, if not change
5315 * the mapping type to SHADOW_MAP.
5316 */ 5317 if (ldc_shmem_enabled == 0) 5318 mtype = LDC_SHADOW_MAP; 5319 5320 /* 5321 * Check to see if the client is requesting direct or shadow map 5322 * If direct map is requested, try to map remote memory first, 5323 * and if that fails, revert to shadow map 5324 */ 5325 if (mtype == LDC_DIRECT_MAP) { 5326 5327 /* Allocate kernel virtual space for mapping */ 5328 memseg->vaddr = vmem_xalloc(heap_arena, map_size, 5329 pg_size, 0, 0, NULL, NULL, VM_NOSLEEP); 5330 if (memseg->vaddr == NULL) { 5331 cmn_err(CE_WARN, 5332 "ldc_mem_map: (0x%lx) memory map failed\n", 5333 ldcp->id); 5334 kmem_free(memseg->cookies, 5335 (sizeof (ldc_mem_cookie_t) * ccount)); 5336 kmem_free(memseg->pages, 5337 (sizeof (ldc_page_t) * npages)); 5338 kmem_cache_free(ldcssp->memseg_cache, memseg); 5339 5340 mutex_exit(&ldcp->lock); 5341 mutex_exit(&mhdl->lock); 5342 return (ENOMEM); 5343 } 5344 5345 /* Unload previous mapping */ 5346 hat_unload(kas.a_hat, memseg->vaddr, map_size, 5347 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 5348 5349 /* for each cookie passed in - map into address space */ 5350 idx = 0; 5351 cookie_size = 0; 5352 tmpaddr = memseg->vaddr; 5353 5354 for (i = 0; i < npages; i++) { 5355 5356 if (cookie_size == 0) { 5357 ASSERT(idx < ccount); 5358 cookie_addr = cookie[idx].addr & pg_mask; 5359 cookie_off = cookie[idx].addr & (pg_size - 1); 5360 cookie_size = 5361 P2ROUNDUP((cookie_off + cookie[idx].size), 5362 pg_size); 5363 idx++; 5364 } 5365 5366 D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping " 5367 "cookie 0x%llx, bal=0x%llx\n", ldcp->id, 5368 cookie_addr, cookie_size); 5369 5370 /* map the cookie into address space */ 5371 for (retries = 0; retries < ldc_max_retries; 5372 retries++) { 5373 5374 rv = hv_ldc_mapin(ldcp->id, cookie_addr, 5375 &memseg->pages[i].raddr, &map_perm); 5376 if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY) 5377 break; 5378 5379 drv_usecwait(ldc_delay); 5380 } 5381 5382 if (rv || memseg->pages[i].raddr == 0) { 5383 DWARN(ldcp->id, 5384 "ldc_mem_map: (0x%llx) hv mapin err %d\n", 5385 ldcp->id, rv); 5386 5387 /* remove previous mapins */ 5388 hat_unload(kas.a_hat, memseg->vaddr, map_size, 5389 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 5390 for (j = 0; j < i; j++) { 5391 rv = hv_ldc_unmap( 5392 memseg->pages[j].raddr); 5393 if (rv) { 5394 DWARN(ldcp->id, 5395 "ldc_mem_map: (0x%llx) " 5396 "cannot unmap ra=0x%llx\n", 5397 ldcp->id, 5398 memseg->pages[j].raddr); 5399 } 5400 } 5401 5402 /* free kernel virtual space */ 5403 vmem_free(heap_arena, (void *)memseg->vaddr, 5404 map_size); 5405 5406 /* direct map failed - revert to shadow map */ 5407 mtype = LDC_SHADOW_MAP; 5408 break; 5409 5410 } else { 5411 5412 D1(ldcp->id, 5413 "ldc_mem_map: (0x%llx) vtop map 0x%llx -> " 5414 "0x%llx, cookie=0x%llx, perm=0x%llx\n", 5415 ldcp->id, tmpaddr, memseg->pages[i].raddr, 5416 cookie_addr, perm); 5417 5418 /* 5419 * NOTE: Calling hat_devload directly, causes it 5420 * to look for page_t using the pfn. 
Since this
5421 * addr is greater than the memlist, it treats
5422 * it as non-memory
5423 */
5424 sfmmu_memtte(&ldc_tte,
5425 (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5426 PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5427
5428 D1(ldcp->id,
5429 "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5430 "tte 0x%llx\n", ldcp->id,
5431 memseg->pages[i].raddr, ldc_tte);
5432
5433 sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5434 NULL, HAT_LOAD_LOCK);
5435
5436 cookie_size -= pg_size;
5437 cookie_addr += pg_size;
5438 tmpaddr += pg_size;
5439 }
5440 }
5441 }
5442
5443 if (mtype == LDC_SHADOW_MAP) {
5444 if (*vaddr == NULL) {
5445 memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP);
5446 mhdl->myshadow = B_TRUE;
5447
5448 D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5449 "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5450 } else {
5451 /*
5452 * Use client supplied memory for memseg->vaddr
5453 * WARNING: assuming that client mem is >= exp_size
5454 */
5455 memseg->vaddr = *vaddr;
5456 }
5457
5458 /* Save all page and cookie information */
5459 for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5460 memseg->pages[i].raddr = va_to_pa(tmpaddr);
5461 memseg->pages[i].size = pg_size;
5462 tmpaddr += pg_size;
5463 }
5464
5465 }
5466
5467 /* save all cookies */
5468 bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5469
5470 /* update memseg_t */
5471 memseg->raddr = memseg->pages[0].raddr;
5472 memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5473 memseg->npages = npages;
5474 memseg->ncookies = ccount;
5475 memseg->next_cookie = 0;
5476
5477 /* memory handle = mapped */
5478 mhdl->mtype = mtype;
5479 mhdl->perm = perm;
5480 mhdl->status = LDC_MAPPED;
5481
5482 D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5483 "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5484 ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5485 memseg->npages, memseg->ncookies);
5486
5487 if (mtype == LDC_SHADOW_MAP)
5488 base_off = 0;
5489 if (raddr)
5490 *raddr = (caddr_t)(memseg->raddr | base_off);
5491 if (vaddr)
5492 *vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5493
5494 mutex_exit(&ldcp->lock);
5495 mutex_exit(&mhdl->lock);
5496 return (0);
5497 }
5498
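/*
 * Usage sketch (illustrative only, guarded out of the build): importer-side
 * mapping of a peer's exported segment. With LDC_SHADOW_MAP and a NULL
 * 'vaddr', the framework allocates the shadow buffer and returns its base
 * address; ldc_mem_acquire()/ldc_mem_release() (below) keep the shadow in
 * sync. 'map_peer_seg' is a hypothetical name; error handling abbreviated.
 */
#ifdef LDC_USAGE_EXAMPLE
static int
map_peer_seg(ldc_handle_t chan, ldc_mem_cookie_t *cookies, uint32_t ccount,
	ldc_mem_handle_t *mhp, caddr_t *vap)
{
	int rv;

	if ((rv = ldc_mem_alloc_handle(chan, mhp)) != 0)
		return (rv);

	*vap = NULL;	/* let the framework allocate shadow memory */
	rv = ldc_mem_map(*mhp, cookies, ccount, LDC_SHADOW_MAP,
	    LDC_MEM_RW, vap, NULL);
	if (rv != 0)
		(void) ldc_mem_free_handle(*mhp);

	return (rv);
}
#endif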
5499 /*
5500 * Unmap a memory segment. Free shadow memory (if any).
5501 */
5502 int
5503 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5504 {
5505 int i, rv;
5506 ldc_mhdl_t *mhdl = (ldc_mhdl_t *)mhandle;
5507 ldc_chan_t *ldcp;
5508 ldc_memseg_t *memseg;
5509
5510 if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5511 DWARN(DBG_ALL_LDCS,
5512 "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5513 mhandle);
5514 return (EINVAL);
5515 }
5516
5517 mutex_enter(&mhdl->lock);
5518
5519 ldcp = mhdl->ldcp;
5520 memseg = mhdl->memseg;
5521
5522 D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5523 ldcp->id, mhdl);
5524
5525 /* if we allocated shadow memory - free it */
5526 if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5527 kmem_free(memseg->vaddr, memseg->size);
5528 } else if (mhdl->mtype == LDC_DIRECT_MAP) {
5529
5530 /* unmap in the case of DIRECT_MAP */
5531 hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5532 HAT_UNLOAD_UNLOCK);
5533
5534 for (i = 0; i < memseg->npages; i++) {
5535 rv = hv_ldc_unmap(memseg->pages[i].raddr);
5536 if (rv) {
5537 cmn_err(CE_WARN,
5538 "ldc_mem_unmap: (0x%lx) hv unmap err %d\n",
5539 ldcp->id, rv);
5540 }
5541 }
5542
5543 vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5544 }
5545
5546 /* free the allocated memseg and page structures */
5547 kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5548 kmem_free(memseg->cookies,
5549 (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5550 kmem_cache_free(ldcssp->memseg_cache, memseg);
5551
5552 /* uninitialize the memory handle */
5553 mhdl->memseg = NULL;
5554 mhdl->status = LDC_UNBOUND;
5555
5556 D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5557 ldcp->id, mhdl);
5558
5559 mutex_exit(&mhdl->lock);
5560 return (0);
5561 }
5562
5563 /*
5564 * Internal entry point for LDC mapped memory entry consistency
5565 * semantics. Acquire copies the contents of the remote memory
5566 * into the local shadow copy. The release operation copies the local
5567 * contents into the remote memory. The offset and size specify the
5568 * bounds for the memory range being synchronized.
5569 */
5570 static int
5571 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5572 uint64_t offset, size_t size)
5573 {
5574 int err;
5575 ldc_mhdl_t *mhdl;
5576 ldc_chan_t *ldcp;
5577 ldc_memseg_t *memseg;
5578 caddr_t local_vaddr;
5579 size_t copy_size;
5580
5581 if (mhandle == NULL) {
5582 DWARN(DBG_ALL_LDCS,
5583 "i_ldc_mem_acquire_release: invalid memory handle\n");
5584 return (EINVAL);
5585 }
5586 mhdl = (ldc_mhdl_t *)mhandle;
5587
5588 mutex_enter(&mhdl->lock);
5589
5590 if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5591 DWARN(DBG_ALL_LDCS,
5592 "i_ldc_mem_acquire_release: not mapped memory\n");
5593 mutex_exit(&mhdl->lock);
5594 return (EINVAL);
5595 }
5596
5597 /* do nothing for direct map */
5598 if (mhdl->mtype == LDC_DIRECT_MAP) {
5599 mutex_exit(&mhdl->lock);
5600 return (0);
5601 }
5602
5603 /* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
5604 if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
5605 (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
5606 mutex_exit(&mhdl->lock);
5607 return (0);
5608 }
5609
5610 if (offset >= mhdl->memseg->size ||
5611 (offset + size) > mhdl->memseg->size) {
5612 DWARN(DBG_ALL_LDCS,
5613 "i_ldc_mem_acquire_release: memory out of range\n");
5614 mutex_exit(&mhdl->lock);
5615 return (EINVAL);
5616 }
5617
5618 /* get the channel handle and memory segment */
5619 ldcp = mhdl->ldcp;
5620 memseg = mhdl->memseg;
5621
5622 if (mhdl->mtype == LDC_SHADOW_MAP) {
5623
5624 local_vaddr = memseg->vaddr + offset;
5625 copy_size = size;
5626
5627 /* copy to/from remote from/to local memory */
5628 err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5629 &copy_size, memseg->cookies, memseg->ncookies,
5630 direction);
5631 if (err || copy_size != size) {
5632 DWARN(ldcp->id,
5633 "i_ldc_mem_acquire_release: copy failed\n");
5634 mutex_exit(&mhdl->lock);
5635 return (err);
5636 }
5637 }
5638
5639 mutex_exit(&mhdl->lock);
5640
5641 return (0);
5642 }
5643
5644 /*
5645 * Ensure that the contents of the local memory seg are consistent
5646 * with the contents of the remote memory seg
5647 */
5648 int
5649 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5650 {
5651 return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5652 }
5653
5654
5655 /*
5656 * Ensure that the contents of the remote memory seg are consistent
5657 * with the contents of the local memory seg
5658 */
5659 int
5660 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5661 {
5662 return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5663 }
5664
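/*
 * Usage sketch (illustrative only, guarded out of the build): the
 * consistency protocol for a shadow-mapped segment. Acquire a range
 * before reading it so the shadow reflects the exporter's memory, and
 * release it after writing so the change propagates back. 'mh' and 'va'
 * come from a prior ldc_mem_map(); 'off' is assumed 8-byte aligned.
 */
#ifdef LDC_USAGE_EXAMPLE
static int
update_word(ldc_mem_handle_t mh, caddr_t va, uint64_t off, uint64_t val)
{
	int rv;

	/* pull the current contents of [off, off + 8) into the shadow */
	if ((rv = ldc_mem_acquire(mh, off, sizeof (uint64_t))) != 0)
		return (rv);

	*(uint64_t *)(va + off) = val;

	/* push the modified range back out to the exported memory */
	return (ldc_mem_release(mh, off, sizeof (uint64_t)));
}
#endif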
5665 /*
5666 * Allocate a descriptor ring. The size of each descriptor
5667 * must be 8-byte aligned and the entire ring should be a multiple
5668 * of MMU_PAGESIZE.
5669 */
5670 int
5671 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5672 {
5673 ldc_dring_t *dringp;
5674 size_t size = (dsize * len);
5675
5676 D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5677 len, dsize);
5678
5679 if (dhandle == NULL) {
5680 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5681 return (EINVAL);
5682 }
5683
5684 if (len == 0) {
5685 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5686 return (EINVAL);
5687 }
5688
5689 /* descriptor size should be 8-byte aligned */
5690 if (dsize == 0 || (dsize & 0x7)) {
5691 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5692 return (EINVAL);
5693 }
5694
5695 *dhandle = 0;
5696
5697 /* Allocate a desc ring structure */
5698 dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5699
5700 /* Initialize dring */
5701 dringp->length = len;
5702 dringp->dsize = dsize;
5703
5704 /* round up to multiple of pagesize */
5705 dringp->size = (size & MMU_PAGEMASK);
5706 if (size & MMU_PAGEOFFSET)
5707 dringp->size += MMU_PAGESIZE;
5708
5709 dringp->status = LDC_UNBOUND;
5710
5711 /* allocate descriptor ring memory */
5712 dringp->base = kmem_zalloc(dringp->size, KM_SLEEP);
5713
5714 /* initialize the desc ring lock */
5715 mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5716
5717 /* Add descriptor ring to the head of global list */
5718 mutex_enter(&ldcssp->lock);
5719 dringp->next = ldcssp->dring_list;
5720 ldcssp->dring_list = dringp;
5721 mutex_exit(&ldcssp->lock);
5722
5723 *dhandle = (ldc_dring_handle_t)dringp;
5724
5725 D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5726
5727 return (0);
5728 }
5729
5730
5731 /*
5732 * Destroy a descriptor ring.
5733 */
5734 int
5735 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5736 {
5737 ldc_dring_t *dringp;
5738 ldc_dring_t *tmp_dringp;
5739
5740 D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5741
5742 if (dhandle == NULL) {
5743 DWARN(DBG_ALL_LDCS,
5744 "ldc_mem_dring_destroy: invalid desc ring handle\n");
5745 return (EINVAL);
5746 }
5747 dringp = (ldc_dring_t *)dhandle;
5748
5749 if (dringp->status == LDC_BOUND) {
5750 DWARN(DBG_ALL_LDCS,
5751 "ldc_mem_dring_destroy: desc ring is bound\n");
5752 return (EACCES);
5753 }
5754
5755 mutex_enter(&dringp->lock);
5756 mutex_enter(&ldcssp->lock);
5757
5758 /* remove from linked list - if not bound */
5759 tmp_dringp = ldcssp->dring_list;
5760 if (tmp_dringp == dringp) {
5761 ldcssp->dring_list = dringp->next;
5762 dringp->next = NULL;
5763
5764 } else {
5765 while (tmp_dringp != NULL) {
5766 if (tmp_dringp->next == dringp) {
5767 tmp_dringp->next = dringp->next;
5768 dringp->next = NULL;
5769 break;
5770 }
5771 tmp_dringp = tmp_dringp->next;
5772 }
5773 if (tmp_dringp == NULL) {
5774 DWARN(DBG_ALL_LDCS,
5775 "ldc_mem_dring_destroy: invalid descriptor\n");
5776 mutex_exit(&ldcssp->lock);
5777 mutex_exit(&dringp->lock);
5778 return (EINVAL);
5779 }
5780 }
5781
5782 mutex_exit(&ldcssp->lock);
5783
5784 /* free the descriptor ring */
5785 kmem_free(dringp->base, dringp->size);
5786
5787 mutex_exit(&dringp->lock);
5788
5789 /* destroy dring lock */
5790 mutex_destroy(&dringp->lock);
5791
5792 /* free desc ring object */
5793 kmem_free(dringp, sizeof (ldc_dring_t));
5794
5795 return (0);
5796 }
5797
5798 /*
5799 * Bind a previously allocated dring to a channel. The channel should
5800 * be OPEN in order to bind the ring to the channel. Returns a
5801 * descriptor ring cookie.
The descriptor ring is exported for remote
5802 * access by the client at the other end of the channel. An entry for
5803 * dring pages is stored in the map table (via a call to ldc_mem_bind_handle).
5804 */
5805 int
5806 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5807 uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5808 {
5809 int err;
5810 ldc_chan_t *ldcp;
5811 ldc_dring_t *dringp;
5812 ldc_mem_handle_t mhandle;
5813
5814 /* check to see if channel is initialized */
5815 if (handle == NULL) {
5816 DWARN(DBG_ALL_LDCS,
5817 "ldc_mem_dring_bind: invalid channel handle\n");
5818 return (EINVAL);
5819 }
5820 ldcp = (ldc_chan_t *)handle;
5821
5822 if (dhandle == NULL) {
5823 DWARN(DBG_ALL_LDCS,
5824 "ldc_mem_dring_bind: invalid desc ring handle\n");
5825 return (EINVAL);
5826 }
5827 dringp = (ldc_dring_t *)dhandle;
5828
5829 if (cookie == NULL) {
5830 DWARN(ldcp->id,
5831 "ldc_mem_dring_bind: invalid cookie arg\n");
5832 return (EINVAL);
5833 }
5834
5835 mutex_enter(&dringp->lock);
5836
5837 if (dringp->status == LDC_BOUND) {
5838 DWARN(DBG_ALL_LDCS,
5839 "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5840 ldcp->id);
5841 mutex_exit(&dringp->lock);
5842 return (EINVAL);
5843 }
5844
5845 if ((perm & LDC_MEM_RW) == 0) {
5846 DWARN(DBG_ALL_LDCS,
5847 "ldc_mem_dring_bind: invalid permissions\n");
5848 mutex_exit(&dringp->lock);
5849 return (EINVAL);
5850 }
5851
5852 if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5853 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5854 mutex_exit(&dringp->lock);
5855 return (EINVAL);
5856 }
5857
5858 dringp->ldcp = ldcp;
5859
5860 /* create a memory handle */
5861 err = ldc_mem_alloc_handle(handle, &mhandle);
5862 if (err || mhandle == NULL) {
5863 DWARN(DBG_ALL_LDCS,
5864 "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5865 ldcp->id);
5866 mutex_exit(&dringp->lock);
5867 return (err);
5868 }
5869 dringp->mhdl = mhandle;
5870
5871 /* bind the descriptor ring to channel */
5872 err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5873 mtype, perm, cookie, ccount);
5874 if (err) {
5875 DWARN(ldcp->id,
5876 "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5877 ldcp->id);
5878 mutex_exit(&dringp->lock);
5879 return (err);
5880 }
5881
5882 /*
5883 * For now return error if we get more than one cookie
5884 * FUTURE: Return multiple cookies ..
5885 */
5886 if (*ccount > 1) {
5887 (void) ldc_mem_unbind_handle(mhandle);
5888 (void) ldc_mem_free_handle(mhandle);
5889
5890 dringp->ldcp = NULL;
5891 dringp->mhdl = NULL;
5892 *ccount = 0;
5893
5894 mutex_exit(&dringp->lock);
5895 return (EAGAIN);
5896 }
5897
5898 /* Add descriptor ring to channel's exported dring list */
5899 mutex_enter(&ldcp->exp_dlist_lock);
5900 dringp->ch_next = ldcp->exp_dring_list;
5901 ldcp->exp_dring_list = dringp;
5902 mutex_exit(&ldcp->exp_dlist_lock);
5903
5904 dringp->status = LDC_BOUND;
5905
5906 mutex_exit(&dringp->lock);
5907
5908 return (0);
5909 }
5910
5911 /*
5912 * Return the next cookie associated with the specified dring handle
5913 */
5914 int
5915 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5916 {
5917 int rv = 0;
5918 ldc_dring_t *dringp;
5919 ldc_chan_t *ldcp;
5920
5921 if (dhandle == NULL) {
5922 DWARN(DBG_ALL_LDCS,
5923 "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5924 return (EINVAL);
5925 }
5926 dringp = (ldc_dring_t *)dhandle;
5927 mutex_enter(&dringp->lock);
5928
5929 if (dringp->status != LDC_BOUND) {
5930 DWARN(DBG_ALL_LDCS,
5931 "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5932 "is not bound\n", dringp);
5933 mutex_exit(&dringp->lock);
5934 return (EINVAL);
5935 }
5936
5937 ldcp = dringp->ldcp;
5938
5939 if (cookie == NULL) {
5940 DWARN(ldcp->id,
5941 "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5942 ldcp->id);
5943 mutex_exit(&dringp->lock);
5944 return (EINVAL);
5945 }
5946
5947 rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5948 mutex_exit(&dringp->lock);
5949
5950 return (rv);
5951 }
5952 /*
5953 * Unbind a previously bound dring from a channel.
5954 */
5955 int
5956 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5957 {
5958 ldc_dring_t *dringp;
5959 ldc_dring_t *tmp_dringp;
5960 ldc_chan_t *ldcp;
5961
5962 if (dhandle == NULL) {
5963 DWARN(DBG_ALL_LDCS,
5964 "ldc_mem_dring_unbind: invalid desc ring handle\n");
5965 return (EINVAL);
5966 }
5967 dringp = (ldc_dring_t *)dhandle;
5968
5969 mutex_enter(&dringp->lock);
5970
5971 if (dringp->status == LDC_UNBOUND) {
5972 DWARN(DBG_ALL_LDCS,
5973 "ldc_mem_dring_unbind: descriptor ring 0x%llx is unbound\n",
5974 dringp);
5975 mutex_exit(&dringp->lock);
5976 return (EINVAL);
5977 }
5978 ldcp = dringp->ldcp;
5979
5980 mutex_enter(&ldcp->exp_dlist_lock);
5981
5982 tmp_dringp = ldcp->exp_dring_list;
5983 if (tmp_dringp == dringp) {
5984 ldcp->exp_dring_list = dringp->ch_next;
5985 dringp->ch_next = NULL;
5986
5987 } else {
5988 while (tmp_dringp != NULL) {
5989 if (tmp_dringp->ch_next == dringp) {
5990 tmp_dringp->ch_next = dringp->ch_next;
5991 dringp->ch_next = NULL;
5992 break;
5993 }
5994 tmp_dringp = tmp_dringp->ch_next;
5995 }
5996 if (tmp_dringp == NULL) {
5997 DWARN(DBG_ALL_LDCS,
5998 "ldc_mem_dring_unbind: invalid descriptor\n");
5999 mutex_exit(&ldcp->exp_dlist_lock);
6000 mutex_exit(&dringp->lock);
6001 return (EINVAL);
6002 }
6003 }
6004
6005 mutex_exit(&ldcp->exp_dlist_lock);
6006
6007 (void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
6008 (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6009
6010 dringp->ldcp = NULL;
6011 dringp->mhdl = NULL;
6012 dringp->status = LDC_UNBOUND;
6013
6014 mutex_exit(&dringp->lock);
6015
6016 return (0);
6017 }
6018
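/*
 * Usage sketch (illustrative only, guarded out of the build): exporter-side
 * descriptor ring lifecycle. Binding currently returns EAGAIN when the ring
 * spans more than one cookie, so that case is treated as fatal here. The
 * names 'NDESC', 'my_desc' and 'export_ring' are hypothetical.
 */
#ifdef LDC_USAGE_EXAMPLE
#define	NDESC	128
struct my_desc { uint64_t data[4]; };	/* dsize is 8-byte aligned */

static int
export_ring(ldc_handle_t chan, ldc_dring_handle_t *dhp,
	ldc_mem_cookie_t *cookie)
{
	uint32_t ccount;
	int rv;

	rv = ldc_mem_dring_create(NDESC, sizeof (struct my_desc), dhp);
	if (rv != 0)
		return (rv);

	/* channel should be open; a single cookie comes back on success */
	rv = ldc_mem_dring_bind(chan, *dhp, LDC_SHADOW_MAP, LDC_MEM_RW,
	    cookie, &ccount);
	if (rv != 0)
		(void) ldc_mem_dring_destroy(*dhp);

	return (rv);
}
#endif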
6019 /*
6020 * Get information about the dring. The base address of the descriptor
6021 * ring along with the type and permission are returned.
6022 */
6023 int
6024 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
6025 {
6026 ldc_dring_t *dringp;
6027 int rv;
6028
6029 if (dhandle == NULL) {
6030 DWARN(DBG_ALL_LDCS,
6031 "ldc_mem_dring_info: invalid desc ring handle\n");
6032 return (EINVAL);
6033 }
6034 dringp = (ldc_dring_t *)dhandle;
6035
6036 mutex_enter(&dringp->lock);
6037
6038 if (dringp->mhdl) {
6039 rv = ldc_mem_info(dringp->mhdl, minfo);
6040 if (rv) {
6041 DWARN(DBG_ALL_LDCS,
6042 "ldc_mem_dring_info: error reading mem info\n");
6043 mutex_exit(&dringp->lock);
6044 return (rv);
6045 }
6046 } else {
6047 minfo->vaddr = dringp->base;
6048 minfo->raddr = NULL;
6049 minfo->status = dringp->status;
6050 }
6051
6052 mutex_exit(&dringp->lock);
6053
6054 return (0);
6055 }
6056
6057 /*
6058 * Map an exported descriptor ring into the local address space. If the
6059 * descriptor ring was exported for direct map access, a HV call is made
6060 * to allocate a RA range. If the map is done via a shadow copy, local
6061 * shadow memory is allocated.
6062 */
6063 int
6064 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
6065 uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
6066 ldc_dring_handle_t *dhandle)
6067 {
6068 int err;
6069 ldc_chan_t *ldcp = (ldc_chan_t *)handle;
6070 ldc_mem_handle_t mhandle;
6071 ldc_dring_t *dringp;
6072 size_t dring_size;
6073
6074 if (dhandle == NULL) {
6075 DWARN(DBG_ALL_LDCS,
6076 "ldc_mem_dring_map: invalid dhandle\n");
6077 return (EINVAL);
6078 }
6079
6080 /* check to see if channel is initialized */
6081 if (handle == NULL) {
6082 DWARN(DBG_ALL_LDCS,
6083 "ldc_mem_dring_map: invalid channel handle\n");
6084 return (EINVAL);
6085 }
6086 ldcp = (ldc_chan_t *)handle;
6087
6088 if (cookie == NULL) {
6089 DWARN(ldcp->id,
6090 "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
6091 ldcp->id);
6092 return (EINVAL);
6093 }
6094
6095 /* FUTURE: For now we support only one cookie per dring */
6096 ASSERT(ccount == 1);
6097
6098 if (cookie->size < (dsize * len)) {
6099 DWARN(ldcp->id,
6100 "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
6101 ldcp->id);
6102 return (EINVAL);
6103 }
6104
6105 *dhandle = 0;
6106
6107 /* Allocate a dring structure */
6108 dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6109
6110 D1(ldcp->id,
6111 "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
6112 mtype, len, dsize, cookie->addr, cookie->size);
6113
6114 /* Initialize dring */
6115 dringp->length = len;
6116 dringp->dsize = dsize;
6117
6118 /* round up to multiple of page size */
6119 dring_size = len * dsize;
6120 dringp->size = (dring_size & MMU_PAGEMASK);
6121 if (dring_size & MMU_PAGEOFFSET)
6122 dringp->size += MMU_PAGESIZE;
6123
6124 dringp->ldcp = ldcp;
6125
6126 /* create a memory handle */
6127 err = ldc_mem_alloc_handle(handle, &mhandle);
6128 if (err || mhandle == NULL) {
6129 DWARN(DBG_ALL_LDCS,
6130 "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
6131 err);
6132 kmem_free(dringp, sizeof (ldc_dring_t));
6133 return (ENOMEM);
6134 }
6135
6136 dringp->mhdl = mhandle;
6137 dringp->base = NULL;
6138
6139 /* map the dring into local memory */
6140 err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
6141 &(dringp->base), NULL);
6142 if (err || dringp->base == NULL) {
6143 cmn_err(CE_WARN,
6144 "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
6145 (void) ldc_mem_free_handle(mhandle);
6146 kmem_free(dringp, sizeof (ldc_dring_t));
6147 return (ENOMEM);
6148 }
6149
6150 /* initialize the desc ring lock */
6151 mutex_init(&dringp->lock, NULL,
MUTEX_DRIVER, NULL);
6152
6153 /* Add descriptor ring to channel's imported dring list */
6154 mutex_enter(&ldcp->imp_dlist_lock);
6155 dringp->ch_next = ldcp->imp_dring_list;
6156 ldcp->imp_dring_list = dringp;
6157 mutex_exit(&ldcp->imp_dlist_lock);
6158
6159 dringp->status = LDC_MAPPED;
6160
6161 *dhandle = (ldc_dring_handle_t)dringp;
6162
6163 return (0);
6164 }
6165
6166 /*
6167 * Unmap a descriptor ring. Free shadow memory (if any).
6168 */
6169 int
6170 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
6171 {
6172 ldc_dring_t *dringp;
6173 ldc_dring_t *tmp_dringp;
6174 ldc_chan_t *ldcp;
6175
6176 if (dhandle == NULL) {
6177 DWARN(DBG_ALL_LDCS,
6178 "ldc_mem_dring_unmap: invalid desc ring handle\n");
6179 return (EINVAL);
6180 }
6181 dringp = (ldc_dring_t *)dhandle;
6182
6183 if (dringp->status != LDC_MAPPED) {
6184 DWARN(DBG_ALL_LDCS,
6185 "ldc_mem_dring_unmap: not a mapped desc ring\n");
6186 return (EINVAL);
6187 }
6188
6189 mutex_enter(&dringp->lock);
6190
6191 ldcp = dringp->ldcp;
6192
6193 mutex_enter(&ldcp->imp_dlist_lock);
6194
6195 /* find and unlink the desc ring from channel import list */
6196 tmp_dringp = ldcp->imp_dring_list;
6197 if (tmp_dringp == dringp) {
6198 ldcp->imp_dring_list = dringp->ch_next;
6199 dringp->ch_next = NULL;
6200
6201 } else {
6202 while (tmp_dringp != NULL) {
6203 if (tmp_dringp->ch_next == dringp) {
6204 tmp_dringp->ch_next = dringp->ch_next;
6205 dringp->ch_next = NULL;
6206 break;
6207 }
6208 tmp_dringp = tmp_dringp->ch_next;
6209 }
6210 if (tmp_dringp == NULL) {
6211 DWARN(DBG_ALL_LDCS,
6212 "ldc_mem_dring_unmap: invalid descriptor\n");
6213 mutex_exit(&ldcp->imp_dlist_lock);
6214 mutex_exit(&dringp->lock);
6215 return (EINVAL);
6216 }
6217 }
6218
6219 mutex_exit(&ldcp->imp_dlist_lock);
6220
6221 /* do an LDC memory handle unmap and free */
6222 (void) ldc_mem_unmap(dringp->mhdl);
6223 (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6224
6225 dringp->status = 0;
6226 dringp->ldcp = NULL;
6227
6228 mutex_exit(&dringp->lock);
6229
6230 /* destroy dring lock */
6231 mutex_destroy(&dringp->lock);
6232
6233 /* free desc ring object */
6234 kmem_free(dringp, sizeof (ldc_dring_t));
6235
6236 return (0);
6237 }
6238
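/*
 * Usage sketch (illustrative only, guarded out of the build): importer-side
 * sequence that maps a peer's ring from its cookie and keeps descriptor
 * 'idx' coherent around an update. 'len' and 'dsize' must match the values
 * the exporter passed to ldc_mem_dring_create(); 'import_and_sync' is a
 * hypothetical name.
 */
#ifdef LDC_USAGE_EXAMPLE
static int
import_and_sync(ldc_handle_t chan, ldc_mem_cookie_t *cookie, uint32_t len,
	uint32_t dsize, uint64_t idx, ldc_dring_handle_t *dhp)
{
	ldc_mem_info_t minfo;
	int rv;

	rv = ldc_mem_dring_map(chan, cookie, 1, len, dsize,
	    LDC_SHADOW_MAP, dhp);
	if (rv != 0)
		return (rv);

	/* the shadow base address is available via ldc_mem_dring_info() */
	if ((rv = ldc_mem_dring_info(*dhp, &minfo)) != 0 ||
	    (rv = ldc_mem_dring_acquire(*dhp, idx, idx)) != 0) {
		(void) ldc_mem_dring_unmap(*dhp);
		return (rv);
	}

	/* examine or modify the descriptor at minfo.vaddr + idx * dsize */

	/* push the (possibly modified) descriptor back to the exporter */
	return (ldc_mem_dring_release(*dhp, idx, idx));
}
#endif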
6239 /*
6240 * Internal entry point for descriptor ring access entry consistency
6241 * semantics. Acquire copies the contents of the remote descriptor ring
6242 * into the local shadow copy. The release operation copies the local
6243 * contents into the remote dring. The start and end locations specify
6244 * bounds for the entries being synchronized.
6245 */
6246 static int
6247 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6248 uint8_t direction, uint64_t start, uint64_t end)
6249 {
6250 int err;
6251 ldc_dring_t *dringp;
6252 ldc_chan_t *ldcp;
6253 uint64_t soff;
6254 size_t copy_size;
6255
6256 if (dhandle == NULL) {
6257 DWARN(DBG_ALL_LDCS,
6258 "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6259 return (EINVAL);
6260 }
6261 dringp = (ldc_dring_t *)dhandle;
6262 mutex_enter(&dringp->lock);
6263
6264 if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6265 DWARN(DBG_ALL_LDCS,
6266 "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6267 mutex_exit(&dringp->lock);
6268 return (EINVAL);
6269 }
6270
6271 if (start >= dringp->length || end >= dringp->length) {
6272 DWARN(DBG_ALL_LDCS,
6273 "i_ldc_dring_acquire_release: index out of range\n");
6274 mutex_exit(&dringp->lock);
6275 return (EINVAL);
6276 }
6277
6278 /* get the channel handle */
6279 ldcp = dringp->ldcp;
6280
6281 copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6282 ((dringp->length - start) * dringp->dsize);
6283
6284 /* Calculate the relative offset for the first desc */
6285 soff = (start * dringp->dsize);
6286
6287 /* copy to/from remote from/to local memory */
6288 D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6289 soff, copy_size);
6290 err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6291 direction, soff, copy_size);
6292 if (err) {
6293 DWARN(ldcp->id,
6294 "i_ldc_dring_acquire_release: copy failed\n");
6295 mutex_exit(&dringp->lock);
6296 return (err);
6297 }
6298
6299 /* do the balance */
6300 if (start > end) {
6301 copy_size = ((end + 1) * dringp->dsize);
6302 soff = 0;
6303
6304 /* copy to/from remote from/to local memory */
6305 D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6306 "off=0x%llx sz=0x%llx\n", soff, copy_size);
6307 err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6308 direction, soff, copy_size);
6309 if (err) {
6310 DWARN(ldcp->id,
6311 "i_ldc_dring_acquire_release: copy failed\n");
6312 mutex_exit(&dringp->lock);
6313 return (err);
6314 }
6315 }
6316
6317 mutex_exit(&dringp->lock);
6318
6319 return (0);
6320 }
6321
6322 /*
6323 * Ensure that the contents of the local dring are consistent
6324 * with the contents of the remote dring
6325 */
6326 int
6327 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6328 {
6329 return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
6330 }
6331
6332 /*
6333 * Ensure that the contents of the remote dring are consistent
6334 * with the contents of the local dring
6335 */
6336 int
6337 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6338 {
6339 return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
6340 }
6341
6342
6343 /* ------------------------------------------------------------------------- */
6344