/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * sun4v LDC Link Layer
 */
#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/ksynch.h>
#include <sys/modctl.h>
#include <sys/stat.h>		/* needed for S_IFBLK and S_IFCHR */
#include <sys/debug.h>
#include <sys/promif.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cyclic.h>
#include <sys/machsystm.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/intreg.h>
#include <sys/machcpuvar.h>
#include <sys/mmu.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/hat_sfmmu.h>
#include <sys/vm_machparam.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <sys/note.h>
#include <sys/ivintr.h>
#include <sys/hypervisor_api.h>
#include <sys/ldc.h>
#include <sys/ldc_impl.h>
#include <sys/cnex.h>
#include <sys/hsvc.h>

/* Core internal functions */
static int i_ldc_h2v_error(int h_error);
static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
static void i_ldc_reset_state(ldc_chan_t *ldcp);
static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);

static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg);

/* Interrupt handling functions */
static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);

/* Read method functions */
static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);

/* Write method functions */
static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
/* Pkt processing internal functions */
static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);

/* Memory synchronization internal functions */
static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
    uint8_t direction, uint64_t offset, size_t size);
static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
    uint8_t direction, uint64_t start, uint64_t end);

/* LDC Version */
static ldc_ver_t ldc_versions[] = { {1, 0} };

/* number of supported versions */
#define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))

/* Module State Pointer */
static ldc_soft_state_t *ldcssp;

static struct modlmisc md = {
	&mod_miscops,			/* This is a misc module */
	"sun4v LDC module v%I%",	/* Name of the module */
};

static struct modlinkage ml = {
	MODREV_1,
	&md,
	NULL
};

static uint64_t ldc_sup_minor;		/* Supported minor number */
static hsvc_info_t ldc_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
};

static uint64_t intr_sup_minor;		/* Supported minor number */
static hsvc_info_t intr_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
};

/*
 * The LDC framework supports mapping a remote domain's memory
 * either directly or via shadow memory pages. The default
 * support is currently implemented via shadow copy. Direct map
 * can be enabled by setting 'ldc_shmem_enabled'.
 */
int ldc_shmem_enabled = 0;

/*
 * The number of MTU-sized messages that can be stored in
 * the LDC Tx queue. The number of Tx queue entries is
 * then computed as (mtu * ldc_mtu_msgs) / sizeof (queue entry).
 */
uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;

/*
 * The minimum queue length. This is the size of the smallest
 * LDC queue. If the computed value is less than this default,
 * the queue length is rounded up to 'ldc_queue_entries'.
 */
uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;

/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor. The
 * default number of entries in the table is set to
 * 'ldc_maptable_entries'.
 */
uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;

/*
 * LDC retry count and delay - when the HV returns EWOULDBLOCK
 * the operation is retried 'ldc_max_retries' times with a
 * wait of 'ldc_delay' usecs between each retry.
 */
int ldc_max_retries = LDC_MAX_RETRIES;
clock_t ldc_delay = LDC_DELAY;

/*
 * Delay between each retry of channel unregistration in
 * ldc_close(), to wait for pending interrupts to complete.
 */
clock_t ldc_close_delay = LDC_CLOSE_DELAY;
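/*
 * For illustration only: since the tunables above are module globals,
 * an administrator would typically adjust them via /etc/system (the
 * values below are hypothetical examples, not recommendations), with
 * the change taking effect on the next boot:
 *
 *	set ldc:ldc_shmem_enabled = 1
 *	set ldc:ldc_mtu_msgs = 64
 *	set ldc:ldc_max_retries = 10
 */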
#ifdef DEBUG

/*
 * Print debug messages
 *
 * set ldcdbg to 0x7 to enable all msgs
 *	0x4 - Warnings
 *	0x2 - All debug messages
 *	0x1 - Minimal debug messages
 *
 * set ldcdbgchan to the channel number you want to debug
 * setting it to -1 prints debug messages for all channels
 * NOTE: ldcdbgchan has no effect on error messages
 */

#define	DBG_ALL_LDCS -1

int ldcdbg = 0x0;
int64_t ldcdbgchan = DBG_ALL_LDCS;
uint64_t ldc_inject_err_flag = 0;

static void
ldcdebug(int64_t id, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	/*
	 * Do not return if,
	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
	 * debug channel = caller specified channel
	 */
	if ((id != DBG_ALL_LDCS) &&
	    (ldcdbgchan != DBG_ALL_LDCS) &&
	    (ldcdbgchan != id)) {
		return;
	}

	va_start(ap, fmt);
	/* bounded format to guard against overflowing buf */
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	cmn_err(CE_CONT, "?%s", buf);
}

#define	LDC_ERR_RESET	0x1
#define	LDC_ERR_PKTLOSS	0x2

static boolean_t
ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
{
	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
		return (B_FALSE);

	if ((ldc_inject_err_flag & error) == 0)
		return (B_FALSE);

	/* clear the injection state */
	ldc_inject_err_flag &= ~error;

	return (B_TRUE);
}
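/*
 * For illustration only (DEBUG kernels): the knobs above can also be
 * poked on a live system with mdb; e.g. to trace warnings plus minimal
 * debug messages for channel 3 and then inject one reset on it (the
 * channel number and values here are hypothetical):
 *
 *	# mdb -kw
 *	> ldcdbg/W 5
 *	> ldcdbgchan/Z 3
 *	> ldc_inject_err_flag/Z 1	(LDC_ERR_RESET)
 *
 * ldc_inject_error() consumes the flag bit, so each write injects a
 * single error event.
 */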
#define	D1		\
if (ldcdbg & 0x01)	\
	ldcdebug

#define	D2		\
if (ldcdbg & 0x02)	\
	ldcdebug

#define	DWARN		\
if (ldcdbg & 0x04)	\
	ldcdebug

#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char buf[65*3];							\
	int i;								\
	uint8_t *src = (uint8_t *)addr;					\
	for (i = 0; i < 64; i++, src++)					\
		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
	(void) sprintf(&buf[i * 3], "|\n");				\
	D2((id), "payload: %s", buf);					\
}

#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
	if (msg->type == LDC_DATA) {					\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
		    (s), mid, msg->type, msg->stype, msg->ctrl,		\
		    (msg->env & LDC_FRAG_START) ? 'B' : ' ',		\
		    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',		\
		    (msg->env & LDC_LEN_MASK));				\
	} else {							\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),	\
		    mid, msg->type, msg->stype, msg->ctrl, msg->env);	\
	}								\
}

#define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
#define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)

#else

#define	DBG_ALL_LDCS -1

#define	D1
#define	D2
#define	DWARN

#define	DUMP_PAYLOAD(id, addr)
#define	DUMP_LDC_PKT(c, s, addr)

#define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
#define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)

#endif

#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t));

#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
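/*
 * For illustration only: IDX2COOKIE packs a map table index and the
 * page-size code of the exported page into an LDC memory cookie.
 * Assuming 8K pages (pg_szc = 0, pg_shift = 13), map table entry 5
 * would yield the cookie
 *
 *	(0 << LDC_COOKIE_PGSZC_SHIFT) | (5 << 13) = 0xa000
 *
 * i.e. the page-size code occupies the high-order bits and the index
 * is shifted into page-offset position.
 */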
int
_init(void)
{
	int status;

	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
		    " group: 0x%lx major: %ld minor: %ld errno: %d",
		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
		return (-1);
	}

	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
		(void) hsvc_unregister(&ldc_hsvc);
		return (-1);
	}

	/* allocate soft state structure */
	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);

	/* Link the module into the system */
	status = mod_install(&ml);
	if (status != 0) {
		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
		return (status);
	}

	/* Initialize the LDC state structure */
	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);

	mutex_enter(&ldcssp->lock);

	/* Create a cache for memory handles */
	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (ldcssp->memhdl_cache == NULL) {
		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
		mutex_exit(&ldcssp->lock);
		return (-1);
	}

	/* Create cache for memory segment structures */
	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (ldcssp->memseg_cache == NULL) {
		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
		mutex_exit(&ldcssp->lock);
		return (-1);
	}

	ldcssp->channel_count = 0;
	ldcssp->channels_open = 0;
	ldcssp->chan_list = NULL;
	ldcssp->dring_list = NULL;

	mutex_exit(&ldcssp->lock);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	/* Report status of the dynamically loadable driver module */
	return (mod_info(&ml, modinfop));
}

int
_fini(void)
{
	int rv, status;
	ldc_chan_t *ldcp, *next_ldcp;
	ldc_dring_t *dringp, *next_dringp;
	ldc_mem_info_t minfo;

	/* Unlink the driver module from the system */
	status = mod_remove(&ml);
	if (status) {
		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
		return (EIO);
	}

	/* close and finalize channels */
	ldcp = ldcssp->chan_list;
	while (ldcp != NULL) {
		/* save the link before ldc_fini() frees the channel */
		next_ldcp = ldcp->next;

		(void) ldc_close((ldc_handle_t)ldcp);
		(void) ldc_fini((ldc_handle_t)ldcp);

		ldcp = next_ldcp;
	}

	/* Free descriptor rings */
	dringp = ldcssp->dring_list;
	while (dringp != NULL) {
		/* save the link before the ring is destroyed */
		next_dringp = dringp->next;

		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
		if (rv == 0 && minfo.status != LDC_UNBOUND) {
			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_dring_unbind(
				    (ldc_dring_handle_t)dringp);
			}
			if (minfo.status == LDC_MAPPED) {
				(void) ldc_mem_dring_unmap(
				    (ldc_dring_handle_t)dringp);
			}
		}

		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
		dringp = next_dringp;
	}
	ldcssp->dring_list = NULL;

	/* Destroy kmem caches */
	kmem_cache_destroy(ldcssp->memhdl_cache);
	kmem_cache_destroy(ldcssp->memseg_cache);

	/*
	 * We have successfully "removed" the driver.
	 * Destroying soft states
	 */
	mutex_destroy(&ldcssp->lock);
	kmem_free(ldcssp, sizeof (ldc_soft_state_t));

	(void) hsvc_unregister(&ldc_hsvc);
	(void) hsvc_unregister(&intr_hsvc);

	return (status);
}
/* -------------------------------------------------------------------------- */

/*
 * LDC Link Layer Internal Functions
 */

/*
 * Translate HV Errors to sun4v error codes
 */
static int
i_ldc_h2v_error(int h_error)
{
	switch (h_error) {

	case H_EOK:
		return (0);

	case H_ENORADDR:
		return (EFAULT);

	case H_EBADPGSZ:
	case H_EINVAL:
		return (EINVAL);

	case H_EWOULDBLOCK:
		return (EWOULDBLOCK);

	case H_ENOACCESS:
	case H_ENOMAP:
		return (EACCES);

	case H_EIO:
	case H_ECPUERROR:
		return (EIO);

	case H_ENOTSUPPORTED:
		return (ENOTSUP);

	case H_ETOOMANY:
		return (ENOSPC);

	case H_ECHANNEL:
		return (ECHRNG);
	default:
		break;
	}

	return (EIO);
}

/*
 * Reconfigure the transmit queue
 */
static int
i_ldc_txq_reconf(ldc_chan_t *ldcp)
{
	int rv;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
		return (EIO);
	}
	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
	    &(ldcp->tx_tail), &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
		return (EIO);
	}
	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
	    ldcp->link_state);

	return (0);
}

/*
 * Reconfigure the receive queue
 */
static int
i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
		    ldcp->rx_q_entries);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
			    ldcp->id);
			return (EIO);
		}
		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
		    ldcp->id);
	}

	return (0);
}


/*
 * Drain the contents of the receive queue
 */
static int
i_ldc_rxq_drain(ldc_chan_t *ldcp)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	/* flush contents by setting the head = tail */
	return (i_ldc_set_rx_head(ldcp, rx_tail));
}


/*
 * Reset LDC state structure and its contents
 */
static void
i_ldc_reset_state(ldc_chan_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->lock));
	ldcp->last_msg_snt = LDC_INIT_SEQID;
	ldcp->last_ack_rcd = 0;
	ldcp->last_msg_rcd = 0;
	ldcp->tx_ackd_head = ldcp->tx_head;
	ldcp->next_vidx = 0;
	ldcp->hstate = 0;
	ldcp->tstate = TS_OPEN;
	ldcp->status = LDC_OPEN;

	if (ldcp->link_state == LDC_CHANNEL_UP ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {

		if (ldcp->mode == LDC_MODE_RAW) {
			ldcp->status = LDC_UP;
			ldcp->tstate = TS_UP;
		} else {
			ldcp->status = LDC_READY;
			ldcp->tstate |= TS_LINK_READY;
		}
	}
}

/*
 * Reset an LDC channel
 */
static void
i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
{
	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* reconfig Tx and Rx queues */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_reconf(ldcp, force_reset);

	/* Clear Tx and Rx interrupts */
	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);

	/* Reset channel state */
	i_ldc_reset_state(ldcp);

	/* Mark channel in reset */
	ldcp->tstate |= TS_IN_RESET;
}


/*
 * Clear pending interrupts
 */
static void
i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
{
	ldc_cnex_t *cinfo = &ldcssp->cinfo;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(cinfo->dip != NULL);

	switch (itype) {
	case CNEX_TX_INTR:
		/* check Tx interrupt */
		if (ldcp->tx_intr_state)
			ldcp->tx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;

	case CNEX_RX_INTR:
		/* check Rx interrupt */
		if (ldcp->rx_intr_state)
			ldcp->rx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;
	}

	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
	D2(ldcp->id,
	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
	    ldcp->id, itype);
}
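/*
 * For illustration only: the HV queue-pointer updates below use a
 * bounded retry loop when the hypervisor reports H_EWOULDBLOCK.
 * Assuming (hypothetically) LDC_MAX_RETRIES = 10 and LDC_DELAY = 1000
 * usecs, a persistently blocked call would busy-wait via
 * drv_usecwait() for at most 10 * 1000 usecs = 10 ms before the
 * caller gives up (and, for the Rx head update, resets the channel).
 */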
/*
 * Set the receive queue head
 * Resets connection and returns an error if it fails.
 */
static int
i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
{
	int rv;
	int retries;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
			return (0);

		if (rv != H_EWOULDBLOCK)
			break;

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}

	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
	    ldcp->id, head);
	mutex_enter(&ldcp->tx_lock);
	i_ldc_reset(ldcp, B_TRUE);
	mutex_exit(&ldcp->tx_lock);

	return (ECONNRESET);
}


/*
 * Returns the tx_tail to be used for transfer
 * Re-reads the Tx queue ptrs from the HV and fails if the
 * channel link is down or the queue is full
 */
static int
i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
{
	int rv;
	uint64_t current_head, new_tail;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	/* Read the head and tail ptrs from HV */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
		    ldcp->id);
		return (EIO);
	}
	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
		    ldcp->id);
		return (ECONNRESET);
	}

	/* In reliable mode, check against last ACKd msg */
	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
	    ldcp->mode == LDC_MODE_STREAM)
	    ? ldcp->tx_ackd_head : ldcp->tx_head;

	/* increment the tail */
	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	if (new_tail == current_head) {
		DWARN(ldcp->id,
		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
		    ldcp->id);
		return (EWOULDBLOCK);
	}

	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);

	*tail = ldcp->tx_tail;
	return (0);
}
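/*
 * For illustration only: the queue-wrap arithmetic above treats the Tx
 * queue as a byte-addressed ring of (tx_q_entries << LDC_PACKET_SHIFT)
 * bytes. Assuming 64-byte packets (LDC_PACKET_SHIFT = 6) and a queue
 * of 512 entries, the ring is 512 * 64 = 32768 bytes, so a tail in the
 * last slot (offset 32704) advances to
 *
 *	(32704 + 64) % 32768 = 0
 *
 * i.e. it wraps back to the first slot. The "new_tail == current_head"
 * check keeps one slot free so a full ring remains distinguishable
 * from an empty one.
 */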
/*
 * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
 * and retry ldc_max_retries times before returning an error.
 * Returns 0, EWOULDBLOCK or EIO
 */
static int
i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
{
	int rv, retval = EWOULDBLOCK;
	int retries;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
			retval = 0;
			break;
		}
		if (rv != H_EWOULDBLOCK) {
			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
			retval = EIO;
			break;
		}

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}
	return (retval);
}

/*
 * Send an LDC message
 */
static int
i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg)
{
	int rv;
	ldc_msg_t *pkt;
	uint64_t tx_tail;
	uint32_t curr_seqid = ldcp->last_msg_snt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the message */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (rv);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* Initialize the packet */
	pkt->type = pkttype;
	pkt->stype = subtype;
	pkt->ctrl = ctrlmsg;

	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
		curr_seqid++;
		if (ldcp->mode != LDC_MODE_RAW) {
			pkt->seqid = curr_seqid;
			pkt->ackid = ldcp->last_msg_rcd;
		}
	}
	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (EIO);
	}

	ldcp->last_msg_snt = curr_seqid;
	ldcp->tx_tail = tx_tail;

	mutex_exit(&ldcp->tx_lock);
	return (0);
}

/*
 * Checks if packet was received in right order
 * in the case of a reliable link.
 * Returns 0 if in order, else EIO
 */
static int
i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	/* No seqid checking for RAW mode */
	if (ldcp->mode == LDC_MODE_RAW)
		return (0);

	/* No seqid checking for version, RTS, RTR message */
	if (msg->ctrl == LDC_VER ||
	    msg->ctrl == LDC_RTS ||
	    msg->ctrl == LDC_RTR)
		return (0);

	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
		    "expecting 0x%x\n", ldcp->id, msg->seqid,
		    (ldcp->last_msg_rcd + 1));
		return (EIO);
	}

#ifdef DEBUG
	if (LDC_INJECT_PKTLOSS(ldcp)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
		return (EIO);
	}
#endif

	return (0);
}
/*
 * Process an incoming version ctrl message
 */
static int
i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0, idx = ldcp->next_vidx;
	ldc_msg_t *pkt;
	uint64_t tx_tail;
	ldc_ver_t *rcvd_ver;

	/* get the received version */
	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);

	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	switch (msg->stype) {
	case LDC_INFO:

		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			return (EAGAIN);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;

		for (;;) {

			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
			    rcvd_ver->major, rcvd_ver->minor,
			    ldc_versions[idx].major, ldc_versions[idx].minor);

			if (rcvd_ver->major == ldc_versions[idx].major) {
				/* major version match - ACK version */
				pkt->stype = LDC_ACK;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));

				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* nack with next lower version */
				pkt->stype = LDC_NACK;
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next major version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - send NACK */
				pkt->stype = LDC_NACK;
				bzero(pkt->udata, sizeof (ldc_ver_t));
				ldcp->next_vidx = 0;
				break;
			}
		}
		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			ldcp->tx_tail = tx_tail;
			if (pkt->stype == LDC_ACK) {
				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
				    " version ACK\n", ldcp->id);
				/* Save the ACK'd version */
				ldcp->version.major = rcvd_ver->major;
				ldcp->version.minor = rcvd_ver->minor;
				ldcp->hstate |= TS_RCVD_VER;
				ldcp->tstate |= TS_VER_DONE;
				D1(DBG_ALL_LDCS,
				    "(0x%llx) Sent ACK, "
				    "Agreed on version v%u.%u\n",
				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
			}
		} else {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) error sending "
			    "ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;

	case LDC_ACK:
		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			if (ldcp->version.major != rcvd_ver->major ||
			    ldcp->version.minor != rcvd_ver->minor) {

				/* mismatched version - reset connection */
				DWARN(ldcp->id,
				    "i_ldc_process_VER: (0x%llx) recvd"
				    " ACK ver != sent ACK ver\n", ldcp->id);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		} else {
			/* SUCCESS - we have agreed on a version */
			ldcp->version.major = rcvd_ver->major;
			ldcp->version.minor = rcvd_ver->minor;
			ldcp->tstate |= TS_VER_DONE;
		}

		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

		/* initiate RTS-RTR-RDX handshake */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		pkt->type = LDC_CTRL;
		pkt->stype = LDC_INFO;
		pkt->ctrl = LDC_RTS;
		pkt->env = ldcp->mode;
		if (ldcp->mode != LDC_MODE_RAW)
			pkt->seqid = LDC_INIT_SEQID;

		ldcp->last_msg_rcd = LDC_INIT_SEQID;

		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv) {
			D2(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) no listener\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTS;

		break;
	case LDC_NACK:
		/* check if version in NACK is zero */
		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
			/* version handshake failure */
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_VER: (0x%llx) no version match\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;
		pkt->stype = LDC_INFO;

		/* check ver in NACK msg has a match */
		for (;;) {
			if (rcvd_ver->major == ldc_versions[idx].major) {
				/*
				 * major version match - resubmit request,
				 * lowering the minor version to the one
				 * this endpt supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* send next lower version */
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - terminate */
				ldcp->next_vidx = 0;
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version "
			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
			    ldc_versions[idx].minor);
			ldcp->tx_tail = tx_tail;
		} else {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) error sending version "
			    "INFO\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;
	}

	mutex_exit(&ldcp->tx_lock);
	return (rv);
}
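/*
 * For illustration only: with the single supported version {1, 0} in
 * ldc_versions[] above, the negotiation loop behaves as follows (the
 * peer versions are hypothetical):
 *
 *	peer sends VER INFO v1.3  ->  major matches, minor lowered,
 *					ACK carrying v1.0
 *	peer sends VER INFO v2.5  ->  peer major too high, NACK
 *					carrying v1.0 as a hint
 *	peer sends VER INFO v0.9  ->  no lower version to offer, NACK
 *					with v0.0 (handshake fails)
 */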
/*
 * Process an incoming RTS ctrl message
 */
static int
i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0;
	ldc_msg_t *pkt;
	uint64_t tx_tail;
	boolean_t sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		DWARN(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
			    ldcp->id);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;
	default:
		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we still consume the packet that came in, but do not
	 * record that we received the RTS.
	 */
	if (rv || sent_NACK)
		return (rv);

	/* record RTS received */
	ldcp->hstate |= TS_RCVD_RTS;

	/* store initial SEQID info */
	ldcp->last_msg_snt = msg->seqid;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the response */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* initialize the packet */
	pkt->type = LDC_CTRL;
	pkt->stype = LDC_INFO;
	pkt->ctrl = LDC_RTR;
	pkt->env = ldcp->mode;
	if (ldcp->mode != LDC_MODE_RAW)
		pkt->seqid = LDC_INIT_SEQID;

	ldcp->last_msg_rcd = msg->seqid;

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv == 0) {
		D2(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTR;

	} else {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
/*
 * Process an incoming RTR ctrl message
 */
static int
i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0;
	boolean_t sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RTR NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			DWARN(ldcp->id,
			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
			    "expecting 0x%x, got 0x%x\n",
			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;

	default:
		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we still consume the packet that came in, but do not
	 * record that we received the RTR.
	 */
	if (rv || sent_NACK)
		return (rv);

	ldcp->last_msg_snt = msg->seqid;
	ldcp->hstate |= TS_RCVD_RTR;

	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
	if (rv) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D2(ldcp->id,
	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);

	ldcp->hstate |= TS_SENT_RDX;
	ldcp->tstate |= TS_HSHAKE_DONE;
	if ((ldcp->tstate & TS_IN_RESET) == 0)
		ldcp->status = LDC_UP;

	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);

	return (0);
}


/*
 * Process an incoming RDX ctrl message
 */
static int
i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0;

	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RDX NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/*
		 * if channel is UP and a RDX received after data transmission
		 * has commenced it is an error
		 */
		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
			    " - LDC reset\n", ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->hstate |= TS_RCVD_RDX;
		ldcp->tstate |= TS_HSHAKE_DONE;
		if ((ldcp->tstate & TS_IN_RESET) == 0)
			ldcp->status = LDC_UP;

		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
		break;

	default:
		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	return (rv);
}
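/*
 * Summary (for illustration): after version negotiation, the handshake
 * implemented by the routines above proceeds as
 *
 *	initiator			peer
 *	---------			----
 *	VER INFO	-->
 *			<--		VER ACK
 *	RTS		-->		(peer records RTS, initial seqid)
 *			<--		RTR
 *	RDX		-->		(channel UP on both sides)
 *
 * with hstate accumulating TS_SENT_RTS/TS_RCVD_RTR/TS_SENT_RDX on one
 * side and TS_RCVD_RTS/TS_SENT_RTR/TS_RCVD_RDX on the other.
 */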
/*
 * Process an incoming ACK for a data packet
 */
static int
i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv;
	uint64_t tx_head;
	ldc_msg_t *pkt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Read the current Tx head and tail
	 */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	/*
	 * Loop from where the previous ACK location was to the
	 * current head location. This is how far the HV has
	 * actually sent pkts. Pkts between head and tail are
	 * yet to be sent by HV.
	 */
	tx_head = ldcp->tx_ackd_head;
	for (;;) {
		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
		tx_head = (tx_head + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		if (pkt->seqid == msg->ackid) {
			D2(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
			    ldcp->id);
			ldcp->last_ack_rcd = msg->ackid;
			ldcp->tx_ackd_head = tx_head;
			break;
		}
		if (tx_head == ldcp->tx_head) {
			/* could not find packet */
			DWARN(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
			    ldcp->id);

			/* Reset the channel -- as we cannot continue */
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}

/*
 * Process incoming control message
 * Return	0 - session can continue
 *		EAGAIN - reprocess packet - state was changed
 *		ECONNRESET - channel was reset
 */
static int
i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int rv = 0;

	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
	    ldcp->id, ldcp->tstate, ldcp->hstate);

	switch (ldcp->tstate & ~TS_IN_RESET) {

	case TS_OPEN:
	case TS_READY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_VREADY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		case LDC_RTS:
			/* process RTS message */
			rv = i_ldc_process_RTS(ldcp, msg);
			break;
		case LDC_RTR:
			/* process RTR message */
			rv = i_ldc_process_RTR(ldcp, msg);
			break;
		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_UP:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
			    "- LDC reset\n", ldcp->id);
			/* peer is redoing version negotiation */
			mutex_enter(&ldcp->tx_lock);
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			rv = EAGAIN;
			break;

		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;

		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}
	}

	return (rv);
}
"i_ldc_register_channel: cannot register channel\n"); 1678 return (rv); 1679 } 1680 1681 rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR, 1682 i_ldc_tx_hdlr, ldcp, NULL); 1683 if (rv) { 1684 DWARN(ldcp->id, 1685 "i_ldc_register_channel: cannot add Tx interrupt\n"); 1686 (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); 1687 return (rv); 1688 } 1689 1690 rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR, 1691 i_ldc_rx_hdlr, ldcp, NULL); 1692 if (rv) { 1693 DWARN(ldcp->id, 1694 "i_ldc_register_channel: cannot add Rx interrupt\n"); 1695 (void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); 1696 (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); 1697 return (rv); 1698 } 1699 1700 ldcp->tstate |= TS_CNEX_RDY; 1701 1702 return (0); 1703 } 1704 1705 /* 1706 * Unregister a channel with the channel nexus 1707 */ 1708 static int 1709 i_ldc_unregister_channel(ldc_chan_t *ldcp) 1710 { 1711 int rv = 0; 1712 ldc_cnex_t *cinfo = &ldcssp->cinfo; 1713 1714 if (cinfo->dip == NULL) { 1715 DWARN(ldcp->id, 1716 "i_ldc_unregister_channel: cnex has not registered\n"); 1717 return (EAGAIN); 1718 } 1719 1720 if (ldcp->tstate & TS_CNEX_RDY) { 1721 1722 /* Remove the Rx interrupt */ 1723 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR); 1724 if (rv) { 1725 if (rv != EAGAIN) { 1726 DWARN(ldcp->id, 1727 "i_ldc_unregister_channel: err removing " 1728 "Rx intr\n"); 1729 return (rv); 1730 } 1731 1732 /* 1733 * If interrupts are pending and handler has 1734 * finished running, clear interrupt and try 1735 * again 1736 */ 1737 if (ldcp->rx_intr_state != LDC_INTR_PEND) 1738 return (rv); 1739 1740 (void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 1741 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, 1742 CNEX_RX_INTR); 1743 if (rv) { 1744 DWARN(ldcp->id, "i_ldc_unregister_channel: " 1745 "err removing Rx interrupt\n"); 1746 return (rv); 1747 } 1748 } 1749 1750 /* Remove the Tx interrupt */ 1751 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); 1752 if (rv) { 1753 DWARN(ldcp->id, 1754 "i_ldc_unregister_channel: err removing Tx intr\n"); 1755 return (rv); 1756 } 1757 1758 /* Unregister the channel */ 1759 rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id); 1760 if (rv) { 1761 DWARN(ldcp->id, 1762 "i_ldc_unregister_channel: cannot unreg channel\n"); 1763 return (rv); 1764 } 1765 1766 ldcp->tstate &= ~TS_CNEX_RDY; 1767 } 1768 1769 return (0); 1770 } 1771 1772 1773 /* 1774 * LDC transmit interrupt handler 1775 * triggered for chanel up/down/reset events 1776 * and Tx queue content changes 1777 */ 1778 static uint_t 1779 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2) 1780 { 1781 _NOTE(ARGUNUSED(arg2)) 1782 1783 int rv; 1784 ldc_chan_t *ldcp; 1785 boolean_t notify_client = B_FALSE; 1786 uint64_t notify_event = 0, link_state; 1787 1788 /* Get the channel for which interrupt was received */ 1789 ASSERT(arg1 != NULL); 1790 ldcp = (ldc_chan_t *)arg1; 1791 1792 D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n", 1793 ldcp->id, ldcp); 1794 1795 /* Lock channel */ 1796 mutex_enter(&ldcp->lock); 1797 1798 /* Obtain Tx lock */ 1799 mutex_enter(&ldcp->tx_lock); 1800 1801 /* mark interrupt as pending */ 1802 ldcp->tx_intr_state = LDC_INTR_ACTIVE; 1803 1804 /* save current link state */ 1805 link_state = ldcp->link_state; 1806 1807 rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, 1808 &ldcp->link_state); 1809 if (rv) { 1810 cmn_err(CE_WARN, 1811 "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n", 1812 ldcp->id, rv); 1813 i_ldc_clear_intr(ldcp, CNEX_TX_INTR); 1814 
/*
 * LDC transmit interrupt handler
 * triggered for channel up/down/reset events
 * and Tx queue content changes
 */
static uint_t
i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	int rv;
	ldc_chan_t *ldcp;
	boolean_t notify_client = B_FALSE;
	uint64_t notify_event = 0, link_state;

	/* Get the channel for which interrupt was received */
	ASSERT(arg1 != NULL);
	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* mark interrupt as pending */
	ldcp->tx_intr_state = LDC_INTR_ACTIVE;

	/* save current link state */
	link_state = ldcp->link_state;

	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
	    &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
		    ldcp->id, rv);
		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (DDI_INTR_CLAIMED);
	}

	/*
	 * reset the channel state if the channel went down
	 * (other side unconfigured queue) or channel was reset
	 * (other side reconfigured its queue)
	 */
	if (link_state != ldcp->link_state &&
	    ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
		i_ldc_reset(ldcp, B_FALSE);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_DOWN;
	}

	if (link_state != ldcp->link_state &&
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
		i_ldc_reset(ldcp, B_FALSE);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_RESET;
	}

	if (link_state != ldcp->link_state &&
	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
	    ldcp->link_state == LDC_CHANNEL_UP) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_RESET;
		ldcp->tstate |= TS_LINK_READY;
		ldcp->status = LDC_READY;
	}

	/* if callbacks are disabled, do not notify */
	if (!ldcp->cb_enabled)
		notify_client = B_FALSE;

	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);

	if (notify_client) {
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(notify_event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
			    "failure", ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}
/*
 * LDC receive interrupt handler
 * triggered for channel with data pending to read
 * i.e. Rx queue content changes
 */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	int rv;
	uint64_t rx_head, rx_tail;
	ldc_msg_t *msg;
	ldc_chan_t *ldcp;
	boolean_t notify_client = B_FALSE;
	uint64_t notify_event = 0;
	uint64_t link_state, first_fragment = 0;


	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* mark interrupt as pending */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	/*
	 * Read packet(s) from the queue
	 */
	for (;;) {

		link_state = ldcp->link_state;
		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
		    &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
			mutex_exit(&ldcp->lock);
			return (DDI_INTR_CLAIMED);
		}

		/*
		 * reset the channel state if the channel went down
		 * (other side unconfigured queue) or channel was reset
		 * (other side reconfigured its queue)
		 */

		if (link_state != ldcp->link_state) {

			switch (ldcp->link_state) {
			case LDC_CHANNEL_DOWN:
				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
				    "link down\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				notify_client = B_TRUE;
				notify_event = LDC_EVT_DOWN;
				goto loop_exit;

			case LDC_CHANNEL_UP:
				D1(ldcp->id, "i_ldc_rx_hdlr: "
				    "channel link up\n", ldcp->id);

				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
					notify_client = B_TRUE;
					notify_event = LDC_EVT_RESET;
					ldcp->tstate |= TS_LINK_READY;
					ldcp->status = LDC_READY;
				}
				break;

			case LDC_CHANNEL_RESET:
			default:
#ifdef DEBUG
force_reset:
#endif
				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
				    "link reset\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}
		}

#ifdef DEBUG
		if (LDC_INJECT_RESET(ldcp))
			goto force_reset;
#endif
		if (rx_head == rx_tail) {
			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
			    ldcp->id);
			break;
		}

		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
		    rx_head, rx_tail);
		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
		    ldcp->rx_q_va + rx_head);

		/* get the message */
		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);

		/* if channel is in RAW mode or data pkt, notify and return */
		if (ldcp->mode == LDC_MODE_RAW) {
			notify_client = B_TRUE;
			notify_event |= LDC_EVT_READ;
			break;
		}

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* discard packet if channel is not up */
			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {

				/* move the head one position */
				rx_head = (rx_head + LDC_PACKET_SIZE) %
				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);

				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
					break;

				continue;
			} else {
				if ((ldcp->tstate & TS_IN_RESET) == 0)
					notify_client = B_TRUE;
				notify_event |= LDC_EVT_READ;
				break;
			}
		}

		/* Check the sequence ID for the message received */
		rv = i_ldc_check_seqid(ldcp, msg);
		if (rv != 0) {

			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);

			/* Reset last_msg_rcd to start of message */
			if (first_fragment != 0) {
				ldcp->last_msg_rcd = first_fragment - 1;
				first_fragment = 0;
			}

			/*
			 * Send a NACK due to seqid mismatch
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
			    (msg->ctrl & LDC_CTRL_MASK));

			if (rv) {
				cmn_err(CE_NOTE,
				    "i_ldc_rx_hdlr: (0x%lx) err sending "
				    "CTRL/NACK msg\n", ldcp->id);

				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);

				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}

			/* purge receive queue */
			(void) i_ldc_set_rx_head(ldcp, rx_tail);
			break;
		}

		/* record the message ID */
		ldcp->last_msg_rcd = msg->seqid;

		/* process control messages */
		if (msg->type & LDC_CTRL) {
			/* save current internal state */
			uint64_t tstate = ldcp->tstate;

			rv = i_ldc_ctrlmsg(ldcp, msg);
			if (rv == EAGAIN) {
				/* re-process pkt - state was adjusted */
				continue;
			}
			if (rv == ECONNRESET) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}

			/*
			 * control message processing was successful
			 * channel transitioned to ready for communication
			 */
			if (rv == 0 && ldcp->tstate == TS_UP &&
			    (tstate & ~TS_IN_RESET) !=
			    (ldcp->tstate & ~TS_IN_RESET)) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_UP;
			}
		}

		/* process data NACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) received DATA/NACK",
			    ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			notify_client = B_TRUE;
			notify_event = LDC_EVT_RESET;
			break;
		}

		/* process data ACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}
		}

		/* move the head one position */
		rx_head = (rx_head + LDC_PACKET_SIZE) %
		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
			notify_client = B_TRUE;
			notify_event = LDC_EVT_RESET;
			break;
		}

	} /* for */
ldcp->cb(notify_event, ldcp->cb_arg); 2149 if (rv) { 2150 DWARN(ldcp->id, 2151 "i_ldc_rx_hdlr: (0x%llx) callback failure", 2152 ldcp->id); 2153 } 2154 mutex_enter(&ldcp->lock); 2155 ldcp->cb_inprogress = B_FALSE; 2156 } 2157 2158 mutex_exit(&ldcp->lock); 2159 2160 D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id); 2161 return (DDI_INTR_CLAIMED); 2162 } 2163 2164 2165 /* -------------------------------------------------------------------------- */ 2166 2167 /* 2168 * LDC API functions 2169 */ 2170 2171 /* 2172 * Initialize the channel. Allocate internal structure and memory for 2173 * TX/RX queues, and initialize locks. 2174 */ 2175 int 2176 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle) 2177 { 2178 ldc_chan_t *ldcp; 2179 int rv, exit_val; 2180 uint64_t ra_base, nentries; 2181 uint64_t qlen; 2182 2183 exit_val = EINVAL; /* guarantee an error if exit on failure */ 2184 2185 if (attr == NULL) { 2186 DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id); 2187 return (EINVAL); 2188 } 2189 if (handle == NULL) { 2190 DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id); 2191 return (EINVAL); 2192 } 2193 2194 /* check if channel is valid */ 2195 rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries); 2196 if (rv == H_ECHANNEL) { 2197 DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id); 2198 return (EINVAL); 2199 } 2200 2201 /* check if the channel has already been initialized */ 2202 mutex_enter(&ldcssp->lock); 2203 ldcp = ldcssp->chan_list; 2204 while (ldcp != NULL) { 2205 if (ldcp->id == id) { 2206 DWARN(id, "ldc_init: (0x%llx) already initialized\n", 2207 id); 2208 mutex_exit(&ldcssp->lock); 2209 return (EADDRINUSE); 2210 } 2211 ldcp = ldcp->next; 2212 } 2213 mutex_exit(&ldcssp->lock); 2214 2215 ASSERT(ldcp == NULL); 2216 2217 *handle = 0; 2218 2219 /* Allocate an ldcp structure */ 2220 ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP); 2221 2222 /* 2223 * Initialize the channel and Tx lock 2224 * 2225 * The channel 'lock' protects the entire channel and 2226 * should be acquired before initializing, resetting, 2227 * destroying or reading from a channel. 2228 * 2229 * The 'tx_lock' should be acquired prior to transmitting 2230 * data over the channel. The lock should also be acquired 2231 * prior to channel reconfiguration (in order to prevent 2232 * concurrent writes). 2233 * 2234 * ORDERING: When both locks are being acquired, to prevent 2235 * deadlocks, the channel lock should be always acquired prior 2236 * to the tx_lock. 2237 */ 2238 mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL); 2239 mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL); 2240 2241 /* Initialize the channel */ 2242 ldcp->id = id; 2243 ldcp->cb = NULL; 2244 ldcp->cb_arg = NULL; 2245 ldcp->cb_inprogress = B_FALSE; 2246 ldcp->cb_enabled = B_FALSE; 2247 ldcp->next = NULL; 2248 2249 /* Read attributes */ 2250 ldcp->mode = attr->mode; 2251 ldcp->devclass = attr->devclass; 2252 ldcp->devinst = attr->instance; 2253 ldcp->mtu = (attr->mtu > 0) ? 
attr->mtu : LDC_DEFAULT_MTU; 2254 2255 D1(ldcp->id, 2256 "ldc_init: (0x%llx) channel attributes, class=0x%x, " 2257 "instance=0x%llx, mode=%d, mtu=%d\n", 2258 ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu); 2259 2260 ldcp->next_vidx = 0; 2261 ldcp->tstate = TS_IN_RESET; 2262 ldcp->hstate = 0; 2263 ldcp->last_msg_snt = LDC_INIT_SEQID; 2264 ldcp->last_ack_rcd = 0; 2265 ldcp->last_msg_rcd = 0; 2266 2267 ldcp->stream_bufferp = NULL; 2268 ldcp->exp_dring_list = NULL; 2269 ldcp->imp_dring_list = NULL; 2270 ldcp->mhdl_list = NULL; 2271 2272 ldcp->tx_intr_state = LDC_INTR_NONE; 2273 ldcp->rx_intr_state = LDC_INTR_NONE; 2274 2275 /* Initialize payload size depending on whether channel is reliable */ 2276 switch (ldcp->mode) { 2277 case LDC_MODE_RAW: 2278 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW; 2279 ldcp->read_p = i_ldc_read_raw; 2280 ldcp->write_p = i_ldc_write_raw; 2281 break; 2282 case LDC_MODE_UNRELIABLE: 2283 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE; 2284 ldcp->read_p = i_ldc_read_packet; 2285 ldcp->write_p = i_ldc_write_packet; 2286 break; 2287 case LDC_MODE_RELIABLE: 2288 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; 2289 ldcp->read_p = i_ldc_read_packet; 2290 ldcp->write_p = i_ldc_write_packet; 2291 break; 2292 case LDC_MODE_STREAM: 2293 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; 2294 2295 ldcp->stream_remains = 0; 2296 ldcp->stream_offset = 0; 2297 ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP); 2298 ldcp->read_p = i_ldc_read_stream; 2299 ldcp->write_p = i_ldc_write_stream; 2300 break; 2301 default: 2302 exit_val = EINVAL; 2303 goto cleanup_on_exit; 2304 } 2305 2306 /* 2307 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this 2308 * value is smaller than default length of ldc_queue_entries, 2309 * qlen is set to ldc_queue_entries.. 2310 */ 2311 qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload; 2312 ldcp->rx_q_entries = 2313 (qlen < ldc_queue_entries) ? 
ldc_queue_entries : qlen; 2314 ldcp->tx_q_entries = ldcp->rx_q_entries; 2315 2316 D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen); 2317 2318 /* Create a transmit queue */ 2319 ldcp->tx_q_va = (uint64_t) 2320 contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT); 2321 if (ldcp->tx_q_va == NULL) { 2322 cmn_err(CE_WARN, 2323 "ldc_init: (0x%lx) TX queue allocation failed\n", 2324 ldcp->id); 2325 exit_val = ENOMEM; 2326 goto cleanup_on_exit; 2327 } 2328 ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va); 2329 2330 D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n", 2331 ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries); 2332 2333 ldcp->tstate |= TS_TXQ_RDY; 2334 2335 /* Create a receive queue */ 2336 ldcp->rx_q_va = (uint64_t) 2337 contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT); 2338 if (ldcp->rx_q_va == NULL) { 2339 cmn_err(CE_WARN, 2340 "ldc_init: (0x%lx) RX queue allocation failed\n", 2341 ldcp->id); 2342 exit_val = ENOMEM; 2343 goto cleanup_on_exit; 2344 } 2345 ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va); 2346 2347 D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n", 2348 ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries); 2349 2350 ldcp->tstate |= TS_RXQ_RDY; 2351 2352 /* Init descriptor ring and memory handle list lock */ 2353 mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL); 2354 mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL); 2355 mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL); 2356 2357 /* mark status as INITialized */ 2358 ldcp->status = LDC_INIT; 2359 2360 /* Add to channel list */ 2361 mutex_enter(&ldcssp->lock); 2362 ldcp->next = ldcssp->chan_list; 2363 ldcssp->chan_list = ldcp; 2364 ldcssp->channel_count++; 2365 mutex_exit(&ldcssp->lock); 2366 2367 /* set the handle */ 2368 *handle = (ldc_handle_t)ldcp; 2369 2370 D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id); 2371 2372 return (0); 2373 2374 cleanup_on_exit: 2375 2376 if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) 2377 kmem_free(ldcp->stream_bufferp, ldcp->mtu); 2378 2379 if (ldcp->tstate & TS_TXQ_RDY) 2380 contig_mem_free((caddr_t)ldcp->tx_q_va, 2381 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 2382 2383 if (ldcp->tstate & TS_RXQ_RDY) 2384 contig_mem_free((caddr_t)ldcp->rx_q_va, 2385 (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); 2386 2387 mutex_destroy(&ldcp->tx_lock); 2388 mutex_destroy(&ldcp->lock); 2389 2390 if (ldcp) 2391 kmem_free(ldcp, sizeof (ldc_chan_t)); 2392 2393 return (exit_val); 2394 } 2395 2396 /* 2397 * Finalizes the LDC connection. It will return EBUSY if the 2398 * channel is open. A ldc_close() has to be done prior to 2399 * a ldc_fini operation. 
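 *
 * For example, a typical teardown sequence (a minimal sketch; 'hdl'
 * is an illustrative handle and error handling is omitted):
 *
 *	(void) ldc_close(hdl);
 *	(void) ldc_fini(hdl);
 *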
It frees TX/RX queues, associated 2400 * with the channel 2401 */ 2402 int 2403 ldc_fini(ldc_handle_t handle) 2404 { 2405 ldc_chan_t *ldcp; 2406 ldc_chan_t *tmp_ldcp; 2407 uint64_t id; 2408 2409 if (handle == NULL) { 2410 DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n"); 2411 return (EINVAL); 2412 } 2413 ldcp = (ldc_chan_t *)handle; 2414 id = ldcp->id; 2415 2416 mutex_enter(&ldcp->lock); 2417 2418 if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) { 2419 DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n", 2420 ldcp->id); 2421 mutex_exit(&ldcp->lock); 2422 return (EBUSY); 2423 } 2424 2425 /* Remove from the channel list */ 2426 mutex_enter(&ldcssp->lock); 2427 tmp_ldcp = ldcssp->chan_list; 2428 if (tmp_ldcp == ldcp) { 2429 ldcssp->chan_list = ldcp->next; 2430 ldcp->next = NULL; 2431 } else { 2432 while (tmp_ldcp != NULL) { 2433 if (tmp_ldcp->next == ldcp) { 2434 tmp_ldcp->next = ldcp->next; 2435 ldcp->next = NULL; 2436 break; 2437 } 2438 tmp_ldcp = tmp_ldcp->next; 2439 } 2440 if (tmp_ldcp == NULL) { 2441 DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n"); 2442 mutex_exit(&ldcssp->lock); 2443 mutex_exit(&ldcp->lock); 2444 return (EINVAL); 2445 } 2446 } 2447 2448 ldcssp->channel_count--; 2449 2450 mutex_exit(&ldcssp->lock); 2451 2452 /* Free the map table for this channel */ 2453 if (ldcp->mtbl) { 2454 (void) hv_ldc_set_map_table(ldcp->id, NULL, NULL); 2455 if (ldcp->mtbl->contigmem) 2456 contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size); 2457 else 2458 kmem_free(ldcp->mtbl->table, ldcp->mtbl->size); 2459 mutex_destroy(&ldcp->mtbl->lock); 2460 kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t)); 2461 } 2462 2463 /* Destroy descriptor ring and memory handle list lock */ 2464 mutex_destroy(&ldcp->exp_dlist_lock); 2465 mutex_destroy(&ldcp->imp_dlist_lock); 2466 mutex_destroy(&ldcp->mlist_lock); 2467 2468 /* Free the stream buffer for STREAM_MODE */ 2469 if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) 2470 kmem_free(ldcp->stream_bufferp, ldcp->mtu); 2471 2472 /* Free the RX queue */ 2473 contig_mem_free((caddr_t)ldcp->rx_q_va, 2474 (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); 2475 ldcp->tstate &= ~TS_RXQ_RDY; 2476 2477 /* Free the TX queue */ 2478 contig_mem_free((caddr_t)ldcp->tx_q_va, 2479 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 2480 ldcp->tstate &= ~TS_TXQ_RDY; 2481 2482 mutex_exit(&ldcp->lock); 2483 2484 /* Destroy mutex */ 2485 mutex_destroy(&ldcp->tx_lock); 2486 mutex_destroy(&ldcp->lock); 2487 2488 /* free channel structure */ 2489 kmem_free(ldcp, sizeof (ldc_chan_t)); 2490 2491 D1(id, "ldc_fini: (0x%llx) channel finalized\n", id); 2492 2493 return (0); 2494 } 2495 2496 /* 2497 * Open the LDC channel for use. It registers the TX/RX queues 2498 * with the Hypervisor. 
It also specifies the interrupt number 2499 * and target CPU for this channel 2500 */ 2501 int 2502 ldc_open(ldc_handle_t handle) 2503 { 2504 ldc_chan_t *ldcp; 2505 int rv; 2506 2507 if (handle == NULL) { 2508 DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n"); 2509 return (EINVAL); 2510 } 2511 2512 ldcp = (ldc_chan_t *)handle; 2513 2514 mutex_enter(&ldcp->lock); 2515 2516 if (ldcp->tstate < TS_INIT) { 2517 DWARN(ldcp->id, 2518 "ldc_open: (0x%llx) channel not initialized\n", ldcp->id); 2519 mutex_exit(&ldcp->lock); 2520 return (EFAULT); 2521 } 2522 if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) { 2523 DWARN(ldcp->id, 2524 "ldc_open: (0x%llx) channel is already open\n", ldcp->id); 2525 mutex_exit(&ldcp->lock); 2526 return (EFAULT); 2527 } 2528 2529 /* 2530 * Unregister/Register the tx queue with the hypervisor 2531 */ 2532 rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2533 if (rv) { 2534 cmn_err(CE_WARN, 2535 "ldc_open: (0x%lx) channel tx queue unconf failed\n", 2536 ldcp->id); 2537 mutex_exit(&ldcp->lock); 2538 return (EIO); 2539 } 2540 2541 rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries); 2542 if (rv) { 2543 cmn_err(CE_WARN, 2544 "ldc_open: (0x%lx) channel tx queue conf failed\n", 2545 ldcp->id); 2546 mutex_exit(&ldcp->lock); 2547 return (EIO); 2548 } 2549 2550 D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n", 2551 ldcp->id); 2552 2553 /* 2554 * Unregister/Register the rx queue with the hypervisor 2555 */ 2556 rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2557 if (rv) { 2558 cmn_err(CE_WARN, 2559 "ldc_open: (0x%lx) channel rx queue unconf failed\n", 2560 ldcp->id); 2561 mutex_exit(&ldcp->lock); 2562 return (EIO); 2563 } 2564 2565 rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries); 2566 if (rv) { 2567 cmn_err(CE_WARN, 2568 "ldc_open: (0x%lx) channel rx queue conf failed\n", 2569 ldcp->id); 2570 mutex_exit(&ldcp->lock); 2571 return (EIO); 2572 } 2573 2574 D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n", 2575 ldcp->id); 2576 2577 ldcp->tstate |= TS_QCONF_RDY; 2578 2579 /* Register the channel with the channel nexus */ 2580 rv = i_ldc_register_channel(ldcp); 2581 if (rv && rv != EAGAIN) { 2582 cmn_err(CE_WARN, 2583 "ldc_open: (0x%lx) channel register failed\n", ldcp->id); 2584 (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2585 (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2586 mutex_exit(&ldcp->lock); 2587 return (EIO); 2588 } 2589 2590 /* mark channel in OPEN state */ 2591 ldcp->status = LDC_OPEN; 2592 2593 /* Read channel state */ 2594 rv = hv_ldc_tx_get_state(ldcp->id, 2595 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 2596 if (rv) { 2597 cmn_err(CE_WARN, 2598 "ldc_open: (0x%lx) cannot read channel state\n", 2599 ldcp->id); 2600 (void) i_ldc_unregister_channel(ldcp); 2601 (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2602 (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2603 mutex_exit(&ldcp->lock); 2604 return (EIO); 2605 } 2606 2607 /* 2608 * set the ACKd head to current head location for reliable & 2609 * streaming mode 2610 */ 2611 ldcp->tx_ackd_head = ldcp->tx_head; 2612 2613 /* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */ 2614 if (ldcp->link_state == LDC_CHANNEL_UP || 2615 ldcp->link_state == LDC_CHANNEL_RESET) { 2616 ldcp->tstate |= TS_LINK_READY; 2617 ldcp->status = LDC_READY; 2618 } 2619 2620 /* 2621 * if channel is being opened in RAW mode - no handshake is needed 2622 * switch the channel READY and UP state 2623 */ 2624 if (ldcp->mode == LDC_MODE_RAW) { 2625 ldcp->tstate = TS_UP; /* 
set bits associated with LDC UP */ 2626 ldcp->status = LDC_UP; 2627 } 2628 2629 mutex_exit(&ldcp->lock); 2630 2631 /* 2632 * Increment number of open channels 2633 */ 2634 mutex_enter(&ldcssp->lock); 2635 ldcssp->channels_open++; 2636 mutex_exit(&ldcssp->lock); 2637 2638 D1(ldcp->id, 2639 "ldc_open: (0x%llx) channel (0x%p) open for use " 2640 "(tstate=0x%x, status=0x%x)\n", 2641 ldcp->id, ldcp, ldcp->tstate, ldcp->status); 2642 2643 return (0); 2644 } 2645 2646 /* 2647 * Close the LDC connection. It will return EBUSY if there 2648 * are memory segments or descriptor rings either bound to or 2649 * mapped over the channel 2650 */ 2651 int 2652 ldc_close(ldc_handle_t handle) 2653 { 2654 ldc_chan_t *ldcp; 2655 int rv = 0, retries = 0; 2656 boolean_t chk_done = B_FALSE; 2657 2658 if (handle == NULL) { 2659 DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n"); 2660 return (EINVAL); 2661 } 2662 ldcp = (ldc_chan_t *)handle; 2663 2664 mutex_enter(&ldcp->lock); 2665 2666 /* return error if channel is not open */ 2667 if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) { 2668 DWARN(ldcp->id, 2669 "ldc_close: (0x%llx) channel is not open\n", ldcp->id); 2670 mutex_exit(&ldcp->lock); 2671 return (EFAULT); 2672 } 2673 2674 /* if any memory handles, drings, are bound or mapped cannot close */ 2675 if (ldcp->mhdl_list != NULL) { 2676 DWARN(ldcp->id, 2677 "ldc_close: (0x%llx) channel has bound memory handles\n", 2678 ldcp->id); 2679 mutex_exit(&ldcp->lock); 2680 return (EBUSY); 2681 } 2682 if (ldcp->exp_dring_list != NULL) { 2683 DWARN(ldcp->id, 2684 "ldc_close: (0x%llx) channel has bound descriptor rings\n", 2685 ldcp->id); 2686 mutex_exit(&ldcp->lock); 2687 return (EBUSY); 2688 } 2689 if (ldcp->imp_dring_list != NULL) { 2690 DWARN(ldcp->id, 2691 "ldc_close: (0x%llx) channel has mapped descriptor rings\n", 2692 ldcp->id); 2693 mutex_exit(&ldcp->lock); 2694 return (EBUSY); 2695 } 2696 2697 if (ldcp->cb_inprogress) { 2698 DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n", 2699 ldcp->id); 2700 mutex_exit(&ldcp->lock); 2701 return (EWOULDBLOCK); 2702 } 2703 2704 /* Obtain Tx lock */ 2705 mutex_enter(&ldcp->tx_lock); 2706 2707 /* 2708 * Wait for pending transmits to complete i.e Tx queue to drain 2709 * if there are pending pkts - wait 1 ms and retry again 2710 */ 2711 for (;;) { 2712 2713 rv = hv_ldc_tx_get_state(ldcp->id, 2714 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 2715 if (rv) { 2716 cmn_err(CE_WARN, 2717 "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id); 2718 mutex_exit(&ldcp->tx_lock); 2719 mutex_exit(&ldcp->lock); 2720 return (EIO); 2721 } 2722 2723 if (ldcp->tx_head == ldcp->tx_tail || 2724 ldcp->link_state != LDC_CHANNEL_UP) { 2725 break; 2726 } 2727 2728 if (chk_done) { 2729 DWARN(ldcp->id, 2730 "ldc_close: (0x%llx) Tx queue drain timeout\n", 2731 ldcp->id); 2732 break; 2733 } 2734 2735 /* wait for one ms and try again */ 2736 delay(drv_usectohz(1000)); 2737 chk_done = B_TRUE; 2738 } 2739 2740 /* 2741 * Drain the Tx and Rx queues as we are closing the 2742 * channel. We dont care about any pending packets. 2743 * We have to also drain the queue prior to clearing 2744 * pending interrupts, otherwise the HV will trigger 2745 * an interrupt the moment the interrupt state is 2746 * cleared. 
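 * The sequence below therefore reconfigures/drains the queues
 * first, then unregisters the channel with the nexus, and only
 * then unconfigures the Tx/Rx queues with the HV.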
2747 */ 2748 (void) i_ldc_txq_reconf(ldcp); 2749 (void) i_ldc_rxq_drain(ldcp); 2750 2751 /* 2752 * Unregister the channel with the nexus 2753 */ 2754 while ((rv = i_ldc_unregister_channel(ldcp)) != 0) { 2755 2756 mutex_exit(&ldcp->tx_lock); 2757 mutex_exit(&ldcp->lock); 2758 2759 /* if any error other than EAGAIN return back */ 2760 if (rv != EAGAIN || retries >= ldc_max_retries) { 2761 cmn_err(CE_WARN, 2762 "ldc_close: (0x%lx) unregister failed, %d\n", 2763 ldcp->id, rv); 2764 return (rv); 2765 } 2766 2767 /* 2768 * As there could be pending interrupts we need 2769 * to wait and try again 2770 */ 2771 drv_usecwait(ldc_close_delay); 2772 mutex_enter(&ldcp->lock); 2773 mutex_enter(&ldcp->tx_lock); 2774 retries++; 2775 } 2776 2777 /* 2778 * Unregister queues 2779 */ 2780 rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2781 if (rv) { 2782 cmn_err(CE_WARN, 2783 "ldc_close: (0x%lx) channel TX queue unconf failed\n", 2784 ldcp->id); 2785 mutex_exit(&ldcp->tx_lock); 2786 mutex_exit(&ldcp->lock); 2787 return (EIO); 2788 } 2789 rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2790 if (rv) { 2791 cmn_err(CE_WARN, 2792 "ldc_close: (0x%lx) channel RX queue unconf failed\n", 2793 ldcp->id); 2794 mutex_exit(&ldcp->tx_lock); 2795 mutex_exit(&ldcp->lock); 2796 return (EIO); 2797 } 2798 2799 ldcp->tstate &= ~TS_QCONF_RDY; 2800 2801 /* Reset channel state information */ 2802 i_ldc_reset_state(ldcp); 2803 2804 /* Mark channel as down and in initialized state */ 2805 ldcp->tx_ackd_head = 0; 2806 ldcp->tx_head = 0; 2807 ldcp->tstate = TS_IN_RESET|TS_INIT; 2808 ldcp->status = LDC_INIT; 2809 2810 mutex_exit(&ldcp->tx_lock); 2811 mutex_exit(&ldcp->lock); 2812 2813 /* Decrement number of open channels */ 2814 mutex_enter(&ldcssp->lock); 2815 ldcssp->channels_open--; 2816 mutex_exit(&ldcssp->lock); 2817 2818 D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id); 2819 2820 return (0); 2821 } 2822 2823 /* 2824 * Register channel callback 2825 */ 2826 int 2827 ldc_reg_callback(ldc_handle_t handle, 2828 uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg) 2829 { 2830 ldc_chan_t *ldcp; 2831 2832 if (handle == NULL) { 2833 DWARN(DBG_ALL_LDCS, 2834 "ldc_reg_callback: invalid channel handle\n"); 2835 return (EINVAL); 2836 } 2837 if (((uint64_t)cb) < KERNELBASE) { 2838 DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n"); 2839 return (EINVAL); 2840 } 2841 ldcp = (ldc_chan_t *)handle; 2842 2843 mutex_enter(&ldcp->lock); 2844 2845 if (ldcp->cb) { 2846 DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n", 2847 ldcp->id); 2848 mutex_exit(&ldcp->lock); 2849 return (EIO); 2850 } 2851 if (ldcp->cb_inprogress) { 2852 DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n", 2853 ldcp->id); 2854 mutex_exit(&ldcp->lock); 2855 return (EWOULDBLOCK); 2856 } 2857 2858 ldcp->cb = cb; 2859 ldcp->cb_arg = arg; 2860 ldcp->cb_enabled = B_TRUE; 2861 2862 D1(ldcp->id, 2863 "ldc_reg_callback: (0x%llx) registered callback for channel\n", 2864 ldcp->id); 2865 2866 mutex_exit(&ldcp->lock); 2867 2868 return (0); 2869 } 2870 2871 /* 2872 * Unregister channel callback 2873 */ 2874 int 2875 ldc_unreg_callback(ldc_handle_t handle) 2876 { 2877 ldc_chan_t *ldcp; 2878 2879 if (handle == NULL) { 2880 DWARN(DBG_ALL_LDCS, 2881 "ldc_unreg_callback: invalid channel handle\n"); 2882 return (EINVAL); 2883 } 2884 ldcp = (ldc_chan_t *)handle; 2885 2886 mutex_enter(&ldcp->lock); 2887 2888 if (ldcp->cb == NULL) { 2889 DWARN(ldcp->id, 2890 "ldc_unreg_callback: (0x%llx) no callback exists\n", 2891 ldcp->id); 2892 mutex_exit(&ldcp->lock); 2893 
return (EIO); 2894 } 2895 if (ldcp->cb_inprogress) { 2896 DWARN(ldcp->id, 2897 "ldc_unreg_callback: (0x%llx) callback active\n", 2898 ldcp->id); 2899 mutex_exit(&ldcp->lock); 2900 return (EWOULDBLOCK); 2901 } 2902 2903 ldcp->cb = NULL; 2904 ldcp->cb_arg = NULL; 2905 ldcp->cb_enabled = B_FALSE; 2906 2907 D1(ldcp->id, 2908 "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n", 2909 ldcp->id); 2910 2911 mutex_exit(&ldcp->lock); 2912 2913 return (0); 2914 } 2915 2916 2917 /* 2918 * Bring a channel up by initiating a handshake with the peer 2919 * This call is asynchronous. It will complete at a later point 2920 * in time when the peer responds back with an RTR. 2921 */ 2922 int 2923 ldc_up(ldc_handle_t handle) 2924 { 2925 int rv; 2926 ldc_chan_t *ldcp; 2927 ldc_msg_t *ldcmsg; 2928 uint64_t tx_tail, tstate; 2929 2930 if (handle == NULL) { 2931 DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n"); 2932 return (EINVAL); 2933 } 2934 ldcp = (ldc_chan_t *)handle; 2935 2936 mutex_enter(&ldcp->lock); 2937 2938 D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id); 2939 2940 /* clear the reset state */ 2941 tstate = ldcp->tstate; 2942 ldcp->tstate &= ~TS_IN_RESET; 2943 2944 if (ldcp->tstate == TS_UP) { 2945 DWARN(ldcp->id, 2946 "ldc_up: (0x%llx) channel is already in UP state\n", 2947 ldcp->id); 2948 2949 /* mark channel as up */ 2950 ldcp->status = LDC_UP; 2951 2952 /* 2953 * if channel was in reset state and there was 2954 * pending data clear interrupt state. this will 2955 * trigger an interrupt, causing the RX handler to 2956 * to invoke the client's callback 2957 */ 2958 if ((tstate & TS_IN_RESET) && 2959 ldcp->rx_intr_state == LDC_INTR_PEND) { 2960 D1(ldcp->id, 2961 "ldc_up: (0x%llx) channel has pending data, " 2962 "clearing interrupt\n", ldcp->id); 2963 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 2964 } 2965 2966 mutex_exit(&ldcp->lock); 2967 return (0); 2968 } 2969 2970 /* if the channel is in RAW mode - mark it as UP, if READY */ 2971 if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) { 2972 ldcp->tstate = TS_UP; 2973 mutex_exit(&ldcp->lock); 2974 return (0); 2975 } 2976 2977 /* Don't start another handshake if there is one in progress */ 2978 if (ldcp->hstate) { 2979 D1(ldcp->id, 2980 "ldc_up: (0x%llx) channel handshake in progress\n", 2981 ldcp->id); 2982 mutex_exit(&ldcp->lock); 2983 return (0); 2984 } 2985 2986 mutex_enter(&ldcp->tx_lock); 2987 2988 /* get the current tail for the LDC msg */ 2989 rv = i_ldc_get_tx_tail(ldcp, &tx_tail); 2990 if (rv) { 2991 D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n", 2992 ldcp->id); 2993 mutex_exit(&ldcp->tx_lock); 2994 mutex_exit(&ldcp->lock); 2995 return (ECONNREFUSED); 2996 } 2997 2998 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 2999 ZERO_PKT(ldcmsg); 3000 3001 ldcmsg->type = LDC_CTRL; 3002 ldcmsg->stype = LDC_INFO; 3003 ldcmsg->ctrl = LDC_VER; 3004 ldcp->next_vidx = 0; 3005 bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0])); 3006 3007 DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg); 3008 3009 /* initiate the send by calling into HV and set the new tail */ 3010 tx_tail = (tx_tail + LDC_PACKET_SIZE) % 3011 (ldcp->tx_q_entries << LDC_PACKET_SHIFT); 3012 3013 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 3014 if (rv) { 3015 DWARN(ldcp->id, 3016 "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n", 3017 ldcp->id, rv); 3018 mutex_exit(&ldcp->tx_lock); 3019 mutex_exit(&ldcp->lock); 3020 return (rv); 3021 } 3022 3023 ldcp->hstate |= TS_SENT_VER; 3024 ldcp->tx_tail = tx_tail; 3025 D1(ldcp->id, 
"ldc_up: (0x%llx) channel up initiated\n", ldcp->id); 3026 3027 mutex_exit(&ldcp->tx_lock); 3028 mutex_exit(&ldcp->lock); 3029 3030 return (rv); 3031 } 3032 3033 3034 /* 3035 * Bring a channel down by resetting its state and queues 3036 */ 3037 int 3038 ldc_down(ldc_handle_t handle) 3039 { 3040 ldc_chan_t *ldcp; 3041 3042 if (handle == NULL) { 3043 DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n"); 3044 return (EINVAL); 3045 } 3046 ldcp = (ldc_chan_t *)handle; 3047 mutex_enter(&ldcp->lock); 3048 mutex_enter(&ldcp->tx_lock); 3049 i_ldc_reset(ldcp, B_TRUE); 3050 mutex_exit(&ldcp->tx_lock); 3051 mutex_exit(&ldcp->lock); 3052 3053 return (0); 3054 } 3055 3056 /* 3057 * Get the current channel status 3058 */ 3059 int 3060 ldc_status(ldc_handle_t handle, ldc_status_t *status) 3061 { 3062 ldc_chan_t *ldcp; 3063 3064 if (handle == NULL || status == NULL) { 3065 DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n"); 3066 return (EINVAL); 3067 } 3068 ldcp = (ldc_chan_t *)handle; 3069 3070 *status = ((ldc_chan_t *)handle)->status; 3071 3072 D1(ldcp->id, 3073 "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status); 3074 return (0); 3075 } 3076 3077 3078 /* 3079 * Set the channel's callback mode - enable/disable callbacks 3080 */ 3081 int 3082 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode) 3083 { 3084 ldc_chan_t *ldcp; 3085 3086 if (handle == NULL) { 3087 DWARN(DBG_ALL_LDCS, 3088 "ldc_set_intr_mode: invalid channel handle\n"); 3089 return (EINVAL); 3090 } 3091 ldcp = (ldc_chan_t *)handle; 3092 3093 /* 3094 * Record no callbacks should be invoked 3095 */ 3096 mutex_enter(&ldcp->lock); 3097 3098 switch (cmode) { 3099 case LDC_CB_DISABLE: 3100 if (!ldcp->cb_enabled) { 3101 DWARN(ldcp->id, 3102 "ldc_set_cb_mode: (0x%llx) callbacks disabled\n", 3103 ldcp->id); 3104 break; 3105 } 3106 ldcp->cb_enabled = B_FALSE; 3107 3108 D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n", 3109 ldcp->id); 3110 break; 3111 3112 case LDC_CB_ENABLE: 3113 if (ldcp->cb_enabled) { 3114 DWARN(ldcp->id, 3115 "ldc_set_cb_mode: (0x%llx) callbacks enabled\n", 3116 ldcp->id); 3117 break; 3118 } 3119 ldcp->cb_enabled = B_TRUE; 3120 3121 D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n", 3122 ldcp->id); 3123 break; 3124 } 3125 3126 mutex_exit(&ldcp->lock); 3127 3128 return (0); 3129 } 3130 3131 /* 3132 * Check to see if there are packets on the incoming queue 3133 * Will return hasdata = B_FALSE if there are no packets 3134 */ 3135 int 3136 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata) 3137 { 3138 int rv; 3139 uint64_t rx_head, rx_tail; 3140 ldc_chan_t *ldcp; 3141 3142 if (handle == NULL) { 3143 DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n"); 3144 return (EINVAL); 3145 } 3146 ldcp = (ldc_chan_t *)handle; 3147 3148 *hasdata = B_FALSE; 3149 3150 mutex_enter(&ldcp->lock); 3151 3152 if (ldcp->tstate != TS_UP) { 3153 D1(ldcp->id, 3154 "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id); 3155 mutex_exit(&ldcp->lock); 3156 return (ECONNRESET); 3157 } 3158 3159 /* Read packet(s) from the queue */ 3160 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3161 &ldcp->link_state); 3162 if (rv != 0) { 3163 cmn_err(CE_WARN, 3164 "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id); 3165 mutex_exit(&ldcp->lock); 3166 return (EIO); 3167 } 3168 /* reset the channel state if the channel went down */ 3169 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3170 ldcp->link_state == LDC_CHANNEL_RESET) { 3171 mutex_enter(&ldcp->tx_lock); 3172 i_ldc_reset(ldcp, B_FALSE); 3173 
mutex_exit(&ldcp->tx_lock); 3174 mutex_exit(&ldcp->lock); 3175 return (ECONNRESET); 3176 } 3177 3178 if ((rx_head != rx_tail) || 3179 (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) { 3180 D1(ldcp->id, 3181 "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n", 3182 ldcp->id); 3183 *hasdata = B_TRUE; 3184 } 3185 3186 mutex_exit(&ldcp->lock); 3187 3188 return (0); 3189 } 3190 3191 3192 /* 3193 * Read 'size' amount of bytes or less. If incoming buffer 3194 * is more than 'size', ENOBUFS is returned. 3195 * 3196 * On return, size contains the number of bytes read. 3197 */ 3198 int 3199 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep) 3200 { 3201 ldc_chan_t *ldcp; 3202 uint64_t rx_head = 0, rx_tail = 0; 3203 int rv = 0, exit_val; 3204 3205 if (handle == NULL) { 3206 DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n"); 3207 return (EINVAL); 3208 } 3209 3210 ldcp = (ldc_chan_t *)handle; 3211 3212 /* channel lock */ 3213 mutex_enter(&ldcp->lock); 3214 3215 if (ldcp->tstate != TS_UP) { 3216 DWARN(ldcp->id, 3217 "ldc_read: (0x%llx) channel is not in UP state\n", 3218 ldcp->id); 3219 exit_val = ECONNRESET; 3220 } else { 3221 exit_val = ldcp->read_p(ldcp, bufp, sizep); 3222 } 3223 3224 /* 3225 * if queue has been drained - clear interrupt 3226 */ 3227 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3228 &ldcp->link_state); 3229 if (rv != 0) { 3230 cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", 3231 ldcp->id); 3232 mutex_enter(&ldcp->tx_lock); 3233 i_ldc_reset(ldcp, B_TRUE); 3234 mutex_exit(&ldcp->tx_lock); 3235 return (ECONNRESET); 3236 } 3237 3238 if (exit_val == 0) { 3239 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3240 ldcp->link_state == LDC_CHANNEL_RESET) { 3241 mutex_enter(&ldcp->tx_lock); 3242 i_ldc_reset(ldcp, B_FALSE); 3243 exit_val = ECONNRESET; 3244 mutex_exit(&ldcp->tx_lock); 3245 } 3246 if ((rv == 0) && 3247 (ldcp->rx_intr_state == LDC_INTR_PEND) && 3248 (rx_head == rx_tail)) { 3249 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 3250 } 3251 } 3252 3253 mutex_exit(&ldcp->lock); 3254 return (exit_val); 3255 } 3256 3257 /* 3258 * Basic raw mondo read - 3259 * no interpretation of mondo contents at all. 
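 *
 * Each call consumes exactly one queue entry: the caller's buffer
 * must hold at least LDC_PAYLOAD_SIZE_RAW bytes (else ENOBUFS), and
 * *sizep returns either 0 (queue empty) or LDC_PAYLOAD_SIZE_RAW.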
3260 * 3261 * Enter and exit with ldcp->lock held by caller 3262 */ 3263 static int 3264 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3265 { 3266 uint64_t q_size_mask; 3267 ldc_msg_t *msgp; 3268 uint8_t *msgbufp; 3269 int rv = 0, space; 3270 uint64_t rx_head, rx_tail; 3271 3272 space = *sizep; 3273 3274 if (space < LDC_PAYLOAD_SIZE_RAW) 3275 return (ENOBUFS); 3276 3277 ASSERT(mutex_owned(&ldcp->lock)); 3278 3279 /* compute mask for increment */ 3280 q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT; 3281 3282 /* 3283 * Read packet(s) from the queue 3284 */ 3285 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3286 &ldcp->link_state); 3287 if (rv != 0) { 3288 cmn_err(CE_WARN, 3289 "ldc_read_raw: (0x%lx) unable to read queue ptrs", 3290 ldcp->id); 3291 return (EIO); 3292 } 3293 D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx," 3294 " rxt=0x%llx, st=0x%llx\n", 3295 ldcp->id, rx_head, rx_tail, ldcp->link_state); 3296 3297 /* reset the channel state if the channel went down */ 3298 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3299 ldcp->link_state == LDC_CHANNEL_RESET) { 3300 mutex_enter(&ldcp->tx_lock); 3301 i_ldc_reset(ldcp, B_FALSE); 3302 mutex_exit(&ldcp->tx_lock); 3303 return (ECONNRESET); 3304 } 3305 3306 /* 3307 * Check for empty queue 3308 */ 3309 if (rx_head == rx_tail) { 3310 *sizep = 0; 3311 return (0); 3312 } 3313 3314 /* get the message */ 3315 msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head); 3316 3317 /* if channel is in RAW mode, copy data and return */ 3318 msgbufp = (uint8_t *)&(msgp->raw[0]); 3319 3320 bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW); 3321 3322 DUMP_PAYLOAD(ldcp->id, msgbufp); 3323 3324 *sizep = LDC_PAYLOAD_SIZE_RAW; 3325 3326 rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask; 3327 rv = i_ldc_set_rx_head(ldcp, rx_head); 3328 3329 return (rv); 3330 } 3331 3332 /* 3333 * Process LDC mondos to build larger packets 3334 * with either un-reliable or reliable delivery. 
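 *
 * Fragmented transfers are reassembled here: the first packet of a
 * transfer carries LDC_FRAG_START, the last carries LDC_FRAG_STOP,
 * and a seqid mismatch results in a NACK and a purge of the Rx queue.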
3335 * 3336 * Enter and exit with ldcp->lock held by caller 3337 */ 3338 static int 3339 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3340 { 3341 int rv = 0; 3342 uint64_t rx_head = 0, rx_tail = 0; 3343 uint64_t curr_head = 0; 3344 ldc_msg_t *msg; 3345 caddr_t target; 3346 size_t len = 0, bytes_read = 0; 3347 int retries = 0; 3348 uint64_t q_size_mask; 3349 uint64_t first_fragment = 0; 3350 3351 target = target_bufp; 3352 3353 ASSERT(mutex_owned(&ldcp->lock)); 3354 3355 /* check if the buffer and size are valid */ 3356 if (target_bufp == NULL || *sizep == 0) { 3357 DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n", 3358 ldcp->id); 3359 return (EINVAL); 3360 } 3361 3362 /* compute mask for increment */ 3363 q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT; 3364 3365 /* 3366 * Read packet(s) from the queue 3367 */ 3368 rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail, 3369 &ldcp->link_state); 3370 if (rv != 0) { 3371 cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", 3372 ldcp->id); 3373 mutex_enter(&ldcp->tx_lock); 3374 i_ldc_reset(ldcp, B_TRUE); 3375 mutex_exit(&ldcp->tx_lock); 3376 return (ECONNRESET); 3377 } 3378 D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n", 3379 ldcp->id, curr_head, rx_tail, ldcp->link_state); 3380 3381 /* reset the channel state if the channel went down */ 3382 if (ldcp->link_state != LDC_CHANNEL_UP) 3383 goto channel_is_reset; 3384 3385 for (;;) { 3386 3387 if (curr_head == rx_tail) { 3388 rv = hv_ldc_rx_get_state(ldcp->id, 3389 &rx_head, &rx_tail, &ldcp->link_state); 3390 if (rv != 0) { 3391 cmn_err(CE_WARN, 3392 "ldc_read: (0x%lx) cannot read queue ptrs", 3393 ldcp->id); 3394 mutex_enter(&ldcp->tx_lock); 3395 i_ldc_reset(ldcp, B_TRUE); 3396 mutex_exit(&ldcp->tx_lock); 3397 return (ECONNRESET); 3398 } 3399 if (ldcp->link_state != LDC_CHANNEL_UP) 3400 goto channel_is_reset; 3401 3402 if (curr_head == rx_tail) { 3403 3404 /* If in the middle of a fragmented xfer */ 3405 if (first_fragment != 0) { 3406 3407 /* wait for ldc_delay usecs */ 3408 drv_usecwait(ldc_delay); 3409 3410 if (++retries < ldc_max_retries) 3411 continue; 3412 3413 *sizep = 0; 3414 ldcp->last_msg_rcd = first_fragment - 1; 3415 DWARN(DBG_ALL_LDCS, "ldc_read: " 3416 "(0x%llx) read timeout", 3417 ldcp->id); 3418 return (EAGAIN); 3419 } 3420 *sizep = 0; 3421 break; 3422 } 3423 } 3424 retries = 0; 3425 3426 D2(ldcp->id, 3427 "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n", 3428 ldcp->id, curr_head, rx_head, rx_tail); 3429 3430 /* get the message */ 3431 msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head); 3432 3433 DUMP_LDC_PKT(ldcp, "ldc_read received pkt", 3434 ldcp->rx_q_va + curr_head); 3435 3436 /* Check the message ID for the message received */ 3437 if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) { 3438 3439 DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, " 3440 "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail); 3441 3442 /* throw away data */ 3443 bytes_read = 0; 3444 3445 /* Reset last_msg_rcd to start of message */ 3446 if (first_fragment != 0) { 3447 ldcp->last_msg_rcd = first_fragment - 1; 3448 first_fragment = 0; 3449 } 3450 /* 3451 * Send a NACK -- invalid seqid 3452 * get the current tail for the response 3453 */ 3454 rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK, 3455 (msg->ctrl & LDC_CTRL_MASK)); 3456 if (rv) { 3457 cmn_err(CE_NOTE, 3458 "ldc_read: (0x%lx) err sending " 3459 "NACK msg\n", ldcp->id); 3460 3461 /* if cannot send NACK - reset channel */ 3462 mutex_enter(&ldcp->tx_lock); 3463 
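			/*
			 * A NACK could not be sent, so force a reset; the
			 * channel lock is already held, and taking tx_lock
			 * above preserves the lock -> tx_lock ordering
			 * documented in ldc_init().
			 */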
i_ldc_reset(ldcp, B_FALSE); 3464 mutex_exit(&ldcp->tx_lock); 3465 rv = ECONNRESET; 3466 break; 3467 } 3468 3469 /* purge receive queue */ 3470 rv = i_ldc_set_rx_head(ldcp, rx_tail); 3471 3472 break; 3473 } 3474 3475 /* 3476 * Process any messages of type CTRL messages 3477 * Future implementations should try to pass these 3478 * to LDC link by resetting the intr state. 3479 * 3480 * NOTE: not done as a switch() as type can be both ctrl+data 3481 */ 3482 if (msg->type & LDC_CTRL) { 3483 if (rv = i_ldc_ctrlmsg(ldcp, msg)) { 3484 if (rv == EAGAIN) 3485 continue; 3486 rv = i_ldc_set_rx_head(ldcp, rx_tail); 3487 *sizep = 0; 3488 bytes_read = 0; 3489 break; 3490 } 3491 } 3492 3493 /* process data ACKs */ 3494 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 3495 if (rv = i_ldc_process_data_ACK(ldcp, msg)) { 3496 *sizep = 0; 3497 bytes_read = 0; 3498 break; 3499 } 3500 } 3501 3502 /* process data NACKs */ 3503 if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) { 3504 DWARN(ldcp->id, 3505 "ldc_read: (0x%llx) received DATA/NACK", ldcp->id); 3506 mutex_enter(&ldcp->tx_lock); 3507 i_ldc_reset(ldcp, B_TRUE); 3508 mutex_exit(&ldcp->tx_lock); 3509 return (ECONNRESET); 3510 } 3511 3512 /* process data messages */ 3513 if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { 3514 3515 uint8_t *msgbuf = (uint8_t *)( 3516 (ldcp->mode == LDC_MODE_RELIABLE || 3517 ldcp->mode == LDC_MODE_STREAM) 3518 ? msg->rdata : msg->udata); 3519 3520 D2(ldcp->id, 3521 "ldc_read: (0x%llx) received data msg\n", ldcp->id); 3522 3523 /* get the packet length */ 3524 len = (msg->env & LDC_LEN_MASK); 3525 3526 /* 3527 * FUTURE OPTIMIZATION: 3528 * dont need to set q head for every 3529 * packet we read just need to do this when 3530 * we are done or need to wait for more 3531 * mondos to make a full packet - this is 3532 * currently expensive. 3533 */ 3534 3535 if (first_fragment == 0) { 3536 3537 /* 3538 * first packets should always have the start 3539 * bit set (even for a single packet). If not 3540 * throw away the packet 3541 */ 3542 if (!(msg->env & LDC_FRAG_START)) { 3543 3544 DWARN(DBG_ALL_LDCS, 3545 "ldc_read: (0x%llx) not start - " 3546 "frag=%x\n", ldcp->id, 3547 (msg->env) & LDC_FRAG_MASK); 3548 3549 /* toss pkt, inc head, cont reading */ 3550 bytes_read = 0; 3551 target = target_bufp; 3552 curr_head = 3553 (curr_head + LDC_PACKET_SIZE) 3554 & q_size_mask; 3555 if (rv = i_ldc_set_rx_head(ldcp, 3556 curr_head)) 3557 break; 3558 3559 continue; 3560 } 3561 3562 first_fragment = msg->seqid; 3563 } else { 3564 /* check to see if this is a pkt w/ START bit */ 3565 if (msg->env & LDC_FRAG_START) { 3566 DWARN(DBG_ALL_LDCS, 3567 "ldc_read:(0x%llx) unexpected pkt" 3568 " env=0x%x discarding %d bytes," 3569 " lastmsg=%d, currentmsg=%d\n", 3570 ldcp->id, msg->env&LDC_FRAG_MASK, 3571 bytes_read, ldcp->last_msg_rcd, 3572 msg->seqid); 3573 3574 /* throw data we have read so far */ 3575 bytes_read = 0; 3576 target = target_bufp; 3577 first_fragment = msg->seqid; 3578 3579 if (rv = i_ldc_set_rx_head(ldcp, 3580 curr_head)) 3581 break; 3582 } 3583 } 3584 3585 /* copy (next) pkt into buffer */ 3586 if (len <= (*sizep - bytes_read)) { 3587 bcopy(msgbuf, target, len); 3588 target += len; 3589 bytes_read += len; 3590 } else { 3591 /* 3592 * there is not enough space in the buffer to 3593 * read this pkt. 
throw message away & continue 3594 * reading data from queue 3595 */ 3596 DWARN(DBG_ALL_LDCS, 3597 "ldc_read: (0x%llx) buffer too small, " 3598 "head=0x%lx, expect=%d, got=%d\n", ldcp->id, 3599 curr_head, *sizep, bytes_read+len); 3600 3601 first_fragment = 0; 3602 target = target_bufp; 3603 bytes_read = 0; 3604 3605 /* throw away everything received so far */ 3606 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) 3607 break; 3608 3609 /* continue reading remaining pkts */ 3610 continue; 3611 } 3612 } 3613 3614 /* set the message id */ 3615 ldcp->last_msg_rcd = msg->seqid; 3616 3617 /* move the head one position */ 3618 curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask; 3619 3620 if (msg->env & LDC_FRAG_STOP) { 3621 3622 /* 3623 * All pkts that are part of this fragmented transfer 3624 * have been read or this was a single pkt read 3625 * or there was an error 3626 */ 3627 3628 /* set the queue head */ 3629 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) 3630 bytes_read = 0; 3631 3632 *sizep = bytes_read; 3633 3634 break; 3635 } 3636 3637 /* advance head if it is a DATA ACK */ 3638 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 3639 3640 /* set the queue head */ 3641 if (rv = i_ldc_set_rx_head(ldcp, curr_head)) { 3642 bytes_read = 0; 3643 break; 3644 } 3645 3646 D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx", 3647 ldcp->id, curr_head); 3648 } 3649 3650 } /* for (;;) */ 3651 3652 3653 /* 3654 * If useful data was read - Send msg ACK 3655 * OPTIMIZE: do not send ACK for all msgs - use some frequency 3656 */ 3657 if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE || 3658 ldcp->mode == LDC_MODE_STREAM)) { 3659 3660 rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0); 3661 if (rv && rv != EWOULDBLOCK) { 3662 cmn_err(CE_NOTE, 3663 "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id); 3664 3665 /* if cannot send ACK - reset channel */ 3666 goto channel_is_reset; 3667 } 3668 } 3669 3670 D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep); 3671 3672 return (rv); 3673 3674 channel_is_reset: 3675 mutex_enter(&ldcp->tx_lock); 3676 i_ldc_reset(ldcp, B_FALSE); 3677 mutex_exit(&ldcp->tx_lock); 3678 return (ECONNRESET); 3679 } 3680 3681 /* 3682 * Use underlying reliable packet mechanism to fetch 3683 * and buffer incoming packets so we can hand them back as 3684 * a basic byte stream. 3685 * 3686 * Enter and exit with ldcp->lock held by caller 3687 */ 3688 static int 3689 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3690 { 3691 int rv; 3692 size_t size; 3693 3694 ASSERT(mutex_owned(&ldcp->lock)); 3695 3696 D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d", 3697 ldcp->id, *sizep); 3698 3699 if (ldcp->stream_remains == 0) { 3700 size = ldcp->mtu; 3701 rv = i_ldc_read_packet(ldcp, 3702 (caddr_t)ldcp->stream_bufferp, &size); 3703 D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d", 3704 ldcp->id, size); 3705 3706 if (rv != 0) 3707 return (rv); 3708 3709 ldcp->stream_remains = size; 3710 ldcp->stream_offset = 0; 3711 } 3712 3713 size = MIN(ldcp->stream_remains, *sizep); 3714 3715 bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size); 3716 ldcp->stream_offset += size; 3717 ldcp->stream_remains -= size; 3718 3719 D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d", 3720 ldcp->id, size); 3721 3722 *sizep = size; 3723 return (0); 3724 } 3725 3726 /* 3727 * Write specified amount of bytes to the channel 3728 * in multiple pkts of pkt_payload size. 
Each 3729 * packet is tagged with a unique packet ID in 3730 * the case of a reliable link. 3731 * 3732 * On return, size contains the number of bytes written. 3733 */ 3734 int 3735 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep) 3736 { 3737 ldc_chan_t *ldcp; 3738 int rv = 0; 3739 3740 if (handle == NULL) { 3741 DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n"); 3742 return (EINVAL); 3743 } 3744 ldcp = (ldc_chan_t *)handle; 3745 3746 /* check if writes can occur */ 3747 if (!mutex_tryenter(&ldcp->tx_lock)) { 3748 /* 3749 * Could not get the lock - channel could 3750 * be in the process of being unconfigured 3751 * or the reader has encountered an error 3752 */ 3753 return (EAGAIN); 3754 } 3755 3756 /* check if non-zero data to write */ 3757 if (buf == NULL || sizep == NULL) { 3758 DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n", 3759 ldcp->id); 3760 mutex_exit(&ldcp->tx_lock); 3761 return (EINVAL); 3762 } 3763 3764 if (*sizep == 0) { 3765 DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n", 3766 ldcp->id); 3767 mutex_exit(&ldcp->tx_lock); 3768 return (0); 3769 } 3770 3771 /* Check if channel is UP for data exchange */ 3772 if (ldcp->tstate != TS_UP) { 3773 DWARN(ldcp->id, 3774 "ldc_write: (0x%llx) channel is not in UP state\n", 3775 ldcp->id); 3776 *sizep = 0; 3777 rv = ECONNRESET; 3778 } else { 3779 rv = ldcp->write_p(ldcp, buf, sizep); 3780 } 3781 3782 mutex_exit(&ldcp->tx_lock); 3783 3784 return (rv); 3785 } 3786 3787 /* 3788 * Write a raw packet to the channel 3789 * On return, size contains the number of bytes written. 3790 */ 3791 static int 3792 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) 3793 { 3794 ldc_msg_t *ldcmsg; 3795 uint64_t tx_head, tx_tail, new_tail; 3796 int rv = 0; 3797 size_t size; 3798 3799 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 3800 ASSERT(ldcp->mode == LDC_MODE_RAW); 3801 3802 size = *sizep; 3803 3804 /* 3805 * Check to see if the packet size is less than or 3806 * equal to the packet size supported in raw mode 3807 */ 3808 if (size > ldcp->pkt_payload) { 3809 DWARN(ldcp->id, 3810 "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n", 3811 ldcp->id, *sizep); 3812 *sizep = 0; 3813 return (EMSGSIZE); 3814 } 3815 3816 /* get the qptrs for the tx queue */ 3817 rv = hv_ldc_tx_get_state(ldcp->id, 3818 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 3819 if (rv != 0) { 3820 cmn_err(CE_WARN, 3821 "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); 3822 *sizep = 0; 3823 return (EIO); 3824 } 3825 3826 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3827 ldcp->link_state == LDC_CHANNEL_RESET) { 3828 DWARN(ldcp->id, 3829 "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); 3830 3831 *sizep = 0; 3832 if (mutex_tryenter(&ldcp->lock)) { 3833 i_ldc_reset(ldcp, B_FALSE); 3834 mutex_exit(&ldcp->lock); 3835 } else { 3836 /* 3837 * Release Tx lock, and then reacquire channel 3838 * and Tx lock in correct order 3839 */ 3840 mutex_exit(&ldcp->tx_lock); 3841 mutex_enter(&ldcp->lock); 3842 mutex_enter(&ldcp->tx_lock); 3843 i_ldc_reset(ldcp, B_FALSE); 3844 mutex_exit(&ldcp->lock); 3845 } 3846 return (ECONNRESET); 3847 } 3848 3849 tx_tail = ldcp->tx_tail; 3850 tx_head = ldcp->tx_head; 3851 new_tail = (tx_tail + LDC_PACKET_SIZE) & 3852 ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT); 3853 3854 if (new_tail == tx_head) { 3855 DWARN(DBG_ALL_LDCS, 3856 "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); 3857 *sizep = 0; 3858 return (EWOULDBLOCK); 3859 } 3860 3861 D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", 3862 ldcp->id, size);
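	/*
	 * In RAW mode a write is a single packet: the caller's bytes
	 * fill the entire queue entry, and no LDC header, seqid or
	 * fragmentation envelope is added.
	 */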
3863 3864 /* Send the data now */ 3865 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 3866 3867 /* copy the data into pkt */ 3868 bcopy((uint8_t *)buf, ldcmsg, size); 3869 3870 /* increment tail */ 3871 tx_tail = new_tail; 3872 3873 /* 3874 * All packets have been copied into the TX queue; 3875 * update the tail ptr in the HV 3876 */ 3877 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 3878 if (rv) { 3879 if (rv == EWOULDBLOCK) { 3880 DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n", 3881 ldcp->id); 3882 *sizep = 0; 3883 return (EWOULDBLOCK); 3884 } 3885 3886 *sizep = 0; 3887 if (mutex_tryenter(&ldcp->lock)) { 3888 i_ldc_reset(ldcp, B_FALSE); 3889 mutex_exit(&ldcp->lock); 3890 } else { 3891 /* 3892 * Release Tx lock, and then reacquire channel 3893 * and Tx lock in correct order 3894 */ 3895 mutex_exit(&ldcp->tx_lock); 3896 mutex_enter(&ldcp->lock); 3897 mutex_enter(&ldcp->tx_lock); 3898 i_ldc_reset(ldcp, B_FALSE); 3899 mutex_exit(&ldcp->lock); 3900 } 3901 return (ECONNRESET); 3902 } 3903 3904 ldcp->tx_tail = tx_tail; 3905 *sizep = size; 3906 3907 D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size); 3908 3909 return (rv); 3910 } 3911 3912 3913 /* 3914 * Write specified amount of bytes to the channel 3915 * in multiple pkts of pkt_payload size. Each 3916 * packet is tagged with a unique packet ID in 3917 * the case of a reliable link. 3918 * 3919 * On return, size contains the number of bytes written. 3920 * This function needs to ensure that the write size is < MTU size 3921 */ 3922 static int 3923 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size) 3924 { 3925 ldc_msg_t *ldcmsg; 3926 uint64_t tx_head, tx_tail, new_tail, start; 3927 uint64_t txq_size_mask, numavail; 3928 uint8_t *msgbuf, *source = (uint8_t *)buf; 3929 size_t len, bytes_written = 0, remaining; 3930 int rv; 3931 uint32_t curr_seqid; 3932 3933 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 3934 3935 ASSERT(ldcp->mode == LDC_MODE_RELIABLE || 3936 ldcp->mode == LDC_MODE_UNRELIABLE || 3937 ldcp->mode == LDC_MODE_STREAM); 3938 3939 /* compute mask for increment */ 3940 txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT; 3941 3942 /* get the qptrs for the tx queue */ 3943 rv = hv_ldc_tx_get_state(ldcp->id, 3944 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 3945 if (rv != 0) { 3946 cmn_err(CE_WARN, 3947 "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); 3948 *size = 0; 3949 return (EIO); 3950 } 3951 3952 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3953 ldcp->link_state == LDC_CHANNEL_RESET) { 3954 DWARN(ldcp->id, 3955 "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); 3956 *size = 0; 3957 if (mutex_tryenter(&ldcp->lock)) { 3958 i_ldc_reset(ldcp, B_FALSE); 3959 mutex_exit(&ldcp->lock); 3960 } else { 3961 /* 3962 * Release Tx lock, and then reacquire channel 3963 * and Tx lock in correct order 3964 */ 3965 mutex_exit(&ldcp->tx_lock); 3966 mutex_enter(&ldcp->lock); 3967 mutex_enter(&ldcp->tx_lock); 3968 i_ldc_reset(ldcp, B_FALSE); 3969 mutex_exit(&ldcp->lock); 3970 } 3971 return (ECONNRESET); 3972 } 3973 3974 tx_tail = ldcp->tx_tail; 3975 new_tail = (tx_tail + LDC_PACKET_SIZE) % 3976 (ldcp->tx_q_entries << LDC_PACKET_SHIFT); 3977 3978 /* 3979 * Link mode determines whether we use the HV Tx head or the 3980 * private protocol head (corresponding to the last ACKd pkt) to 3981 * determine how much we can write 3982 */ 3983 tx_head = (ldcp->mode == LDC_MODE_RELIABLE || 3984 ldcp->mode == LDC_MODE_STREAM) 3985 ?
ldcp->tx_ackd_head : ldcp->tx_head; 3986 if (new_tail == tx_head) { 3987 DWARN(DBG_ALL_LDCS, 3988 "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); 3989 *size = 0; 3990 return (EWOULDBLOCK); 3991 } 3992 3993 /* 3994 * Make sure that the LDC Tx queue has enough space 3995 */ 3996 numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT) 3997 + ldcp->tx_q_entries - 1; 3998 numavail %= ldcp->tx_q_entries; 3999 4000 if (*size > (numavail * ldcp->pkt_payload)) { 4001 DWARN(DBG_ALL_LDCS, 4002 "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id); 4003 return (EWOULDBLOCK); 4004 } 4005 4006 D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", 4007 ldcp->id, *size); 4008 4009 /* Send the data now */ 4010 bytes_written = 0; 4011 curr_seqid = ldcp->last_msg_snt; 4012 start = tx_tail; 4013 4014 while (*size > bytes_written) { 4015 4016 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 4017 4018 msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE || 4019 ldcp->mode == LDC_MODE_STREAM) 4020 ? ldcmsg->rdata : ldcmsg->udata); 4021 4022 ldcmsg->type = LDC_DATA; 4023 ldcmsg->stype = LDC_INFO; 4024 ldcmsg->ctrl = 0; 4025 4026 remaining = *size - bytes_written; 4027 len = min(ldcp->pkt_payload, remaining); 4028 ldcmsg->env = (uint8_t)len; 4029 4030 curr_seqid++; 4031 ldcmsg->seqid = curr_seqid; 4032 4033 /* copy the data into pkt */ 4034 bcopy(source, msgbuf, len); 4035 4036 source += len; 4037 bytes_written += len; 4038 4039 /* increment tail */ 4040 tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask; 4041 4042 ASSERT(tx_tail != tx_head); 4043 } 4044 4045 /* Set the start and stop bits */ 4046 ldcmsg->env |= LDC_FRAG_STOP; 4047 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start); 4048 ldcmsg->env |= LDC_FRAG_START; 4049 4050 /* 4051 * All packets have been copied into the TX queue; 4052 * update the tail ptr in the HV 4053 */ 4054 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 4055 if (rv == 0) { 4056 ldcp->tx_tail = tx_tail; 4057 ldcp->last_msg_snt = curr_seqid; 4058 *size = bytes_written; 4059 } else { 4060 int rv2; 4061 4062 if (rv != EWOULDBLOCK) { 4063 *size = 0; 4064 if (mutex_tryenter(&ldcp->lock)) { 4065 i_ldc_reset(ldcp, B_FALSE); 4066 mutex_exit(&ldcp->lock); 4067 } else { 4068 /* 4069 * Release Tx lock, and then reacquire channel 4070 * and Tx lock in correct order 4071 */ 4072 mutex_exit(&ldcp->tx_lock); 4073 mutex_enter(&ldcp->lock); 4074 mutex_enter(&ldcp->tx_lock); 4075 i_ldc_reset(ldcp, B_FALSE); 4076 mutex_exit(&ldcp->lock); 4077 } 4078 return (ECONNRESET); 4079 } 4080 4081 D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, " 4082 "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n", 4083 rv, ldcp->tx_head, ldcp->tx_tail, tx_tail, 4084 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 4085 4086 rv2 = hv_ldc_tx_get_state(ldcp->id, 4087 &tx_head, &tx_tail, &ldcp->link_state); 4088 4089 D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x " 4090 "(head 0x%x, tail 0x%x state 0x%x)\n", 4091 rv2, tx_head, tx_tail, ldcp->link_state); 4092 4093 *size = 0; 4094 } 4095 4096 D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size); 4097 4098 return (rv); 4099 } 4100 4101 /* 4102 * Write specified amount of bytes to the channel 4103 * in multiple pkts of pkt_payload size. Each 4104 * packet is tagged with a unique packet ID in 4105 * the case of a reliable link. 4106 * 4107 * On return, size contains the number of bytes written.
4108 * This function needs to ensure that the write size is < MTU size 4109 */ 4110 static int 4111 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) 4112 { 4113 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 4114 ASSERT(ldcp->mode == LDC_MODE_STREAM); 4115 4116 /* Truncate packet to max of MTU size */ 4117 if (*sizep > ldcp->mtu) *sizep = ldcp->mtu; 4118 return (i_ldc_write_packet(ldcp, buf, sizep)); 4119 } 4120 4121 4122 /* 4123 * Interfaces for channel nexus to register/unregister with LDC module. 4124 * The nexus will register functions to be used to register individual 4125 * channels with the nexus and enable interrupts for the channels 4126 */ 4127 int 4128 ldc_register(ldc_cnex_t *cinfo) 4129 { 4130 ldc_chan_t *ldcp; 4131 4132 if (cinfo == NULL || cinfo->dip == NULL || 4133 cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL || 4134 cinfo->add_intr == NULL || cinfo->rem_intr == NULL || 4135 cinfo->clr_intr == NULL) { 4136 4137 DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n"); 4138 return (EINVAL); 4139 } 4140 4141 mutex_enter(&ldcssp->lock); 4142 4143 /* nexus registration */ 4144 ldcssp->cinfo.dip = cinfo->dip; 4145 ldcssp->cinfo.reg_chan = cinfo->reg_chan; 4146 ldcssp->cinfo.unreg_chan = cinfo->unreg_chan; 4147 ldcssp->cinfo.add_intr = cinfo->add_intr; 4148 ldcssp->cinfo.rem_intr = cinfo->rem_intr; 4149 ldcssp->cinfo.clr_intr = cinfo->clr_intr; 4150 4151 /* register any channels that might have been previously initialized */ 4152 ldcp = ldcssp->chan_list; 4153 while (ldcp) { 4154 if ((ldcp->tstate & TS_QCONF_RDY) && 4155 (ldcp->tstate & TS_CNEX_RDY) == 0) 4156 (void) i_ldc_register_channel(ldcp); 4157 4158 ldcp = ldcp->next; 4159 } 4160 4161 mutex_exit(&ldcssp->lock); 4162 4163 return (0); 4164 } 4165 4166 int 4167 ldc_unregister(ldc_cnex_t *cinfo) 4168 { 4169 if (cinfo == NULL || cinfo->dip == NULL) { 4170 DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n"); 4171 return (EINVAL); 4172 } 4173 4174 mutex_enter(&ldcssp->lock); 4175 4176 if (cinfo->dip != ldcssp->cinfo.dip) { 4177 DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n"); 4178 mutex_exit(&ldcssp->lock); 4179 return (EINVAL); 4180 } 4181 4182 /* nexus unregister */ 4183 ldcssp->cinfo.dip = NULL; 4184 ldcssp->cinfo.reg_chan = NULL; 4185 ldcssp->cinfo.unreg_chan = NULL; 4186 ldcssp->cinfo.add_intr = NULL; 4187 ldcssp->cinfo.rem_intr = NULL; 4188 ldcssp->cinfo.clr_intr = NULL; 4189 4190 mutex_exit(&ldcssp->lock); 4191 4192 return (0); 4193 } 4194 4195 4196 /* ------------------------------------------------------------------------- */ 4197 4198 /* 4199 * Allocate a memory handle for the channel and link it into the list. 4200 * Also choose which memory table to use if this is the first handle 4201 * being assigned to this channel 4202 */ 4203 int 4204 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle) 4205 { 4206 ldc_chan_t *ldcp; 4207 ldc_mhdl_t *mhdl; 4208 4209 if (handle == NULL) { 4210 DWARN(DBG_ALL_LDCS, 4211 "ldc_mem_alloc_handle: invalid channel handle\n"); 4212 return (EINVAL); 4213 } 4214 ldcp = (ldc_chan_t *)handle; 4215 4216 mutex_enter(&ldcp->lock); 4217 4218 /* check to see if channel is initialized */ 4219 if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) { 4220 DWARN(ldcp->id, 4221 "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n", 4222 ldcp->id); 4223 mutex_exit(&ldcp->lock); 4224 return (EINVAL); 4225 } 4226 4227 /* allocate handle for channel */ 4228 mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP); 4229 4230 /* initialize the lock */ 4231
mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL); 4232 4233 mhdl->myshadow = B_FALSE; 4234 mhdl->memseg = NULL; 4235 mhdl->ldcp = ldcp; 4236 mhdl->status = LDC_UNBOUND; 4237 4238 /* insert memory handle (@ head) into list */ 4239 if (ldcp->mhdl_list == NULL) { 4240 ldcp->mhdl_list = mhdl; 4241 mhdl->next = NULL; 4242 } else { 4243 /* insert @ head */ 4244 mhdl->next = ldcp->mhdl_list; 4245 ldcp->mhdl_list = mhdl; 4246 } 4247 4248 /* return the handle */ 4249 *mhandle = (ldc_mem_handle_t)mhdl; 4250 4251 mutex_exit(&ldcp->lock); 4252 4253 D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n", 4254 ldcp->id, mhdl); 4255 4256 return (0); 4257 } 4258 4259 /* 4260 * Free memory handle for the channel and unlink it from the list 4261 */ 4262 int 4263 ldc_mem_free_handle(ldc_mem_handle_t mhandle) 4264 { 4265 ldc_mhdl_t *mhdl, *phdl; 4266 ldc_chan_t *ldcp; 4267 4268 if (mhandle == NULL) { 4269 DWARN(DBG_ALL_LDCS, 4270 "ldc_mem_free_handle: invalid memory handle\n"); 4271 return (EINVAL); 4272 } 4273 mhdl = (ldc_mhdl_t *)mhandle; 4274 4275 mutex_enter(&mhdl->lock); 4276 4277 ldcp = mhdl->ldcp; 4278 4279 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { 4280 DWARN(ldcp->id, 4281 "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n", 4282 mhdl); 4283 mutex_exit(&mhdl->lock); 4284 return (EINVAL); 4285 } 4286 mutex_exit(&mhdl->lock); 4287 4288 mutex_enter(&ldcp->mlist_lock); 4289 4290 phdl = ldcp->mhdl_list; 4291 4292 /* first handle */ 4293 if (phdl == mhdl) { 4294 ldcp->mhdl_list = mhdl->next; 4295 mutex_destroy(&mhdl->lock); 4296 kmem_cache_free(ldcssp->memhdl_cache, mhdl); 4297 4298 D1(ldcp->id, 4299 "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n", 4300 ldcp->id, mhdl); 4301 } else { 4302 /* walk the list - unlink and free */ 4303 while (phdl != NULL) { 4304 if (phdl->next == mhdl) { 4305 phdl->next = mhdl->next; 4306 mutex_destroy(&mhdl->lock); 4307 kmem_cache_free(ldcssp->memhdl_cache, mhdl); 4308 D1(ldcp->id, 4309 "ldc_mem_free_handle: (0x%llx) freed " 4310 "handle 0x%llx\n", ldcp->id, mhdl); 4311 break; 4312 } 4313 phdl = phdl->next; 4314 } 4315 } 4316 4317 if (phdl == NULL) { 4318 DWARN(ldcp->id, 4319 "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl); 4320 mutex_exit(&ldcp->mlist_lock); 4321 return (EINVAL); 4322 } 4323 4324 mutex_exit(&ldcp->mlist_lock); 4325 4326 return (0); 4327 } 4328 4329 /* 4330 * Bind a memory handle to a virtual address. 4331 * The virtual address is converted to the corresponding real addresses. 4332 * Returns pointer to the first ldc_mem_cookie and the total number 4333 * of cookies for this virtual address. Other cookies can be obtained 4334 * using the ldc_mem_nextcookie() call. If the pages are stored in 4335 * consecutive locations in the table, a single cookie corresponding to 4336 * the first location is returned. The cookie size spans all the entries. 4337 * 4338 * If the VA corresponds to a page that is already being exported, reuse 4339 * the page and do not export it again. Bump the page's use count. 
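 *
 * For example, exporting a buffer (a minimal sketch; 'chan_hdl',
 * 'buf' and 'len' are illustrative and error checks are omitted):
 *
 *	ldc_mem_handle_t mh;
 *	ldc_mem_cookie_t cookie;
 *	uint32_t ccount;
 *
 *	(void) ldc_mem_alloc_handle(chan_hdl, &mh);
 *	(void) ldc_mem_bind_handle(mh, buf, len, LDC_SHADOW_MAP,
 *	    LDC_MEM_R | LDC_MEM_W, &cookie, &ccount);
 *
 * The returned cookie(s) would then be sent to the peer over the
 * channel so it can map in or copy the exported memory.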
4340 */ 4341 int 4342 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, 4343 uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) 4344 { 4345 ldc_mhdl_t *mhdl; 4346 ldc_chan_t *ldcp; 4347 ldc_mtbl_t *mtbl; 4348 ldc_memseg_t *memseg; 4349 ldc_mte_t tmp_mte; 4350 uint64_t index, prev_index = 0; 4351 int64_t cookie_idx; 4352 uintptr_t raddr, ra_aligned; 4353 uint64_t psize, poffset, v_offset; 4354 uint64_t pg_shift, pg_size, pg_size_code, pg_mask; 4355 pgcnt_t npages; 4356 caddr_t v_align, addr; 4357 int i, rv; 4358 4359 if (mhandle == NULL) { 4360 DWARN(DBG_ALL_LDCS, 4361 "ldc_mem_bind_handle: invalid memory handle\n"); 4362 return (EINVAL); 4363 } 4364 mhdl = (ldc_mhdl_t *)mhandle; 4365 ldcp = mhdl->ldcp; 4366 4367 /* clear count */ 4368 *ccount = 0; 4369 4370 mutex_enter(&mhdl->lock); 4371 4372 if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) { 4373 DWARN(ldcp->id, 4374 "ldc_mem_bind_handle: (0x%x) handle already bound\n", 4375 mhandle); 4376 mutex_exit(&mhdl->lock); 4377 return (EINVAL); 4378 } 4379 4380 /* Force address and size to be 8-byte aligned */ 4381 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 4382 DWARN(ldcp->id, 4383 "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n"); 4384 mutex_exit(&mhdl->lock); 4385 return (EINVAL); 4386 } 4387 4388 /* 4389 * If this channel is binding a memory handle for the 4390 * first time, allocate a memory map table for it and initialize it 4391 */ 4392 if ((mtbl = ldcp->mtbl) == NULL) { 4393 4394 mutex_enter(&ldcp->lock); 4395 4396 /* Allocate and initialize the map table structure */ 4397 mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP); 4398 mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries; 4399 mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t); 4400 mtbl->next_entry = NULL; 4401 mtbl->contigmem = B_TRUE; 4402 4403 /* Allocate the table itself */ 4404 mtbl->table = (ldc_mte_slot_t *) 4405 contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE); 4406 if (mtbl->table == NULL) { 4407 4408 /* allocate a page of memory using kmem_alloc */ 4409 mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP); 4410 mtbl->size = MMU_PAGESIZE; 4411 mtbl->contigmem = B_FALSE; 4412 mtbl->num_entries = mtbl->num_avail = 4413 mtbl->size / sizeof (ldc_mte_slot_t); 4414 DWARN(ldcp->id, 4415 "ldc_mem_bind_handle: (0x%llx) reduced tbl size " 4416 "to %lx entries\n", ldcp->id, mtbl->num_entries); 4417 } 4418 4419 /* zero out the memory */ 4420 bzero(mtbl->table, mtbl->size); 4421 4422 /* initialize the lock */ 4423 mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL); 4424 4425 /* register table for this channel */ 4426 rv = hv_ldc_set_map_table(ldcp->id, 4427 va_to_pa(mtbl->table), mtbl->num_entries); 4428 if (rv != 0) { 4429 cmn_err(CE_WARN, 4430 "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl", 4431 ldcp->id, rv); 4432 if (mtbl->contigmem) 4433 contig_mem_free(mtbl->table, mtbl->size); 4434 else 4435 kmem_free(mtbl->table, mtbl->size); 4436 mutex_destroy(&mtbl->lock); 4437 kmem_free(mtbl, sizeof (ldc_mtbl_t)); 4438 mutex_exit(&ldcp->lock); 4439 mutex_exit(&mhdl->lock); 4440 return (EIO); 4441 } 4442 4443 ldcp->mtbl = mtbl; 4444 mutex_exit(&ldcp->lock); 4445 4446 D1(ldcp->id, 4447 "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n", 4448 ldcp->id, ldcp->mtbl->table); 4449 } 4450 4451 /* FUTURE: get the page size, pgsz code, and shift */ 4452 pg_size = MMU_PAGESIZE; 4453 pg_size_code = page_szc(pg_size); 4454 pg_shift = page_get_shift(pg_size_code); 4455 pg_mask = ~(pg_size - 1); 4456 4457 D1(ldcp->id,
"ldc_mem_bind_handle: (0x%llx) binding " 4458 "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 4459 ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); 4460 4461 /* aligned VA and its offset */ 4462 v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1)); 4463 v_offset = ((uintptr_t)vaddr) & (pg_size - 1); 4464 4465 npages = (len+v_offset)/pg_size; 4466 npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1; 4467 4468 D1(ldcp->id, "ldc_mem_bind_handle: binding " 4469 "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", 4470 ldcp->id, vaddr, v_align, v_offset, npages); 4471 4472 /* lock the memory table - exclusive access to channel */ 4473 mutex_enter(&mtbl->lock); 4474 4475 if (npages > mtbl->num_avail) { 4476 D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n", 4477 ldcp->id); 4478 mutex_exit(&mtbl->lock); 4479 mutex_exit(&mhdl->lock); 4480 return (ENOMEM); 4481 } 4482 4483 /* Allocate a memseg structure */ 4484 memseg = mhdl->memseg = 4485 kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); 4486 4487 /* Allocate memory to store all pages and cookies */ 4488 memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); 4489 memseg->cookies = 4490 kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP); 4491 4492 D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n", 4493 ldcp->id, npages); 4494 4495 addr = v_align; 4496 4497 /* 4498 * Check if direct shared memory map is enabled, if not change 4499 * the mapping type to include SHADOW_MAP. 4500 */ 4501 if (ldc_shmem_enabled == 0) 4502 mtype = LDC_SHADOW_MAP; 4503 4504 /* 4505 * Table slots are used in a round-robin manner. The algorithm permits 4506 * inserting duplicate entries. Slots allocated earlier will typically 4507 * get freed before we get back to reusing the slot.Inserting duplicate 4508 * entries should be OK as we only lookup entries using the cookie addr 4509 * i.e. tbl index, during export, unexport and copy operation. 4510 * 4511 * One implementation what was tried was to search for a duplicate 4512 * page entry first and reuse it. The search overhead is very high and 4513 * in the vnet case dropped the perf by almost half, 50 to 24 mbps. 4514 * So it does make sense to avoid searching for duplicates. 4515 * 4516 * But during the process of searching for a free slot, if we find a 4517 * duplicate entry we will go ahead and use it, and bump its use count. 4518 */ 4519 4520 /* index to start searching from */ 4521 index = mtbl->next_entry; 4522 cookie_idx = -1; 4523 4524 tmp_mte.ll = 0; /* initialise fields to 0 */ 4525 4526 if (mtype & LDC_DIRECT_MAP) { 4527 tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0; 4528 tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0; 4529 tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0; 4530 } 4531 4532 if (mtype & LDC_SHADOW_MAP) { 4533 tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0; 4534 tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0; 4535 } 4536 4537 if (mtype & LDC_IO_MAP) { 4538 tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0; 4539 tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 
1 : 0; 4540 } 4541 4542 D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); 4543 4544 tmp_mte.mte_pgszc = pg_size_code; 4545 4546 /* initialize each mem table entry */ 4547 for (i = 0; i < npages; i++) { 4548 4549 /* check if slot is available in the table */ 4550 while (mtbl->table[index].entry.ll != 0) { 4551 4552 index = (index + 1) % mtbl->num_entries; 4553 4554 if (index == mtbl->next_entry) { 4555 /* we have looped around */ 4556 DWARN(DBG_ALL_LDCS, 4557 "ldc_mem_bind_handle: (0x%llx) cannot find " 4558 "entry\n", ldcp->id); 4559 *ccount = 0; 4560 4561 /* NOTE: free memory, remove previous entries */ 4562 /* this shouldn't happen as num_avail was ok */ 4563 4564 mutex_exit(&mtbl->lock); 4565 mutex_exit(&mhdl->lock); 4566 return (ENOMEM); 4567 } 4568 } 4569 4570 /* get the real address */ 4571 raddr = va_to_pa((void *)addr); 4572 ra_aligned = ((uintptr_t)raddr & pg_mask); 4573 4574 /* build the mte */ 4575 tmp_mte.mte_rpfn = ra_aligned >> pg_shift; 4576 4577 D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); 4578 4579 /* update entry in table */ 4580 mtbl->table[index].entry = tmp_mte; 4581 4582 D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx" 4583 " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index); 4584 4585 /* calculate the size and offset for this export range */ 4586 if (i == 0) { 4587 /* first page */ 4588 psize = min((pg_size - v_offset), len); 4589 poffset = v_offset; 4590 4591 } else if (i == (npages - 1)) { 4592 /* last page */ 4593 psize = (((uintptr_t)(vaddr + len)) & 4594 ((uint64_t)(pg_size-1))); 4595 if (psize == 0) 4596 psize = pg_size; 4597 poffset = 0; 4598 4599 } else { 4600 /* middle pages */ 4601 psize = pg_size; 4602 poffset = 0; 4603 } 4604 4605 /* store entry for this page */ 4606 memseg->pages[i].index = index; 4607 memseg->pages[i].raddr = raddr; 4608 memseg->pages[i].offset = poffset; 4609 memseg->pages[i].size = psize; 4610 memseg->pages[i].mte = &(mtbl->table[index]); 4611 4612 /* create the cookie */ 4613 if (i == 0 || (index != prev_index + 1)) { 4614 cookie_idx++; 4615 memseg->cookies[cookie_idx].addr = 4616 IDX2COOKIE(index, pg_size_code, pg_shift); 4617 memseg->cookies[cookie_idx].addr |= poffset; 4618 memseg->cookies[cookie_idx].size = psize; 4619 4620 } else { 4621 memseg->cookies[cookie_idx].size += psize; 4622 } 4623 4624 D1(ldcp->id, "ldc_mem_bind_handle: bound " 4625 "(0x%llx) va=0x%llx, idx=0x%llx, " 4626 "ra=0x%llx(sz=0x%x,off=0x%x)\n", 4627 ldcp->id, addr, index, raddr, psize, poffset); 4628 4629 /* decrement number of available entries */ 4630 mtbl->num_avail--; 4631 4632 /* increment va by page size */ 4633 addr += pg_size; 4634 4635 /* increment index */ 4636 prev_index = index; 4637 index = (index + 1) % mtbl->num_entries; 4638 4639 /* save the next slot */ 4640 mtbl->next_entry = index; 4641 } 4642 4643 mutex_exit(&mtbl->lock); 4644 4645 /* memory handle = bound */ 4646 mhdl->mtype = mtype; 4647 mhdl->perm = perm; 4648 mhdl->status = LDC_BOUND; 4649 4650 /* update memseg_t */ 4651 memseg->vaddr = vaddr; 4652 memseg->raddr = memseg->pages[0].raddr; 4653 memseg->size = len; 4654 memseg->npages = npages; 4655 memseg->ncookies = cookie_idx + 1; 4656 memseg->next_cookie = (memseg->ncookies > 1) ?
1 : 0; 4657 4658 /* return count and first cookie */ 4659 *ccount = memseg->ncookies; 4660 cookie->addr = memseg->cookies[0].addr; 4661 cookie->size = memseg->cookies[0].size; 4662 4663 D1(ldcp->id, 4664 "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, " 4665 "pgs=0x%llx cookies=0x%llx\n", 4666 ldcp->id, mhdl, vaddr, npages, memseg->ncookies); 4667 4668 mutex_exit(&mhdl->lock); 4669 return (0); 4670 } 4671 4672 /* 4673 * Return the next cookie associated with the specified memory handle 4674 */ 4675 int 4676 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie) 4677 { 4678 ldc_mhdl_t *mhdl; 4679 ldc_chan_t *ldcp; 4680 ldc_memseg_t *memseg; 4681 4682 if (mhandle == NULL) { 4683 DWARN(DBG_ALL_LDCS, 4684 "ldc_mem_nextcookie: invalid memory handle\n"); 4685 return (EINVAL); 4686 } 4687 mhdl = (ldc_mhdl_t *)mhandle; 4688 4689 mutex_enter(&mhdl->lock); 4690 4691 ldcp = mhdl->ldcp; 4692 memseg = mhdl->memseg; 4693 4694 if (cookie == 0) { 4695 DWARN(ldcp->id, 4696 "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n", 4697 ldcp->id); 4698 mutex_exit(&mhdl->lock); 4699 return (EINVAL); 4700 } 4701 4702 if (memseg->next_cookie != 0) { 4703 cookie->addr = memseg->cookies[memseg->next_cookie].addr; 4704 cookie->size = memseg->cookies[memseg->next_cookie].size; 4705 memseg->next_cookie++; 4706 if (memseg->next_cookie == memseg->ncookies) 4707 memseg->next_cookie = 0; 4708 4709 } else { 4710 DWARN(ldcp->id, 4711 "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id); 4712 cookie->addr = 0; 4713 cookie->size = 0; 4714 mutex_exit(&mhdl->lock); 4715 return (EINVAL); 4716 } 4717 4718 D1(ldcp->id, 4719 "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n", 4720 ldcp->id, cookie->addr, cookie->size); 4721 4722 mutex_exit(&mhdl->lock); 4723 return (0); 4724 } 4725 4726 /* 4727 * Unbind the virtual memory region associated with the specified 4728 * memory handle. All associated cookies are freed and the corresponding 4729 * RA space is no longer exported.
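 *
 * A minimal teardown sketch (assuming "mh" is a handle bound as in the
 * example above and that the peer no longer references the memory):
 *
 *	(void) ldc_mem_unbind_handle(mh);
 *	(void) ldc_mem_free_handle(mh);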
4730 */ 4731 int 4732 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle) 4733 { 4734 ldc_mhdl_t *mhdl; 4735 ldc_chan_t *ldcp; 4736 ldc_mtbl_t *mtbl; 4737 ldc_memseg_t *memseg; 4738 uint64_t cookie_addr; 4739 uint64_t pg_shift, pg_size_code; 4740 int i, rv; 4741 4742 if (mhandle == NULL) { 4743 DWARN(DBG_ALL_LDCS, 4744 "ldc_mem_unbind_handle: invalid memory handle\n"); 4745 return (EINVAL); 4746 } 4747 mhdl = (ldc_mhdl_t *)mhandle; 4748 4749 mutex_enter(&mhdl->lock); 4750 4751 if (mhdl->status == LDC_UNBOUND) { 4752 DWARN(DBG_ALL_LDCS, 4753 "ldc_mem_unbind_handle: (0x%x) handle is not bound\n", 4754 mhandle); 4755 mutex_exit(&mhdl->lock); 4756 return (EINVAL); 4757 } 4758 4759 ldcp = mhdl->ldcp; 4760 mtbl = ldcp->mtbl; 4761 4762 memseg = mhdl->memseg; 4763 4764 /* lock the memory table - exclusive access to channel */ 4765 mutex_enter(&mtbl->lock); 4766 4767 /* undo the pages exported */ 4768 for (i = 0; i < memseg->npages; i++) { 4769 4770 /* check for mapped pages, revocation cookie != 0 */ 4771 if (memseg->pages[i].mte->cookie) { 4772 4773 pg_size_code = page_szc(memseg->pages[i].size); 4774 pg_shift = page_get_shift(memseg->pages[i].size); 4775 cookie_addr = IDX2COOKIE(memseg->pages[i].index, 4776 pg_size_code, pg_shift); 4777 4778 D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke " 4779 "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id, 4780 cookie_addr, memseg->pages[i].mte->cookie); 4781 rv = hv_ldc_revoke(ldcp->id, cookie_addr, 4782 memseg->pages[i].mte->cookie); 4783 if (rv) { 4784 DWARN(ldcp->id, 4785 "ldc_mem_unbind_handle: (0x%llx) cannot " 4786 "revoke mapping, cookie %llx\n", ldcp->id, 4787 cookie_addr); 4788 } 4789 } 4790 4791 /* clear the entry from the table */ 4792 memseg->pages[i].mte->entry.ll = 0; 4793 mtbl->num_avail++; 4794 } 4795 mutex_exit(&mtbl->lock); 4796 4797 /* free the allocated memseg and page structures */ 4798 kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); 4799 kmem_free(memseg->cookies, 4800 (sizeof (ldc_mem_cookie_t) * memseg->npages)); 4801 kmem_cache_free(ldcssp->memseg_cache, memseg); 4802 4803 /* uninitialize the memory handle */ 4804 mhdl->memseg = NULL; 4805 mhdl->status = LDC_UNBOUND; 4806 4807 D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n", 4808 ldcp->id, mhdl); 4809 4810 mutex_exit(&mhdl->lock); 4811 return (0); 4812 } 4813 4814 /* 4815 * Get information about the memory handle. The base address of the 4816 * memory segment along with the map type and permission are returned. 4817 */ 4818 int 4819 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo) 4820 { 4821 ldc_mhdl_t *mhdl; 4822 4823 if (mhandle == NULL) { 4824 DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n"); 4825 return (EINVAL); 4826 } 4827 mhdl = (ldc_mhdl_t *)mhandle; 4828 4829 if (minfo == NULL) { 4830 DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n"); 4831 return (EINVAL); 4832 } 4833 4834 mutex_enter(&mhdl->lock); 4835 4836 minfo->status = mhdl->status; 4837 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { 4838 minfo->vaddr = mhdl->memseg->vaddr; 4839 minfo->raddr = mhdl->memseg->raddr; 4840 minfo->mtype = mhdl->mtype; 4841 minfo->perm = mhdl->perm; 4842 } 4843 mutex_exit(&mhdl->lock); 4844 4845 return (0); 4846 } 4847 4848 /* 4849 * Copy data either from or to the client specified virtual address 4850 * space to or from the exported memory associated with the cookies. 4851 * The direction argument determines whether the data is read from or 4852 * written to exported memory.
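 *
 * For illustration, a hedged sketch (the names "chan", "mybuf",
 * "cookies" and "ccount" are hypothetical; both mybuf and the size
 * must be 8-byte aligned as enforced below):
 *
 *	size_t size = 512;
 *
 *	rv = ldc_mem_copy(chan, mybuf, 0, &size, cookies, ccount,
 *	    LDC_COPY_OUT);
 *
 * LDC_COPY_OUT writes mybuf into the exported memory; LDC_COPY_IN
 * would read from it instead. On return, size reflects the number of
 * bytes actually copied.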
4853 */ 4854 int 4855 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size, 4856 ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction) 4857 { 4858 ldc_chan_t *ldcp; 4859 uint64_t local_voff, local_valign; 4860 uint64_t cookie_addr, cookie_size; 4861 uint64_t pg_shift, pg_size, pg_size_code; 4862 uint64_t export_caddr, export_poff, export_psize, export_size; 4863 uint64_t local_ra, local_poff, local_psize; 4864 uint64_t copy_size, copied_len = 0, total_bal = 0, idx = 0; 4865 pgcnt_t npages; 4866 size_t len = *size; 4867 int i, rv = 0; 4868 4869 uint64_t chid; 4870 4871 if (handle == NULL) { 4872 DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n"); 4873 return (EINVAL); 4874 } 4875 ldcp = (ldc_chan_t *)handle; 4876 chid = ldcp->id; 4877 4878 /* check to see if channel is UP */ 4879 if (ldcp->tstate != TS_UP) { 4880 DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n", 4881 chid); 4882 return (ECONNRESET); 4883 } 4884 4885 /* Force address and size to be 8-byte aligned */ 4886 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 4887 DWARN(chid, 4888 "ldc_mem_copy: addr/sz is not 8-byte aligned\n"); 4889 return (EINVAL); 4890 } 4891 4892 /* Find the size of the exported memory */ 4893 export_size = 0; 4894 for (i = 0; i < ccount; i++) 4895 export_size += cookies[i].size; 4896 4897 /* check to see if offset is valid */ 4898 if (off > export_size) { 4899 DWARN(chid, 4900 "ldc_mem_copy: (0x%llx) start offset > export mem size\n", 4901 chid); 4902 return (EINVAL); 4903 } 4904 4905 /* 4906 * Check to see if the export size is smaller than the size we 4907 * are requesting to copy - if so flag an error 4908 */ 4909 if ((export_size - off) < *size) { 4910 DWARN(chid, 4911 "ldc_mem_copy: (0x%llx) copy size > export mem size\n", 4912 chid); 4913 return (EINVAL); 4914 } 4915 4916 total_bal = min(export_size, *size); 4917 4918 /* FUTURE: get the page size, pgsz code, and shift */ 4919 pg_size = MMU_PAGESIZE; 4920 pg_size_code = page_szc(pg_size); 4921 pg_shift = page_get_shift(pg_size_code); 4922 4923 D1(chid, "ldc_mem_copy: copying data " 4924 "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 4925 chid, vaddr, pg_size, pg_size_code, pg_shift); 4926 4927 /* aligned VA and its offset */ 4928 local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1)); 4929 local_voff = ((uintptr_t)vaddr) & (pg_size - 1); 4930 4931 npages = (len+local_voff)/pg_size; 4932 npages = ((len+local_voff)%pg_size == 0) ? 
npages : npages+1; 4933 4934 D1(chid, 4935 "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", 4936 chid, vaddr, local_valign, local_voff, npages); 4937 4938 local_ra = va_to_pa((void *)local_valign); 4939 local_poff = local_voff; 4940 local_psize = min(len, (pg_size - local_voff)); 4941 4942 len -= local_psize; 4943 4944 /* 4945 * find the first cookie in the list of cookies 4946 * if the offset passed in is not zero 4947 */ 4948 for (idx = 0; idx < ccount; idx++) { 4949 cookie_size = cookies[idx].size; 4950 if (off < cookie_size) 4951 break; 4952 off -= cookie_size; 4953 } 4954 4955 cookie_addr = cookies[idx].addr + off; 4956 cookie_size = cookies[idx].size - off; 4957 4958 export_caddr = cookie_addr & ~(pg_size - 1); 4959 export_poff = cookie_addr & (pg_size - 1); 4960 export_psize = min(cookie_size, (pg_size - export_poff)); 4961 4962 for (;;) { 4963 4964 copy_size = min(export_psize, local_psize); 4965 4966 D1(chid, 4967 "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx," 4968 " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx," 4969 " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," 4970 " total_bal=0x%llx\n", 4971 chid, direction, export_caddr, local_ra, export_poff, 4972 local_poff, export_psize, local_psize, copy_size, 4973 total_bal); 4974 4975 rv = hv_ldc_copy(chid, direction, 4976 (export_caddr + export_poff), (local_ra + local_poff), 4977 copy_size, &copied_len); 4978 4979 if (rv != 0) { 4980 int error = EIO; 4981 uint64_t rx_hd, rx_tl; 4982 4983 DWARN(chid, 4984 "ldc_mem_copy: (0x%llx) err %d during copy\n", 4985 (unsigned long long)chid, rv); 4986 DWARN(chid, 4987 "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, " 4988 "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx," 4989 " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx," 4990 " copied_len=0x%lx, total_bal=0x%lx\n", 4991 chid, direction, export_caddr, local_ra, 4992 export_poff, local_poff, export_psize, local_psize, 4993 copy_size, copied_len, total_bal); 4994 4995 *size = *size - total_bal; 4996 4997 /* 4998 * check if reason for copy error was due to 4999 * a channel reset. we need to grab the lock 5000 * just in case we have to do a reset. 
5001 */ 5002 mutex_enter(&ldcp->lock); 5003 mutex_enter(&ldcp->tx_lock); 5004 5005 rv = hv_ldc_rx_get_state(ldcp->id, 5006 &rx_hd, &rx_tl, &(ldcp->link_state)); 5007 if (ldcp->link_state == LDC_CHANNEL_DOWN || 5008 ldcp->link_state == LDC_CHANNEL_RESET) { 5009 i_ldc_reset(ldcp, B_FALSE); 5010 error = ECONNRESET; 5011 } 5012 5013 mutex_exit(&ldcp->tx_lock); 5014 mutex_exit(&ldcp->lock); 5015 5016 return (error); 5017 } 5018 5019 ASSERT(copied_len <= copy_size); 5020 5021 D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len); 5022 export_poff += copied_len; 5023 local_poff += copied_len; 5024 export_psize -= copied_len; 5025 local_psize -= copied_len; 5026 cookie_size -= copied_len; 5027 5028 total_bal -= copied_len; 5029 5030 if (copy_size != copied_len) 5031 continue; 5032 5033 if (export_psize == 0 && total_bal != 0) { 5034 5035 if (cookie_size == 0) { 5036 idx++; 5037 cookie_addr = cookies[idx].addr; 5038 cookie_size = cookies[idx].size; 5039 5040 export_caddr = cookie_addr & ~(pg_size - 1); 5041 export_poff = cookie_addr & (pg_size - 1); 5042 export_psize = 5043 min(cookie_size, (pg_size-export_poff)); 5044 } else { 5045 export_caddr += pg_size; 5046 export_poff = 0; 5047 export_psize = min(cookie_size, pg_size); 5048 } 5049 } 5050 5051 if (local_psize == 0 && total_bal != 0) { 5052 local_valign += pg_size; 5053 local_ra = va_to_pa((void *)local_valign); 5054 local_poff = 0; 5055 local_psize = min(pg_size, len); 5056 len -= local_psize; 5057 } 5058 5059 /* check if we are all done */ 5060 if (total_bal == 0) 5061 break; 5062 } 5063 5064 5065 D1(chid, 5066 "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n", 5067 chid, *size); 5068 5069 return (0); 5070 } 5071 5072 /* 5073 * Copy data either from or to the client specified virtual address 5074 * space to or from HV physical memory. 5075 * 5076 * The direction argument determines whether the data is read from or 5077 * written to HV memory. 
Direction values are LDC_COPY_IN/OUT, similar 5078 * to the ldc_mem_copy interface. 5079 */ 5080 int 5081 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size, 5082 caddr_t paddr, uint8_t direction) 5083 { 5084 ldc_chan_t *ldcp; 5085 uint64_t local_voff, local_valign; 5086 uint64_t pg_shift, pg_size, pg_size_code; 5087 uint64_t target_pa, target_poff, target_psize, target_size; 5088 uint64_t local_ra, local_poff, local_psize; 5089 uint64_t copy_size, copied_len = 0; 5090 pgcnt_t npages; 5091 size_t len = *size; 5092 int rv = 0; 5093 5094 if (handle == NULL) { 5095 DWARN(DBG_ALL_LDCS, 5096 "ldc_mem_rdwr_cookie: invalid channel handle\n"); 5097 return (EINVAL); 5098 } 5099 ldcp = (ldc_chan_t *)handle; 5100 5101 mutex_enter(&ldcp->lock); 5102 5103 /* check to see if channel is UP */ 5104 if (ldcp->tstate != TS_UP) { 5105 DWARN(ldcp->id, 5106 "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n", 5107 ldcp->id); 5108 mutex_exit(&ldcp->lock); 5109 return (ECONNRESET); 5110 } 5111 5112 /* Force address and size to be 8-byte aligned */ 5113 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 5114 DWARN(ldcp->id, 5115 "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n"); 5116 mutex_exit(&ldcp->lock); 5117 return (EINVAL); 5118 } 5119 5120 target_size = *size; 5121 5122 /* FUTURE: get the page size, pgsz code, and shift */ 5123 pg_size = MMU_PAGESIZE; 5124 pg_size_code = page_szc(pg_size); 5125 pg_shift = page_get_shift(pg_size_code); 5126 5127 D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data " 5128 "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 5129 ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); 5130 5131 /* aligned VA and its offset */ 5132 local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1); 5133 local_voff = ((uintptr_t)vaddr) & (pg_size - 1); 5134 5135 npages = (len + local_voff) / pg_size; 5136 npages = ((len + local_voff) % pg_size == 0) ?
npages : npages+1; 5137 5138 D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, " 5139 "val=0x%llx,off=0x%x,pgs=0x%x\n", 5140 ldcp->id, vaddr, local_valign, local_voff, npages); 5141 5142 local_ra = va_to_pa((void *)local_valign); 5143 local_poff = local_voff; 5144 local_psize = min(len, (pg_size - local_voff)); 5145 5146 len -= local_psize; 5147 5148 target_pa = ((uintptr_t)paddr) & ~(pg_size - 1); 5149 target_poff = ((uintptr_t)paddr) & (pg_size - 1); 5150 target_psize = pg_size - target_poff; 5151 5152 for (;;) { 5153 5154 copy_size = min(target_psize, local_psize); 5155 5156 D1(ldcp->id, 5157 "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx," 5158 " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx," 5159 " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," 5160 " total_bal=0x%llx\n", 5161 ldcp->id, direction, target_pa, local_ra, target_poff, 5162 local_poff, target_psize, local_psize, copy_size, 5163 target_size); 5164 5165 rv = hv_ldc_copy(ldcp->id, direction, 5166 (target_pa + target_poff), (local_ra + local_poff), 5167 copy_size, &copied_len); 5168 5169 if (rv != 0) { 5170 DWARN(DBG_ALL_LDCS, 5171 "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n", 5172 ldcp->id, rv); 5173 DWARN(DBG_ALL_LDCS, 5174 "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, " 5175 "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, " 5176 "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, " 5177 "copy_sz=0x%llx, total_bal=0x%llx\n", 5178 ldcp->id, direction, target_pa, local_ra, 5179 target_poff, local_poff, target_psize, local_psize, 5180 copy_size, target_size); 5181 5182 *size = *size - target_size; 5183 mutex_exit(&ldcp->lock); 5184 return (i_ldc_h2v_error(rv)); 5185 } 5186 5187 D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n", 5188 copied_len); 5189 target_poff += copied_len; 5190 local_poff += copied_len; 5191 target_psize -= copied_len; 5192 local_psize -= copied_len; 5193 5194 target_size -= copied_len; 5195 5196 if (copy_size != copied_len) 5197 continue; 5198 5199 if (target_psize == 0 && target_size != 0) { 5200 target_pa += pg_size; 5201 target_poff = 0; 5202 target_psize = min(pg_size, target_size); 5203 } 5204 5205 if (local_psize == 0 && target_size != 0) { 5206 local_valign += pg_size; 5207 local_ra = va_to_pa((void *)local_valign); 5208 local_poff = 0; 5209 local_psize = min(pg_size, len); 5210 len -= local_psize; 5211 } 5212 5213 /* check if we are all done */ 5214 if (target_size == 0) 5215 break; 5216 } 5217 5218 mutex_exit(&ldcp->lock); 5219 5220 D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n", 5221 ldcp->id, *size); 5222 5223 return (0); 5224 } 5225 5226 /* 5227 * Map an exported memory segment into the local address space. If the 5228 * memory range was exported for direct map access, a HV call is made 5229 * to allocate a RA range. If the map is done via a shadow copy, local 5230 * shadow memory is allocated and the base VA is returned in 'vaddr'. If 5231 * the mapping is a direct map then the RA is returned in 'raddr'. 
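 *
 * A mapping sketch (hypothetical names; note that the VA pointed to by
 * 'vaddr' should be initialized to NULL if the framework is expected
 * to allocate shadow memory):
 *
 *	caddr_t va = NULL, ra = NULL;
 *
 *	rv = ldc_mem_map(mh, cookies, ccount, LDC_DIRECT_MAP,
 *	    LDC_MEM_RW, &va, &ra);
 *
 * If the direct map attempt fails, the code below quietly reverts to
 * a shadow mapping.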
5232 */ 5233 int 5234 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount, 5235 uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr) 5236 { 5237 int i, j, idx, rv, retries; 5238 ldc_chan_t *ldcp; 5239 ldc_mhdl_t *mhdl; 5240 ldc_memseg_t *memseg; 5241 caddr_t tmpaddr; 5242 uint64_t map_perm = perm; 5243 uint64_t pg_size, pg_shift, pg_size_code, pg_mask; 5244 uint64_t exp_size = 0, base_off, map_size, npages; 5245 uint64_t cookie_addr, cookie_off, cookie_size; 5246 tte_t ldc_tte; 5247 5248 if (mhandle == NULL) { 5249 DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n"); 5250 return (EINVAL); 5251 } 5252 mhdl = (ldc_mhdl_t *)mhandle; 5253 5254 mutex_enter(&mhdl->lock); 5255 5256 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED || 5257 mhdl->memseg != NULL) { 5258 DWARN(DBG_ALL_LDCS, 5259 "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle); 5260 mutex_exit(&mhdl->lock); 5261 return (EINVAL); 5262 } 5263 5264 ldcp = mhdl->ldcp; 5265 5266 mutex_enter(&ldcp->lock); 5267 5268 if (ldcp->tstate != TS_UP) { 5269 DWARN(ldcp->id, 5270 "ldc_mem_map: (0x%llx) channel is not UP\n", 5271 ldcp->id); 5272 mutex_exit(&ldcp->lock); 5273 mutex_exit(&mhdl->lock); 5274 return (ECONNRESET); 5275 } 5276 5277 if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { 5278 DWARN(ldcp->id, "ldc_mem_map: invalid map type\n"); 5279 mutex_exit(&ldcp->lock); 5280 mutex_exit(&mhdl->lock); 5281 return (EINVAL); 5282 } 5283 5284 D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n", 5285 ldcp->id, cookie->addr, cookie->size); 5286 5287 /* FUTURE: get the page size, pgsz code, and shift */ 5288 pg_size = MMU_PAGESIZE; 5289 pg_size_code = page_szc(pg_size); 5290 pg_shift = page_get_shift(pg_size_code); 5291 pg_mask = ~(pg_size - 1); 5292 5293 /* calculate the number of pages in the exported cookie */ 5294 base_off = cookie[0].addr & (pg_size - 1); 5295 for (idx = 0; idx < ccount; idx++) 5296 exp_size += cookie[idx].size; 5297 map_size = P2ROUNDUP((exp_size + base_off), pg_size); 5298 npages = (map_size >> pg_shift); 5299 5300 /* Allocate memseg structure */ 5301 memseg = mhdl->memseg = 5302 kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); 5303 5304 /* Allocate memory to store all pages and cookies */ 5305 memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); 5306 memseg->cookies = 5307 kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP); 5308 5309 D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx," 5310 "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages); 5311 5312 /* 5313 * Check if direct map over shared memory is enabled; if not, change 5314 * the mapping type to SHADOW_MAP.
5315 */ 5316 if (ldc_shmem_enabled == 0) 5317 mtype = LDC_SHADOW_MAP; 5318 5319 /* 5320 * Check to see if the client is requesting direct or shadow map. 5321 * If direct map is requested, try to map remote memory first, 5322 * and if that fails, revert to shadow map. 5323 */ 5324 if (mtype == LDC_DIRECT_MAP) { 5325 5326 /* Allocate kernel virtual space for mapping */ 5327 memseg->vaddr = vmem_xalloc(heap_arena, map_size, 5328 pg_size, 0, 0, NULL, NULL, VM_NOSLEEP); 5329 if (memseg->vaddr == NULL) { 5330 cmn_err(CE_WARN, 5331 "ldc_mem_map: (0x%lx) memory map failed\n", 5332 ldcp->id); 5333 kmem_free(memseg->cookies, 5334 (sizeof (ldc_mem_cookie_t) * ccount)); 5335 kmem_free(memseg->pages, 5336 (sizeof (ldc_page_t) * npages)); 5337 kmem_cache_free(ldcssp->memseg_cache, memseg); 5338 5339 mutex_exit(&ldcp->lock); 5340 mutex_exit(&mhdl->lock); 5341 return (ENOMEM); 5342 } 5343 5344 /* Unload previous mapping */ 5345 hat_unload(kas.a_hat, memseg->vaddr, map_size, 5346 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 5347 5348 /* for each cookie passed in - map into address space */ 5349 idx = 0; 5350 cookie_size = 0; 5351 tmpaddr = memseg->vaddr; 5352 5353 for (i = 0; i < npages; i++) { 5354 5355 if (cookie_size == 0) { 5356 ASSERT(idx < ccount); 5357 cookie_addr = cookie[idx].addr & pg_mask; 5358 cookie_off = cookie[idx].addr & (pg_size - 1); 5359 cookie_size = 5360 P2ROUNDUP((cookie_off + cookie[idx].size), 5361 pg_size); 5362 idx++; 5363 } 5364 5365 D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping " 5366 "cookie 0x%llx, bal=0x%llx\n", ldcp->id, 5367 cookie_addr, cookie_size); 5368 5369 /* map the cookie into address space */ 5370 for (retries = 0; retries < ldc_max_retries; 5371 retries++) { 5372 5373 rv = hv_ldc_mapin(ldcp->id, cookie_addr, 5374 &memseg->pages[i].raddr, &map_perm); 5375 if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY) 5376 break; 5377 5378 drv_usecwait(ldc_delay); 5379 } 5380 5381 if (rv || memseg->pages[i].raddr == 0) { 5382 DWARN(ldcp->id, 5383 "ldc_mem_map: (0x%llx) hv mapin err %d\n", 5384 ldcp->id, rv); 5385 5386 /* remove previous mapins */ 5387 hat_unload(kas.a_hat, memseg->vaddr, map_size, 5388 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 5389 for (j = 0; j < i; j++) { 5390 rv = hv_ldc_unmap( 5391 memseg->pages[j].raddr); 5392 if (rv) { 5393 DWARN(ldcp->id, 5394 "ldc_mem_map: (0x%llx) " 5395 "cannot unmap ra=0x%llx\n", 5396 ldcp->id, 5397 memseg->pages[j].raddr); 5398 } 5399 } 5400 5401 /* free kernel virtual space */ 5402 vmem_free(heap_arena, (void *)memseg->vaddr, 5403 map_size); 5404 5405 /* direct map failed - revert to shadow map */ 5406 mtype = LDC_SHADOW_MAP; 5407 break; 5408 5409 } else { 5410 5411 D1(ldcp->id, 5412 "ldc_mem_map: (0x%llx) vtop map 0x%llx -> " 5413 "0x%llx, cookie=0x%llx, perm=0x%llx\n", 5414 ldcp->id, tmpaddr, memseg->pages[i].raddr, 5415 cookie_addr, perm); 5416 5417 /* 5418 * NOTE: Calling hat_devload directly causes it 5419 * to look for page_t using the pfn.
Since this 5420 * addr is greater than the memlist, it treats 5421 * it as non-memory 5422 */ 5423 sfmmu_memtte(&ldc_tte, 5424 (pfn_t)(memseg->pages[i].raddr >> pg_shift), 5425 PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K); 5426 5427 D1(ldcp->id, 5428 "ldc_mem_map: (0x%llx) ra 0x%llx -> " 5429 "tte 0x%llx\n", ldcp->id, 5430 memseg->pages[i].raddr, ldc_tte); 5431 5432 sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr, 5433 NULL, HAT_LOAD_LOCK); 5434 5435 cookie_size -= pg_size; 5436 cookie_addr += pg_size; 5437 tmpaddr += pg_size; 5438 } 5439 } 5440 } 5441 5442 if (mtype == LDC_SHADOW_MAP) { 5443 if (*vaddr == NULL) { 5444 memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP); 5445 mhdl->myshadow = B_TRUE; 5446 5447 D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated " 5448 "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr); 5449 } else { 5450 /* 5451 * Use client supplied memory for memseg->vaddr 5452 * WARNING: assuming that client mem is >= exp_size 5453 */ 5454 memseg->vaddr = *vaddr; 5455 } 5456 5457 /* Save all page and cookie information */ 5458 for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) { 5459 memseg->pages[i].raddr = va_to_pa(tmpaddr); 5460 memseg->pages[i].size = pg_size; 5461 tmpaddr += pg_size; 5462 } 5463 5464 } 5465 5466 /* save all cookies */ 5467 bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t)); 5468 5469 /* update memseg_t */ 5470 memseg->raddr = memseg->pages[0].raddr; 5471 memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size; 5472 memseg->npages = npages; 5473 memseg->ncookies = ccount; 5474 memseg->next_cookie = 0; 5475 5476 /* memory handle = mapped */ 5477 mhdl->mtype = mtype; 5478 mhdl->perm = perm; 5479 mhdl->status = LDC_MAPPED; 5480 5481 D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, " 5482 "va=0x%llx, pgs=0x%llx cookies=0x%llx\n", 5483 ldcp->id, mhdl, memseg->raddr, memseg->vaddr, 5484 memseg->npages, memseg->ncookies); 5485 5486 if (mtype == LDC_SHADOW_MAP) 5487 base_off = 0; 5488 if (raddr) 5489 *raddr = (caddr_t)(memseg->raddr | base_off); 5490 if (vaddr) 5491 *vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off); 5492 5493 mutex_exit(&ldcp->lock); 5494 mutex_exit(&mhdl->lock); 5495 return (0); 5496 } 5497 5498 /* 5499 * Unmap a memory segment. Free shadow memory (if any).
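 *
 * Typically paired with the map call above (sketch, "mh" hypothetical):
 *
 *	(void) ldc_mem_unmap(mh);
 *	(void) ldc_mem_free_handle(mh);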
5500 */ 5501 int 5502 ldc_mem_unmap(ldc_mem_handle_t mhandle) 5503 { 5504 int i, rv; 5505 ldc_mhdl_t *mhdl = (ldc_mhdl_t *)mhandle; 5506 ldc_chan_t *ldcp; 5507 ldc_memseg_t *memseg; 5508 5509 if (mhdl == 0 || mhdl->status != LDC_MAPPED) { 5510 DWARN(DBG_ALL_LDCS, 5511 "ldc_mem_unmap: (0x%llx) handle is not mapped\n", 5512 mhandle); 5513 return (EINVAL); 5514 } 5515 5516 mutex_enter(&mhdl->lock); 5517 5518 ldcp = mhdl->ldcp; 5519 memseg = mhdl->memseg; 5520 5521 D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n", 5522 ldcp->id, mhdl); 5523 5524 /* if we allocated shadow memory - free it */ 5525 if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) { 5526 kmem_free(memseg->vaddr, memseg->size); 5527 } else if (mhdl->mtype == LDC_DIRECT_MAP) { 5528 5529 /* unmap in the case of DIRECT_MAP */ 5530 hat_unload(kas.a_hat, memseg->vaddr, memseg->size, 5531 HAT_UNLOAD_UNLOCK); 5532 5533 for (i = 0; i < memseg->npages; i++) { 5534 rv = hv_ldc_unmap(memseg->pages[i].raddr); 5535 if (rv) { 5536 cmn_err(CE_WARN, 5537 "ldc_mem_unmap: (0x%lx) hv unmap err %d\n", 5538 ldcp->id, rv); 5539 } 5540 } 5541 5542 vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size); 5543 } 5544 5545 /* free the allocated memseg and page structures */ 5546 kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); 5547 kmem_free(memseg->cookies, 5548 (sizeof (ldc_mem_cookie_t) * memseg->ncookies)); 5549 kmem_cache_free(ldcssp->memseg_cache, memseg); 5550 5551 /* uninitialize the memory handle */ 5552 mhdl->memseg = NULL; 5553 mhdl->status = LDC_UNBOUND; 5554 5555 D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n", 5556 ldcp->id, mhdl); 5557 5558 mutex_exit(&mhdl->lock); 5559 return (0); 5560 } 5561 5562 /* 5563 * Internal entry point for LDC mapped memory entry consistency 5564 * semantics. Acquire copies the contents of the remote memory 5565 * into the local shadow copy. The release operation copies the local 5566 * contents into the remote memory. The offset and size specify the 5567 * bounds for the memory range being synchronized.
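 *
 * A client would typically bracket its accesses to a shadow-mapped
 * segment as follows (a sketch; "mh", "off" and "len" are hypothetical):
 *
 *	(void) ldc_mem_acquire(mh, off, len);	pull remote contents in
 *	... read and/or modify the mapped shadow memory ...
 *	(void) ldc_mem_release(mh, off, len);	push local contents out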
5568 */ 5569 static int 5570 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction, 5571 uint64_t offset, size_t size) 5572 { 5573 int err; 5574 ldc_mhdl_t *mhdl; 5575 ldc_chan_t *ldcp; 5576 ldc_memseg_t *memseg; 5577 caddr_t local_vaddr; 5578 size_t copy_size; 5579 5580 if (mhandle == NULL) { 5581 DWARN(DBG_ALL_LDCS, 5582 "i_ldc_mem_acquire_release: invalid memory handle\n"); 5583 return (EINVAL); 5584 } 5585 mhdl = (ldc_mhdl_t *)mhandle; 5586 5587 mutex_enter(&mhdl->lock); 5588 5589 if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) { 5590 DWARN(DBG_ALL_LDCS, 5591 "i_ldc_mem_acquire_release: not mapped memory\n"); 5592 mutex_exit(&mhdl->lock); 5593 return (EINVAL); 5594 } 5595 5596 /* do nothing for direct map */ 5597 if (mhdl->mtype == LDC_DIRECT_MAP) { 5598 mutex_exit(&mhdl->lock); 5599 return (0); 5600 } 5601 5602 /* do nothing for COPY_IN without read perm or COPY_OUT without write perm */ 5603 if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) || 5604 (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) { 5605 mutex_exit(&mhdl->lock); 5606 return (0); 5607 } 5608 5609 if (offset >= mhdl->memseg->size || 5610 (offset + size) > mhdl->memseg->size) { 5611 DWARN(DBG_ALL_LDCS, 5612 "i_ldc_mem_acquire_release: memory out of range\n"); 5613 mutex_exit(&mhdl->lock); 5614 return (EINVAL); 5615 } 5616 5617 /* get the channel handle and memory segment */ 5618 ldcp = mhdl->ldcp; 5619 memseg = mhdl->memseg; 5620 5621 if (mhdl->mtype == LDC_SHADOW_MAP) { 5622 5623 local_vaddr = memseg->vaddr + offset; 5624 copy_size = size; 5625 5626 /* copy to/from remote from/to local memory */ 5627 err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset, 5628 &copy_size, memseg->cookies, memseg->ncookies, 5629 direction); 5630 if (err || copy_size != size) { 5631 DWARN(ldcp->id, 5632 "i_ldc_mem_acquire_release: copy failed\n"); 5633 mutex_exit(&mhdl->lock); 5634 return (err); 5635 } 5636 } 5637 5638 mutex_exit(&mhdl->lock); 5639 5640 return (0); 5641 } 5642 5643 /* 5644 * Ensure that the contents of the local shadow copy are consistent 5645 * with the contents of the remote memory segment 5646 */ 5647 int 5648 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size) 5649 { 5650 return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size)); 5651 } 5652 5653 5654 /* 5655 * Ensure that the contents of the remote memory segment are consistent 5656 * with the contents of the local shadow copy 5657 */ 5658 int 5659 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size) 5660 { 5661 return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size)); 5662 } 5663 5664 /* 5665 * Allocate a descriptor ring. The size of each descriptor 5666 * must be 8-byte aligned and the entire ring should be a multiple 5667 * of MMU_PAGESIZE.
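 *
 * Creation and export sketch (hypothetical names; "my_desc_t" stands
 * for a client descriptor type whose size is 8-byte aligned, as
 * checked below):
 *
 *	ldc_dring_handle_t dh;
 *	ldc_mem_cookie_t dcookie;
 *	uint32_t dccount;
 *
 *	rv = ldc_mem_dring_create(nentries, sizeof (my_desc_t), &dh);
 *	if (rv == 0)
 *		rv = ldc_mem_dring_bind(chan, dh, LDC_SHADOW_MAP,
 *		    LDC_MEM_RW, &dcookie, &dccount);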
5668 */ 5669 int 5670 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle) 5671 { 5672 ldc_dring_t *dringp; 5673 size_t size = (dsize * len); 5674 5675 D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n", 5676 len, dsize); 5677 5678 if (dhandle == NULL) { 5679 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n"); 5680 return (EINVAL); 5681 } 5682 5683 if (len == 0) { 5684 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n"); 5685 return (EINVAL); 5686 } 5687 5688 /* descriptor size should be 8-byte aligned */ 5689 if (dsize == 0 || (dsize & 0x7)) { 5690 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n"); 5691 return (EINVAL); 5692 } 5693 5694 *dhandle = 0; 5695 5696 /* Allocate a desc ring structure */ 5697 dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); 5698 5699 /* Initialize dring */ 5700 dringp->length = len; 5701 dringp->dsize = dsize; 5702 5703 /* round off to a multiple of pagesize */ 5704 dringp->size = (size & MMU_PAGEMASK); 5705 if (size & MMU_PAGEOFFSET) 5706 dringp->size += MMU_PAGESIZE; 5707 5708 dringp->status = LDC_UNBOUND; 5709 5710 /* allocate descriptor ring memory */ 5711 dringp->base = kmem_zalloc(dringp->size, KM_SLEEP); 5712 5713 /* initialize the desc ring lock */ 5714 mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL); 5715 5716 /* Add descriptor ring to the head of global list */ 5717 mutex_enter(&ldcssp->lock); 5718 dringp->next = ldcssp->dring_list; 5719 ldcssp->dring_list = dringp; 5720 mutex_exit(&ldcssp->lock); 5721 5722 *dhandle = (ldc_dring_handle_t)dringp; 5723 5724 D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n"); 5725 5726 return (0); 5727 } 5728 5729 5730 /* 5731 * Destroy a descriptor ring. 5732 */ 5733 int 5734 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle) 5735 { 5736 ldc_dring_t *dringp; 5737 ldc_dring_t *tmp_dringp; 5738 5739 D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n"); 5740 5741 if (dhandle == NULL) { 5742 DWARN(DBG_ALL_LDCS, 5743 "ldc_mem_dring_destroy: invalid desc ring handle\n"); 5744 return (EINVAL); 5745 } 5746 dringp = (ldc_dring_t *)dhandle; 5747 5748 if (dringp->status == LDC_BOUND) { 5749 DWARN(DBG_ALL_LDCS, 5750 "ldc_mem_dring_destroy: desc ring is bound\n"); 5751 return (EACCES); 5752 } 5753 5754 mutex_enter(&dringp->lock); 5755 mutex_enter(&ldcssp->lock); 5756 5757 /* remove from linked list - if not bound */ 5758 tmp_dringp = ldcssp->dring_list; 5759 if (tmp_dringp == dringp) { 5760 ldcssp->dring_list = dringp->next; 5761 dringp->next = NULL; 5762 5763 } else { 5764 while (tmp_dringp != NULL) { 5765 if (tmp_dringp->next == dringp) { 5766 tmp_dringp->next = dringp->next; 5767 dringp->next = NULL; 5768 break; 5769 } 5770 tmp_dringp = tmp_dringp->next; 5771 } 5772 if (tmp_dringp == NULL) { 5773 DWARN(DBG_ALL_LDCS, 5774 "ldc_mem_dring_destroy: invalid descriptor\n"); 5775 mutex_exit(&ldcssp->lock); 5776 mutex_exit(&dringp->lock); 5777 return (EINVAL); 5778 } 5779 } 5780 5781 mutex_exit(&ldcssp->lock); 5782 5783 /* free the descriptor ring */ 5784 kmem_free(dringp->base, dringp->size); 5785 5786 mutex_exit(&dringp->lock); 5787 5788 /* destroy dring lock */ 5789 mutex_destroy(&dringp->lock); 5790 5791 /* free desc ring object */ 5792 kmem_free(dringp, sizeof (ldc_dring_t)); 5793 5794 return (0); 5795 } 5796 5797 /* 5798 * Bind a previously allocated dring to a channel. The channel should 5799 * be OPEN in order to bind the ring to the channel. Returns a 5800 * descriptor ring cookie.
The descriptor ring is exported for remote 5801 * access by the client at the other end of the channel. An entry for 5802 * dring pages is stored in the map table (via call to ldc_mem_bind_handle). 5803 */ 5804 int 5805 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle, 5806 uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) 5807 { 5808 int err; 5809 ldc_chan_t *ldcp; 5810 ldc_dring_t *dringp; 5811 ldc_mem_handle_t mhandle; 5812 5813 /* check to see if channel is initialized */ 5814 if (handle == NULL) { 5815 DWARN(DBG_ALL_LDCS, 5816 "ldc_mem_dring_bind: invalid channel handle\n"); 5817 return (EINVAL); 5818 } 5819 ldcp = (ldc_chan_t *)handle; 5820 5821 if (dhandle == NULL) { 5822 DWARN(DBG_ALL_LDCS, 5823 "ldc_mem_dring_bind: invalid desc ring handle\n"); 5824 return (EINVAL); 5825 } 5826 dringp = (ldc_dring_t *)dhandle; 5827 5828 if (cookie == NULL) { 5829 DWARN(ldcp->id, 5830 "ldc_mem_dring_bind: invalid cookie arg\n"); 5831 return (EINVAL); 5832 } 5833 5834 mutex_enter(&dringp->lock); 5835 5836 if (dringp->status == LDC_BOUND) { 5837 DWARN(DBG_ALL_LDCS, 5838 "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n", 5839 ldcp->id); 5840 mutex_exit(&dringp->lock); 5841 return (EINVAL); 5842 } 5843 5844 if ((perm & LDC_MEM_RW) == 0) { 5845 DWARN(DBG_ALL_LDCS, 5846 "ldc_mem_dring_bind: invalid permissions\n"); 5847 mutex_exit(&dringp->lock); 5848 return (EINVAL); 5849 } 5850 5851 if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { 5852 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n"); 5853 mutex_exit(&dringp->lock); 5854 return (EINVAL); 5855 } 5856 5857 dringp->ldcp = ldcp; 5858 5859 /* create a memory handle */ 5860 err = ldc_mem_alloc_handle(handle, &mhandle); 5861 if (err || mhandle == NULL) { 5862 DWARN(DBG_ALL_LDCS, 5863 "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n", 5864 ldcp->id); 5865 mutex_exit(&dringp->lock); 5866 return (err); 5867 } 5868 dringp->mhdl = mhandle; 5869 5870 /* bind the descriptor ring to channel */ 5871 err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size, 5872 mtype, perm, cookie, ccount); 5873 if (err) { 5874 DWARN(ldcp->id, 5875 "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n", 5876 ldcp->id); 5877 mutex_exit(&dringp->lock); 5878 return (err); 5879 } 5880 5881 /* 5882 * For now return error if we get more than one cookie 5883 * FUTURE: Return multiple cookies ..
5884 */ 5885 if (*ccount > 1) { 5886 (void) ldc_mem_unbind_handle(mhandle); 5887 (void) ldc_mem_free_handle(mhandle); 5888 5889 dringp->ldcp = NULL; 5890 dringp->mhdl = NULL; 5891 *ccount = 0; 5892 5893 mutex_exit(&dringp->lock); 5894 return (EAGAIN); 5895 } 5896 5897 /* Add descriptor ring to channel's exported dring list */ 5898 mutex_enter(&ldcp->exp_dlist_lock); 5899 dringp->ch_next = ldcp->exp_dring_list; 5900 ldcp->exp_dring_list = dringp; 5901 mutex_exit(&ldcp->exp_dlist_lock); 5902 5903 dringp->status = LDC_BOUND; 5904 5905 mutex_exit(&dringp->lock); 5906 5907 return (0); 5908 } 5909 5910 /* 5911 * Return the next cookie associated with the specified dring handle 5912 */ 5913 int 5914 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie) 5915 { 5916 int rv = 0; 5917 ldc_dring_t *dringp; 5918 ldc_chan_t *ldcp; 5919 5920 if (dhandle == NULL) { 5921 DWARN(DBG_ALL_LDCS, 5922 "ldc_mem_dring_nextcookie: invalid desc ring handle\n"); 5923 return (EINVAL); 5924 } 5925 dringp = (ldc_dring_t *)dhandle; 5926 mutex_enter(&dringp->lock); 5927 5928 if (dringp->status != LDC_BOUND) { 5929 DWARN(DBG_ALL_LDCS, 5930 "ldc_mem_dring_nextcookie: descriptor ring 0x%llx " 5931 "is not bound\n", dringp); 5932 mutex_exit(&dringp->lock); 5933 return (EINVAL); 5934 } 5935 5936 ldcp = dringp->ldcp; 5937 5938 if (cookie == NULL) { 5939 DWARN(ldcp->id, 5940 "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n", 5941 ldcp->id); 5942 mutex_exit(&dringp->lock); 5943 return (EINVAL); 5944 } 5945 5946 rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie); 5947 mutex_exit(&dringp->lock); 5948 5949 return (rv); 5950 } 5951 /* 5952 * Unbind a previously bound dring from a channel. 5953 */ 5954 int 5955 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle) 5956 { 5957 ldc_dring_t *dringp; 5958 ldc_dring_t *tmp_dringp; 5959 ldc_chan_t *ldcp; 5960 5961 if (dhandle == NULL) { 5962 DWARN(DBG_ALL_LDCS, 5963 "ldc_mem_dring_unbind: invalid desc ring handle\n"); 5964 return (EINVAL); 5965 } 5966 dringp = (ldc_dring_t *)dhandle; 5967 5968 mutex_enter(&dringp->lock); 5969 5970 if (dringp->status == LDC_UNBOUND) { 5971 DWARN(DBG_ALL_LDCS, 5972 "ldc_mem_dring_unbind: descriptor ring 0x%llx is unbound\n", 5973 dringp); 5974 mutex_exit(&dringp->lock); 5975 return (EINVAL); 5976 } 5977 ldcp = dringp->ldcp; 5978 5979 mutex_enter(&ldcp->exp_dlist_lock); 5980 5981 tmp_dringp = ldcp->exp_dring_list; 5982 if (tmp_dringp == dringp) { 5983 ldcp->exp_dring_list = dringp->ch_next; 5984 dringp->ch_next = NULL; 5985 5986 } else { 5987 while (tmp_dringp != NULL) { 5988 if (tmp_dringp->ch_next == dringp) { 5989 tmp_dringp->ch_next = dringp->ch_next; 5990 dringp->ch_next = NULL; 5991 break; 5992 } 5993 tmp_dringp = tmp_dringp->ch_next; 5994 } 5995 if (tmp_dringp == NULL) { 5996 DWARN(DBG_ALL_LDCS, 5997 "ldc_mem_dring_unbind: invalid descriptor\n"); 5998 mutex_exit(&ldcp->exp_dlist_lock); 5999 mutex_exit(&dringp->lock); 6000 return (EINVAL); 6001 } 6002 } 6003 6004 mutex_exit(&ldcp->exp_dlist_lock); 6005 6006 (void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl); 6007 (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); 6008 6009 dringp->ldcp = NULL; 6010 dringp->mhdl = NULL; 6011 dringp->status = LDC_UNBOUND; 6012 6013 mutex_exit(&dringp->lock); 6014 6015 return (0); 6016 } 6017 6018 /* 6019 * Get information about the dring. The base address of the descriptor 6020 * ring along with the type and permission are returned.
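 *
 * For example (sketch; "dh" is a handle from ldc_mem_dring_create):
 *
 *	ldc_mem_info_t minfo;
 *
 *	if (ldc_mem_dring_info(dh, &minfo) == 0 &&
 *	    minfo.status == LDC_BOUND)
 *		... minfo.vaddr is the ring's base address ...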
6021 */ 6022 int 6023 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo) 6024 { 6025 ldc_dring_t *dringp; 6026 int rv; 6027 6028 if (dhandle == NULL) { 6029 DWARN(DBG_ALL_LDCS, 6030 "ldc_mem_dring_info: invalid desc ring handle\n"); 6031 return (EINVAL); 6032 } 6033 dringp = (ldc_dring_t *)dhandle; 6034 6035 mutex_enter(&dringp->lock); 6036 6037 if (dringp->mhdl) { 6038 rv = ldc_mem_info(dringp->mhdl, minfo); 6039 if (rv) { 6040 DWARN(DBG_ALL_LDCS, 6041 "ldc_mem_dring_info: error reading mem info\n"); 6042 mutex_exit(&dringp->lock); 6043 return (rv); 6044 } 6045 } else { 6046 minfo->vaddr = dringp->base; 6047 minfo->raddr = NULL; 6048 minfo->status = dringp->status; 6049 } 6050 6051 mutex_exit(&dringp->lock); 6052 6053 return (0); 6054 } 6055 6056 /* 6057 * Map an exported descriptor ring into the local address space. If the 6058 * descriptor ring was exported for direct map access, a HV call is made 6059 * to allocate a RA range. If the map is done via a shadow copy, local 6060 * shadow memory is allocated. 6061 */ 6062 int 6063 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie, 6064 uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype, 6065 ldc_dring_handle_t *dhandle) 6066 { 6067 int err; 6068 ldc_chan_t *ldcp = (ldc_chan_t *)handle; 6069 ldc_mem_handle_t mhandle; 6070 ldc_dring_t *dringp; 6071 size_t dring_size; 6072 6073 if (dhandle == NULL) { 6074 DWARN(DBG_ALL_LDCS, 6075 "ldc_mem_dring_map: invalid dhandle\n"); 6076 return (EINVAL); 6077 } 6078 6079 /* check to see if channel is initialized */ 6080 if (handle == NULL) { 6081 DWARN(DBG_ALL_LDCS, 6082 "ldc_mem_dring_map: invalid channel handle\n"); 6083 return (EINVAL); 6084 } 6085 ldcp = (ldc_chan_t *)handle; 6086 6087 if (cookie == NULL) { 6088 DWARN(ldcp->id, 6089 "ldc_mem_dring_map: (0x%llx) invalid cookie\n", 6090 ldcp->id); 6091 return (EINVAL); 6092 } 6093 6094 /* FUTURE: For now we support only one cookie per dring */ 6095 ASSERT(ccount == 1); 6096 6097 if (cookie->size < (dsize * len)) { 6098 DWARN(ldcp->id, 6099 "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n", 6100 ldcp->id); 6101 return (EINVAL); 6102 } 6103 6104 *dhandle = 0; 6105 6106 /* Allocate a dring structure */ 6107 dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); 6108 6109 D1(ldcp->id, 6110 "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n", 6111 mtype, len, dsize, cookie->addr, cookie->size); 6112 6113 /* Initialize dring */ 6114 dringp->length = len; 6115 dringp->dsize = dsize; 6116 6117 /* round off to a multiple of page size */ 6118 dring_size = len * dsize; 6119 dringp->size = (dring_size & MMU_PAGEMASK); 6120 if (dring_size & MMU_PAGEOFFSET) 6121 dringp->size += MMU_PAGESIZE; 6122 6123 dringp->ldcp = ldcp; 6124 6125 /* create a memory handle */ 6126 err = ldc_mem_alloc_handle(handle, &mhandle); 6127 if (err || mhandle == NULL) { 6128 DWARN(DBG_ALL_LDCS, 6129 "ldc_mem_dring_map: cannot alloc hdl err=%d\n", 6130 err); 6131 kmem_free(dringp, sizeof (ldc_dring_t)); 6132 return (ENOMEM); 6133 } 6134 6135 dringp->mhdl = mhandle; 6136 dringp->base = NULL; 6137 6138 /* map the dring into local memory */ 6139 err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW, 6140 &(dringp->base), NULL); 6141 if (err || dringp->base == NULL) { 6142 cmn_err(CE_WARN, 6143 "ldc_mem_dring_map: cannot map desc ring err=%d\n", err); 6144 (void) ldc_mem_free_handle(mhandle); 6145 kmem_free(dringp, sizeof (ldc_dring_t)); 6146 return (ENOMEM); 6147 } 6148 6149 /* initialize the desc ring lock */ 6150 mutex_init(&dringp->lock, NULL,
MUTEX_DRIVER, NULL); 6151 6152 /* Add descriptor ring to channel's imported dring list */ 6153 mutex_enter(&ldcp->imp_dlist_lock); 6154 dringp->ch_next = ldcp->imp_dring_list; 6155 ldcp->imp_dring_list = dringp; 6156 mutex_exit(&ldcp->imp_dlist_lock); 6157 6158 dringp->status = LDC_MAPPED; 6159 6160 *dhandle = (ldc_dring_handle_t)dringp; 6161 6162 return (0); 6163 } 6164 6165 /* 6166 * Unmap a descriptor ring. Free shadow memory (if any). 6167 */ 6168 int 6169 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle) 6170 { 6171 ldc_dring_t *dringp; 6172 ldc_dring_t *tmp_dringp; 6173 ldc_chan_t *ldcp; 6174 6175 if (dhandle == NULL) { 6176 DWARN(DBG_ALL_LDCS, 6177 "ldc_mem_dring_unmap: invalid desc ring handle\n"); 6178 return (EINVAL); 6179 } 6180 dringp = (ldc_dring_t *)dhandle; 6181 6182 if (dringp->status != LDC_MAPPED) { 6183 DWARN(DBG_ALL_LDCS, 6184 "ldc_mem_dring_unmap: not a mapped desc ring\n"); 6185 return (EINVAL); 6186 } 6187 6188 mutex_enter(&dringp->lock); 6189 6190 ldcp = dringp->ldcp; 6191 6192 mutex_enter(&ldcp->imp_dlist_lock); 6193 6194 /* find and unlink the desc ring from channel import list */ 6195 tmp_dringp = ldcp->imp_dring_list; 6196 if (tmp_dringp == dringp) { 6197 ldcp->imp_dring_list = dringp->ch_next; 6198 dringp->ch_next = NULL; 6199 6200 } else { 6201 while (tmp_dringp != NULL) { 6202 if (tmp_dringp->ch_next == dringp) { 6203 tmp_dringp->ch_next = dringp->ch_next; 6204 dringp->ch_next = NULL; 6205 break; 6206 } 6207 tmp_dringp = tmp_dringp->ch_next; 6208 } 6209 if (tmp_dringp == NULL) { 6210 DWARN(DBG_ALL_LDCS, 6211 "ldc_mem_dring_unmap: invalid descriptor\n"); 6212 mutex_exit(&ldcp->imp_dlist_lock); 6213 mutex_exit(&dringp->lock); 6214 return (EINVAL); 6215 } 6216 } 6217 6218 mutex_exit(&ldcp->imp_dlist_lock); 6219 6220 /* do a LDC memory handle unmap and free */ 6221 (void) ldc_mem_unmap(dringp->mhdl); 6222 (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); 6223 6224 dringp->status = 0; 6225 dringp->ldcp = NULL; 6226 6227 mutex_exit(&dringp->lock); 6228 6229 /* destroy dring lock */ 6230 mutex_destroy(&dringp->lock); 6231 6232 /* free desc ring object */ 6233 kmem_free(dringp, sizeof (ldc_dring_t)); 6234 6235 return (0); 6236 } 6237 6238 /* 6239 * Internal entry point for descriptor ring access entry consistency 6240 * semantics. Acquire copies the contents of the remote descriptor ring 6241 * into the local shadow copy. The release operation copies the local 6242 * contents into the remote dring. The start and end locations specify 6243 * bounds for the entries being synchronized. 
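 *
 * An importer would typically sync a range of descriptors around each
 * processing pass (sketch; "dh", "first" and "last" are hypothetical
 * indices, and first > last denotes a range that wraps past the end
 * of the ring):
 *
 *	(void) ldc_mem_dring_acquire(dh, first, last);
 *	... process descriptors first through last ...
 *	(void) ldc_mem_dring_release(dh, first, last);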
6244 */ 6245 static int 6246 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle, 6247 uint8_t direction, uint64_t start, uint64_t end) 6248 { 6249 int err; 6250 ldc_dring_t *dringp; 6251 ldc_chan_t *ldcp; 6252 uint64_t soff; 6253 size_t copy_size; 6254 6255 if (dhandle == NULL) { 6256 DWARN(DBG_ALL_LDCS, 6257 "i_ldc_dring_acquire_release: invalid desc ring handle\n"); 6258 return (EINVAL); 6259 } 6260 dringp = (ldc_dring_t *)dhandle; 6261 mutex_enter(&dringp->lock); 6262 6263 if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) { 6264 DWARN(DBG_ALL_LDCS, 6265 "i_ldc_dring_acquire_release: not a mapped desc ring\n"); 6266 mutex_exit(&dringp->lock); 6267 return (EINVAL); 6268 } 6269 6270 if (start >= dringp->length || end >= dringp->length) { 6271 DWARN(DBG_ALL_LDCS, 6272 "i_ldc_dring_acquire_release: index out of range\n"); 6273 mutex_exit(&dringp->lock); 6274 return (EINVAL); 6275 } 6276 6277 /* get the channel handle */ 6278 ldcp = dringp->ldcp; 6279 6280 copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) : 6281 ((dringp->length - start) * dringp->dsize); 6282 6283 /* Calculate the relative offset for the first desc */ 6284 soff = (start * dringp->dsize); 6285 6286 /* copy to/from remote from/to local memory */ 6287 D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n", 6288 soff, copy_size); 6289 err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, 6290 direction, soff, copy_size); 6291 if (err) { 6292 DWARN(ldcp->id, 6293 "i_ldc_dring_acquire_release: copy failed\n"); 6294 mutex_exit(&dringp->lock); 6295 return (err); 6296 } 6297 6298 /* copy the wrap-around balance, if any */ 6299 if (start > end) { 6300 copy_size = ((end + 1) * dringp->dsize); 6301 soff = 0; 6302 6303 /* copy to/from remote from/to local memory */ 6304 D1(ldcp->id, "i_ldc_dring_acquire_release: c2 " 6305 "off=0x%llx sz=0x%llx\n", soff, copy_size); 6306 err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, 6307 direction, soff, copy_size); 6308 if (err) { 6309 DWARN(ldcp->id, 6310 "i_ldc_dring_acquire_release: copy failed\n"); 6311 mutex_exit(&dringp->lock); 6312 return (err); 6313 } 6314 } 6315 6316 mutex_exit(&dringp->lock); 6317 6318 return (0); 6319 } 6320 6321 /* 6322 * Ensure that the contents in the local dring are consistent 6323 * with the contents of the remote dring 6324 */ 6325 int 6326 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) 6327 { 6328 return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end)); 6329 } 6330 6331 /* 6332 * Ensure that the contents in the remote dring are consistent 6333 * with the contents of the local dring 6334 */ 6335 int 6336 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) 6337 { 6338 return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end)); 6339 } 6340 6341 6342 /* ------------------------------------------------------------------------- */ 6343