/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * sun4v LDC Link Layer
 */
#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/ksynch.h>
#include <sys/modctl.h>
#include <sys/stat.h>	/* needed for S_IFBLK and S_IFCHR */
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/cred.h>
#include <sys/promif.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cyclic.h>
#include <sys/machsystm.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/intreg.h>
#include <sys/machcpuvar.h>
#include <sys/mmu.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/hat_sfmmu.h>
#include <sys/vm_machparam.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <sys/note.h>
#include <sys/ivintr.h>
#include <sys/hypervisor_api.h>
#include <sys/ldc.h>
#include <sys/ldc_impl.h>
#include <sys/cnex.h>
#include <sys/hsvc.h>
#include <sys/sdt.h>

/* Core internal functions */
static int i_ldc_h2v_error(int h_error);
static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
static void i_ldc_reset_state(ldc_chan_t *ldcp);
static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);

static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg);

static int i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
    uint64_t *tail, uint64_t *link_state);
static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
    uint64_t *tail, uint64_t *link_state);
static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
    uint64_t rx_tail);
static uint_t i_ldc_chkq(ldc_chan_t *ldcp);

/* Interrupt handling functions */
static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
static uint_t
i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
    uint64_t *notify_event);
static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);

/* Read method functions */
static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);

/* Write method functions */
static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);
static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
    size_t *sizep);

/* Pkt processing internal functions */
static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);

/* Memory synchronization internal functions */
static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
    uint8_t direction, uint64_t offset, size_t size);
static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
    uint8_t direction, uint64_t start, uint64_t end);

/* LDC Version */
static ldc_ver_t ldc_versions[] = { {1, 0} };

/* number of supported versions */
#define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))

/* Invalid value for the ldc_chan_t rx_ack_head field */
#define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)


/* Module State Pointer */
static ldc_soft_state_t *ldcssp;

static struct modldrv md = {
	&mod_miscops,			/* This is a misc module */
	"sun4v LDC module v%I%",	/* Name of the module */
};

static struct modlinkage ml = {
	MODREV_1,
	&md,
	NULL
};

static uint64_t ldc_sup_minor;		/* Supported minor number */
static hsvc_info_t ldc_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
};

/*
 * LDC framework supports mapping remote domain's memory
 * either directly or via shadow memory pages. Default
 * support is currently implemented via shadow copy.
 * Direct map can be enabled by setting 'ldc_shmem_enabled'
 */
int ldc_shmem_enabled = 0;

/*
 * The no. of MTU size messages that can be stored in
 * the LDC Tx queue. The number of Tx queue entries is
 * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
 */
uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;

/*
 * The minimum queue length. This is the size of the smallest
 * LDC queue. If the computed value is less than this default,
 * the queue length is rounded up to 'ldc_queue_entries'.
 */
uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;

/*
 * The length of the reliable-mode data queue in terms of the LDC
 * receive queue length. i.e., the number of times larger than the
 * LDC receive queue that the data queue should be. The HV receive
 * queue is required to be a power of 2 and this implementation
 * assumes the data queue will also be a power of 2. By making the
 * multiplier a power of 2, we ensure the data queue will be a
 * power of 2. We use a multiplier because the receive queue is
 * sized to be sane relative to the MTU and the same is needed for
 * the data queue.
 */
uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
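
/*
 * Illustrative sizing example (not from the original source): with a
 * hypothetical 4 KB MTU, an ldc_mtu_msgs value of 4 and 64-byte queue
 * entries (LDC_PACKET_SIZE is assumed to be 64 here), the Tx queue
 * length works out to (4096 * 4) / 64 = 256 entries. If a smaller MTU
 * drove the result below ldc_queue_entries, the queue would instead
 * be sized at that minimum. With an ldc_rxdq_multiplier of 4, the
 * reliable-mode data queue for a 256-entry receive queue would hold
 * 4 * 256 = 1024 entries, still a power of 2 as required.
 */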

/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor.
 * The default number of entries in the table is set to
 * 'ldc_maptable_entries'.
 */
uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;

/*
 * LDC retry count and delay - when the HV returns EWOULDBLOCK
 * the operation is retried 'ldc_max_retries' times with a
 * wait of 'ldc_delay' usecs between each retry.
 */
int ldc_max_retries = LDC_MAX_RETRIES;
clock_t ldc_delay = LDC_DELAY;

/*
 * delay between each retry of channel unregistration in
 * ldc_close(), to wait for pending interrupts to complete.
 */
clock_t ldc_close_delay = LDC_CLOSE_DELAY;

#ifdef DEBUG

/*
 * Print debug messages
 *
 * set ldcdbg to 0x7 for enabling all msgs
 * 0x4 - Warnings
 * 0x2 - All debug messages
 * 0x1 - Minimal debug messages
 *
 * set ldcdbgchan to the channel number you want to debug
 * setting it to -1 prints debug messages for all channels
 * NOTE: ldcdbgchan has no effect on error messages
 */

#define	DBG_ALL_LDCS -1

int ldcdbg = 0x0;
int64_t ldcdbgchan = DBG_ALL_LDCS;
uint64_t ldc_inject_err_flag = 0;

static void
ldcdebug(int64_t id, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	/*
	 * Do not return if,
	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
	 * debug channel = caller specified channel
	 */
	if ((id != DBG_ALL_LDCS) &&
	    (ldcdbgchan != DBG_ALL_LDCS) &&
	    (ldcdbgchan != id)) {
		return;
	}

	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);

	cmn_err(CE_CONT, "?%s", buf);
}

#define	LDC_ERR_RESET	0x1
#define	LDC_ERR_PKTLOSS	0x2
#define	LDC_ERR_DQFULL	0x4

static boolean_t
ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
{
	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
		return (B_FALSE);

	if ((ldc_inject_err_flag & error) == 0)
		return (B_FALSE);

	/* clear the injection state */
	ldc_inject_err_flag &= ~error;

	return (B_TRUE);
}

#define	D1 \
if (ldcdbg & 0x01) \
	ldcdebug

#define	D2 \
if (ldcdbg & 0x02) \
	ldcdebug

#define	DWARN \
if (ldcdbg & 0x04) \
	ldcdebug

#define	DUMP_PAYLOAD(id, addr) \
{ \
	char buf[65*3]; \
	int i; \
	uint8_t *src = (uint8_t *)addr; \
	for (i = 0; i < 64; i++, src++) \
		(void) sprintf(&buf[i * 3], "|%02x", *src); \
	(void) sprintf(&buf[i * 3], "|\n"); \
	D2((id), "payload: %s", buf); \
}

#define	DUMP_LDC_PKT(c, s, addr) \
{ \
	ldc_msg_t *msg = (ldc_msg_t *)(addr); \
	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0; \
	if (msg->type == LDC_DATA) { \
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])", \
		    (s), mid, msg->type, msg->stype, msg->ctrl, \
		    (msg->env & LDC_FRAG_START) ? 'B' : ' ', \
		    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ', \
		    (msg->env & LDC_LEN_MASK)); \
	} else { \
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s), \
		    mid, msg->type, msg->stype, msg->ctrl, msg->env); \
	} \
}

#define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
#define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
#define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)

#else

#define	DBG_ALL_LDCS -1

#define	D1
#define	D2
#define	DWARN

#define	DUMP_PAYLOAD(id, addr)
#define	DUMP_LDC_PKT(c, s, addr)

#define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
#define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
#define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)

#endif

/*
 * dtrace SDT probes to ease tracing of the rx data queue and HV queue
 * lengths. Just pass the head, tail, and entries values so that the
 * length can be calculated in a dtrace script when the probe is enabled.
 */
#define	TRACE_RXDQ_LENGTH(ldcp) \
	DTRACE_PROBE4(rxdq__size, \
	uint64_t, ldcp->id, \
	uint64_t, ldcp->rx_dq_head, \
	uint64_t, ldcp->rx_dq_tail, \
	uint64_t, ldcp->rx_dq_entries)

#define	TRACE_RXHVQ_LENGTH(ldcp, head, tail) \
	DTRACE_PROBE4(rxhvq__size, \
	uint64_t, ldcp->id, \
	uint64_t, head, \
	uint64_t, tail, \
	uint64_t, ldcp->rx_q_entries)

/* A dtrace SDT probe to ease tracing of data queue copy operations */
#define	TRACE_RXDQ_COPY(ldcp, bytes) \
	DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes) \

/* The amount of contiguous space at the tail of the queue */
#define	Q_CONTIG_SPACE(head, tail, size) \
	((head) <= (tail) ? ((size) - (tail)) : \
	((head) - (tail) - LDC_PACKET_SIZE))

#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t));

#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
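
/*
 * Illustrative example for Q_CONTIG_SPACE (not from the original
 * source), assuming 64-byte packets and a 512-byte (8-entry) queue:
 * with head = 128 and tail = 320 the macro yields 512 - 320 = 192
 * bytes of contiguous space from the tail to the end of the queue;
 * with head = 320 and tail = 128 it yields 320 - 128 - 64 = 128
 * bytes, holding one packet slot back so that a completely full
 * queue is never confused with an empty one (head == tail).
 */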

int
_init(void)
{
	int status;

	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
	if (status != 0) {
		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
		    " group: 0x%lx major: %ld minor: %ld errno: %d",
		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
		return (-1);
	}

	/* allocate soft state structure */
	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);

	/* Link the module into the system */
	status = mod_install(&ml);
	if (status != 0) {
		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
		return (status);
	}

	/* Initialize the LDC state structure */
	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);

	mutex_enter(&ldcssp->lock);

	/* Create a cache for memory handles */
	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (ldcssp->memhdl_cache == NULL) {
		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
		mutex_exit(&ldcssp->lock);
		return (-1);
	}

	/* Create cache for memory segment structures */
	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (ldcssp->memseg_cache == NULL) {
		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
		mutex_exit(&ldcssp->lock);
		return (-1);
	}


	ldcssp->channel_count = 0;
	ldcssp->channels_open = 0;
	ldcssp->chan_list = NULL;
	ldcssp->dring_list = NULL;

	mutex_exit(&ldcssp->lock);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	/* Report status of the dynamically loadable driver module */
	return (mod_info(&ml, modinfop));
}

int
_fini(void)
{
	int rv, status;
	ldc_chan_t *tmp_ldcp, *ldcp;
	ldc_dring_t *tmp_dringp, *dringp;
	ldc_mem_info_t minfo;

	/* Unlink the driver module from the system */
	status = mod_remove(&ml);
	if (status) {
		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
		return (EIO);
	}

	/* Free descriptor rings */
	dringp = ldcssp->dring_list;
	while (dringp != NULL) {
		tmp_dringp = dringp->next;

		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
		if (rv == 0 && minfo.status != LDC_UNBOUND) {
			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_dring_unbind(
				    (ldc_dring_handle_t)dringp);
			}
			if (minfo.status == LDC_MAPPED) {
				(void) ldc_mem_dring_unmap(
				    (ldc_dring_handle_t)dringp);
			}
		}

		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
		dringp = tmp_dringp;
	}
	ldcssp->dring_list = NULL;

	/* close and finalize channels */
	ldcp = ldcssp->chan_list;
	while (ldcp != NULL) {
		tmp_ldcp = ldcp->next;

		(void) ldc_close((ldc_handle_t)ldcp);
		(void) ldc_fini((ldc_handle_t)ldcp);

		ldcp = tmp_ldcp;
	}
	ldcssp->chan_list = NULL;

	/* Destroy kmem caches */
	kmem_cache_destroy(ldcssp->memhdl_cache);
	kmem_cache_destroy(ldcssp->memseg_cache);

	/*
	 * We have successfully "removed" the driver.
	 * Destroying soft states
	 */
	mutex_destroy(&ldcssp->lock);
	kmem_free(ldcssp, sizeof (ldc_soft_state_t));

	(void) hsvc_unregister(&ldc_hsvc);

	return (status);
}

/* -------------------------------------------------------------------------- */

/*
 * LDC Link Layer Internal Functions
 */

/*
 * Translate HV Errors to sun4v error codes
 */
static int
i_ldc_h2v_error(int h_error)
{
	switch (h_error) {

	case	H_EOK:
		return (0);

	case	H_ENORADDR:
		return (EFAULT);

	case	H_EBADPGSZ:
	case	H_EINVAL:
		return (EINVAL);

	case	H_EWOULDBLOCK:
		return (EWOULDBLOCK);

	case	H_ENOACCESS:
	case	H_ENOMAP:
		return (EACCES);

	case	H_EIO:
	case	H_ECPUERROR:
		return (EIO);

	case	H_ENOTSUPPORTED:
		return (ENOTSUP);

	case	H_ETOOMANY:
		return (ENOSPC);

	case	H_ECHANNEL:
		return (ECHRNG);
	default:
		break;
	}

	return (EIO);
}

/*
 * Reconfigure the transmit queue
 */
static int
i_ldc_txq_reconf(ldc_chan_t *ldcp)
{
	int rv;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
		return (EIO);
	}
	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
	    &(ldcp->tx_tail), &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
		return (EIO);
	}
	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
	    ldcp->link_state);

	return (0);
}

/*
 * Reconfigure the receive queue
 */
static int
i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
		    ldcp->rx_q_entries);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
			    ldcp->id);
			return (EIO);
		}
		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
		    ldcp->id);
	}

	return (0);
}


/*
 * Drain the contents of the receive queue
 */
static int
i_ldc_rxq_drain(ldc_chan_t *ldcp)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	/* flush contents by setting the head = tail */
	return (i_ldc_set_rx_head(ldcp, rx_tail));
}


/*
 * Reset LDC state structure and its contents
 */
static void
i_ldc_reset_state(ldc_chan_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->lock));
	ldcp->last_msg_snt = LDC_INIT_SEQID;
	ldcp->last_ack_rcd = 0;
	ldcp->last_msg_rcd = 0;
	ldcp->tx_ackd_head = ldcp->tx_head;
	ldcp->stream_remains = 0;
	ldcp->next_vidx = 0;
	ldcp->hstate = 0;
	ldcp->tstate = TS_OPEN;
	ldcp->status = LDC_OPEN;
	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
	ldcp->rx_dq_head = 0;
	ldcp->rx_dq_tail = 0;

	if (ldcp->link_state == LDC_CHANNEL_UP ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {

		if (ldcp->mode == LDC_MODE_RAW) {
			ldcp->status = LDC_UP;
			ldcp->tstate = TS_UP;
		} else {
			ldcp->status = LDC_READY;
			ldcp->tstate |= TS_LINK_READY;
		}
	}
}

/*
 * Reset a LDC channel
 */
static void
i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
{
	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* reconfig Tx and Rx queues */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_reconf(ldcp, force_reset);

	/* Clear Tx and Rx interrupts */
	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);

	/* Reset channel state */
	i_ldc_reset_state(ldcp);

	/* Mark channel in reset */
	ldcp->tstate |= TS_IN_RESET;
}


/*
 * Clear pending interrupts
 */
static void
i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
{
	ldc_cnex_t *cinfo = &ldcssp->cinfo;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(cinfo->dip != NULL);

	switch (itype) {
	case CNEX_TX_INTR:
		/* check Tx interrupt */
		if (ldcp->tx_intr_state)
			ldcp->tx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;

	case CNEX_RX_INTR:
		/* check Rx interrupt */
		if (ldcp->rx_intr_state)
			ldcp->rx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;
	}

	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
	D2(ldcp->id,
	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
	    ldcp->id, itype);
}

/*
 * Set the receive queue head
 * Resets connection and returns an error if it fails.
 */
static int
i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
{
	int	rv;
	int	retries;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
			return (0);

		if (rv != H_EWOULDBLOCK)
			break;

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}

	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
	    ldcp->id, head);
	mutex_enter(&ldcp->tx_lock);
	i_ldc_reset(ldcp, B_TRUE);
	mutex_exit(&ldcp->tx_lock);

	return (ECONNRESET);
}

/*
 * Returns the tx_head to be used for transfer
 */
static void
i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
{
	ldc_msg_t *pkt;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* get current Tx head */
	*head = ldcp->tx_head;

	/*
	 * Reliable mode will use the ACKd head instead of the regular tx_head.
	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
	 * up to the current location of tx_head. This needs to be done
	 * as the peer will only ACK DATA/INFO pkts.
	 */
	if (ldcp->mode == LDC_MODE_RELIABLE || ldcp->mode == LDC_MODE_STREAM) {
		while (ldcp->tx_ackd_head != ldcp->tx_head) {
			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
				break;
			}
			/* advance ACKd head */
			ldcp->tx_ackd_head =
			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
		}
		*head = ldcp->tx_ackd_head;
	}
}

/*
 * Returns the tx_tail to be used for transfer
 * Re-reads the TX queue ptrs if and only if the
 * cached head and tail are equal (queue is full)
 */
static int
i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
{
	int		rv;
	uint64_t	current_head, new_tail;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	/* Read the head and tail ptrs from HV */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
		    ldcp->id);
		return (EIO);
	}
	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
		    ldcp->id);
		return (ECONNRESET);
	}

	i_ldc_get_tx_head(ldcp, &current_head);

	/* increment the tail */
	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	if (new_tail == current_head) {
		DWARN(ldcp->id,
		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
		    ldcp->id);
		return (EWOULDBLOCK);
	}

	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);

	*tail = ldcp->tx_tail;
	return (0);
}
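
/*
 * Illustrative ring arithmetic (not from the original source),
 * assuming 64-byte packets (LDC_PACKET_SHIFT of 6) and an 8-entry
 * Tx queue: the queue spans 8 << 6 = 512 bytes, so advancing from
 * tail offset 448 gives (448 + 64) % 512 = 0, wrapping to the start.
 * Because a send is refused when the incremented tail would meet the
 * head, at most 7 of the 8 entries can ever be occupied.
 */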

/*
 * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
 * and retry ldc_max_retries times before returning an error.
 * Returns 0, EWOULDBLOCK or EIO
 */
static int
i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
{
	int	rv, retval = EWOULDBLOCK;
	int	retries;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
			retval = 0;
			break;
		}
		if (rv != H_EWOULDBLOCK) {
			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
			retval = EIO;
			break;
		}

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}
	return (retval);
}

/*
 * Copy a data packet from the HV receive queue to the data queue.
 * Caller must ensure that the data queue is not already full.
 *
 * The *head argument represents the current head pointer for the HV
 * receive queue. After copying a packet from the HV receive queue,
 * the *head pointer will be updated. This allows the caller to update
 * the head pointer in HV using the returned *head value.
 */
void
i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
{
	uint64_t	q_size, dq_size;

	ASSERT(MUTEX_HELD(&ldcp->lock));

	q_size = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;

	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
	    dq_size) >= LDC_PACKET_SIZE);

	bcopy((void *)(ldcp->rx_q_va + *head),
	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);

	/* Update rx head */
	*head = (*head + LDC_PACKET_SIZE) % q_size;

	/* Update dq tail */
	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
}

/*
 * Update the Rx data queue head pointer
 */
static int
i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
{
	ldcp->rx_dq_head = head;
	return (0);
}

/*
 * Get the Rx data queue head and tail pointers
 */
static uint64_t
i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
    uint64_t *link_state)
{
	_NOTE(ARGUNUSED(link_state))
	*head = ldcp->rx_dq_head;
	*tail = ldcp->rx_dq_tail;
	return (0);
}

/*
 * Wrapper for the Rx HV queue get state function. Gives the
 * data queue and HV queue get state functions the same type.
 */
static uint64_t
i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
    uint64_t *link_state)
{
	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
	    link_state)));
}

/*
 * LDC receive interrupt handler
 * triggered for channel with data pending to read
 * i.e. Rx queue content changes
 */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	ldc_chan_t	*ldcp;
	boolean_t	notify;
	uint64_t	event;
	int		rv;

	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* Mark the interrupt as being actively handled */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	(void) i_ldc_rx_process_hvq(ldcp, &notify, &event);

	if (ldcp->mode != LDC_MODE_STREAM) {
		/*
		 * If there are no data packets on the queue, clear
		 * the interrupt. Otherwise, the ldc_read will clear
		 * interrupts after draining the queue. To indicate the
		 * interrupt has not yet been cleared, it is marked
		 * as pending.
		 */
		if ((event & LDC_EVT_READ) == 0) {
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		} else {
			ldcp->rx_intr_state = LDC_INTR_PEND;
		}
	}

	/* if callbacks are disabled, do not notify */
	if (notify && ldcp->cb_enabled) {
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
			    ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	if (ldcp->mode == LDC_MODE_STREAM) {
		/*
		 * If we are using a secondary data queue, clear the
		 * interrupt. We should have processed all CTRL packets
		 * and copied all DATA packets to the secondary queue.
		 * Even if the secondary queue filled up, clear the
		 * interrupt; this will trigger another interrupt and
		 * force the handler to copy more data.
		 */
		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}

/*
 * Wrapper for the Rx HV queue processing function to be used when
 * checking the Rx HV queue for data packets. Unlike the interrupt
 * handler code flow, the Rx interrupt is not cleared here and
 * callbacks are not made.
 */
static uint_t
i_ldc_chkq(ldc_chan_t *ldcp)
{
	boolean_t	notify;
	uint64_t	event;

	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
}

/*
 * Send a LDC message
 */
static int
i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg)
{
	int		rv;
	ldc_msg_t 	*pkt;
	uint64_t	tx_tail;
	uint32_t	curr_seqid;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	curr_seqid = ldcp->last_msg_snt;

	/* get the current tail for the message */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (rv);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* Initialize the packet */
	pkt->type = pkttype;
	pkt->stype = subtype;
	pkt->ctrl = ctrlmsg;

	/* Store ackid/seqid iff not RAW mode & not a RTS/RTR message */
	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
		curr_seqid++;
		if (ldcp->mode != LDC_MODE_RAW) {
			pkt->seqid = curr_seqid;
			pkt->ackid = ldcp->last_msg_rcd;
		}
	}
	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (EIO);
	}

	ldcp->last_msg_snt = curr_seqid;
	ldcp->tx_tail = tx_tail;

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
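
/*
 * Illustrative usage (restating calls made elsewhere in this file):
 * the handshake code sends a NACK for a mode-mismatched RTS with
 *
 *	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
 *
 * and completes the handshake with
 *
 *	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
 *
 * A nonzero return generally means the channel must be reset.
 */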

/*
 * Checks if a packet was received in the right order
 * in the case of a reliable link.
 * Returns 0 if in order, else EIO
 */
static int
i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	/* No seqid checking for RAW mode */
	if (ldcp->mode == LDC_MODE_RAW)
		return (0);

	/* No seqid checking for version, RTS, RTR message */
	if (msg->ctrl == LDC_VER ||
	    msg->ctrl == LDC_RTS ||
	    msg->ctrl == LDC_RTR)
		return (0);

	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
		    "expecting 0x%x\n", ldcp->id, msg->seqid,
		    (ldcp->last_msg_rcd + 1));
		return (EIO);
	}

#ifdef DEBUG
	if (LDC_INJECT_PKTLOSS(ldcp)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
		return (EIO);
	}
#endif

	return (0);
}


/*
 * Process an incoming version ctrl message
 */
static int
i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0, idx = ldcp->next_vidx;
	ldc_msg_t	*pkt;
	uint64_t	tx_tail;
	ldc_ver_t	*rcvd_ver;

	/* get the received version */
	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);

	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	switch (msg->stype) {
	case LDC_INFO:

		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			return (EAGAIN);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;

		for (;;) {

			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
			    rcvd_ver->major, rcvd_ver->minor,
			    ldc_versions[idx].major, ldc_versions[idx].minor);

			if (rcvd_ver->major == ldc_versions[idx].major) {
				/* major version match - ACK version */
				pkt->stype = LDC_ACK;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));

				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* nack with next lower version */
				pkt->stype = LDC_NACK;
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next major version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - send NACK */
				pkt->stype = LDC_NACK;
				bzero(pkt->udata, sizeof (ldc_ver_t));
				ldcp->next_vidx = 0;
				break;
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			ldcp->tx_tail = tx_tail;
			if (pkt->stype == LDC_ACK) {
				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
				    " version ACK\n", ldcp->id);
				/* Save the ACK'd version */
				ldcp->version.major = rcvd_ver->major;
				ldcp->version.minor = rcvd_ver->minor;
				ldcp->hstate |= TS_RCVD_VER;
				ldcp->tstate |= TS_VER_DONE;
				D1(DBG_ALL_LDCS,
				    "(0x%llx) Sent ACK, "
				    "Agreed on version v%u.%u\n",
				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
			}
		} else {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) error sending "
			    "ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;

	case LDC_ACK:
		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			if (ldcp->version.major != rcvd_ver->major ||
			    ldcp->version.minor != rcvd_ver->minor) {

				/* mismatched version - reset connection */
				DWARN(ldcp->id,
				    "i_ldc_process_VER: (0x%llx) recvd"
				    " ACK ver != sent ACK ver\n", ldcp->id);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		} else {
			/* SUCCESS - we have agreed on a version */
			ldcp->version.major = rcvd_ver->major;
			ldcp->version.minor = rcvd_ver->minor;
			ldcp->tstate |= TS_VER_DONE;
		}

		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

		/* initiate RTS-RTR-RDX handshake */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		pkt->type = LDC_CTRL;
		pkt->stype = LDC_INFO;
		pkt->ctrl = LDC_RTS;
		pkt->env = ldcp->mode;
		if (ldcp->mode != LDC_MODE_RAW)
			pkt->seqid = LDC_INIT_SEQID;

		ldcp->last_msg_rcd = LDC_INIT_SEQID;

		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv) {
			D2(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) no listener\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTS;

		break;

	case LDC_NACK:
		/* check if version in NACK is zero */
		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
			/* version handshake failure */
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_VER: (0x%llx) no version match\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;
		pkt->stype = LDC_INFO;

		/* check if the version in the NACK msg has a match */
		for (;;) {
			if (rcvd_ver->major == ldc_versions[idx].major) {
				/*
				 * major version match - resubmit request,
				 * lowering the minor version to the one
				 * this endpt supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* send next lower version */
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - terminate */
				ldcp->next_vidx = 0;
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
			    " INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
			    ldc_versions[idx].minor);
			ldcp->tx_tail = tx_tail;
		} else {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) error sending version"
			    " INFO\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;
	}

	mutex_exit(&ldcp->tx_lock);
	return (rv);
}
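
/*
 * For reference, a sketch of the control handshake as implemented by
 * the processing functions above and below (initiator on the left):
 *
 *	VER(INFO)  --->			propose version
 *		   <--- VER(ACK)	accept (possibly lowered minor)
 *	RTS(INFO)  --->			request to send, carries mode/seqid
 *		   <--- RTR(INFO)	ready to receive
 *	RDX(INFO)  --->			ready for data exchange
 *
 * A NACK at any step either retries with a lower version (VER) or
 * resets the channel (RTS/RTR/RDX).
 */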

/*
 * Process an incoming RTS ctrl message
 */
static int
i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0;
	ldc_msg_t	*pkt;
	uint64_t	tx_tail;
	boolean_t	sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		DWARN(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
			    ldcp->id);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;
	default:
		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we consume the packet that came in but do not record
	 * that we received the RTS.
	 */
	if (rv || sent_NACK)
		return (rv);

	/* record RTS received */
	ldcp->hstate |= TS_RCVD_RTS;

	/* store initial SEQID info */
	ldcp->last_msg_snt = msg->seqid;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the response */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* initialize the packet */
	pkt->type = LDC_CTRL;
	pkt->stype = LDC_INFO;
	pkt->ctrl = LDC_RTR;
	pkt->env = ldcp->mode;
	if (ldcp->mode != LDC_MODE_RAW)
		pkt->seqid = LDC_INIT_SEQID;

	ldcp->last_msg_rcd = msg->seqid;

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv == 0) {
		D2(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTR;

	} else {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}

/*
 * Process an incoming RTR ctrl message
 */
static int
i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0;
	boolean_t	sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RTR NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			DWARN(ldcp->id,
			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
			    "expecting 0x%x, got 0x%x\n",
			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;

	default:
		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we consume the packet that came in but do not record
	 * that we received the RTR.
	 */
	if (rv || sent_NACK)
		return (rv);

	ldcp->last_msg_snt = msg->seqid;
	ldcp->hstate |= TS_RCVD_RTR;

	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
	if (rv) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D2(ldcp->id,
	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);

	ldcp->hstate |= TS_SENT_RDX;
	ldcp->tstate |= TS_HSHAKE_DONE;
	if ((ldcp->tstate & TS_IN_RESET) == 0)
		ldcp->status = LDC_UP;

	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);

	return (0);
}


/*
 * Process an incoming RDX ctrl message
 */
static int
i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int	rv = 0;

	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RDX NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/*
		 * if channel is UP and a RDX received after data transmission
		 * has commenced it is an error
		 */
		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
			    " - LDC reset\n", ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->hstate |= TS_RCVD_RDX;
		ldcp->tstate |= TS_HSHAKE_DONE;
		if ((ldcp->tstate & TS_IN_RESET) == 0)
			ldcp->status = LDC_UP;

		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
		break;

	default:
		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	return (rv);
}
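
/*
 * Illustrative ACK bookkeeping (not from the original source): if
 * tx_ackd_head trails tx_head by three 64-byte packets carrying
 * seqids 10, 11 and 12, an incoming ACK with ackid 11 walks the loop
 * below past the entries for 10 and 11 and leaves tx_ackd_head at
 * the packet with seqid 12, freeing the first two slots for reuse.
 */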
/*
 * Process an incoming ACK for a data packet
 */
static int
i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv;
	uint64_t	tx_head;
	ldc_msg_t	*pkt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Read the current Tx head and tail
	 */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	/*
	 * loop from where the previous ACK location was to the
	 * current head location. This is how far the HV has
	 * actually sent pkts. Pkts between head and tail are
	 * yet to be sent by HV.
	 */
	tx_head = ldcp->tx_ackd_head;
	for (;;) {
		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
		tx_head = (tx_head + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		if (pkt->seqid == msg->ackid) {
			D2(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
			    ldcp->id);
			ldcp->last_ack_rcd = msg->ackid;
			ldcp->tx_ackd_head = tx_head;
			break;
		}
		if (tx_head == ldcp->tx_head) {
			/* could not find packet */
			DWARN(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
			    ldcp->id);

			/* Reset the channel -- as we cannot continue */
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}

/*
 * Process incoming control message
 * Return 0 - session can continue
 *        EAGAIN - reprocess packet - state was changed
 *        ECONNRESET - channel was reset
 */
static int
i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int 		rv = 0;

	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
	    ldcp->id, ldcp->tstate, ldcp->hstate);

	switch (ldcp->tstate & ~TS_IN_RESET) {

	case TS_OPEN:
	case TS_READY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_VREADY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		case LDC_RTS:
			/* process RTS message */
			rv = i_ldc_process_RTS(ldcp, msg);
			break;
		case LDC_RTR:
			/* process RTR message */
			rv = i_ldc_process_RTR(ldcp, msg);
			break;
		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_UP:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
			    "- LDC reset\n", ldcp->id);
			/* peer is redoing version negotiation */
			mutex_enter(&ldcp->tx_lock);
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			rv = EAGAIN;
			break;

		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;

		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}
	}

	return (rv);
}

/*
 * Register channel with the channel nexus
 */
static int
i_ldc_register_channel(ldc_chan_t *ldcp)
{
	int		rv = 0;
	ldc_cnex_t	*cinfo = &ldcssp->cinfo;

	if (cinfo->dip == NULL) {
		DWARN(ldcp->id,
		    "i_ldc_register_channel: cnex has not registered\n");
		return (EAGAIN);
	}

	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
	if (rv) {
		DWARN(ldcp->id,
"i_ldc_register_channel: cannot register channel\n"); 1930 return (rv); 1931 } 1932 1933 rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR, 1934 i_ldc_tx_hdlr, ldcp, NULL); 1935 if (rv) { 1936 DWARN(ldcp->id, 1937 "i_ldc_register_channel: cannot add Tx interrupt\n"); 1938 (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); 1939 return (rv); 1940 } 1941 1942 rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR, 1943 i_ldc_rx_hdlr, ldcp, NULL); 1944 if (rv) { 1945 DWARN(ldcp->id, 1946 "i_ldc_register_channel: cannot add Rx interrupt\n"); 1947 (void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); 1948 (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); 1949 return (rv); 1950 } 1951 1952 ldcp->tstate |= TS_CNEX_RDY; 1953 1954 return (0); 1955 } 1956 1957 /* 1958 * Unregister a channel with the channel nexus 1959 */ 1960 static int 1961 i_ldc_unregister_channel(ldc_chan_t *ldcp) 1962 { 1963 int rv = 0; 1964 ldc_cnex_t *cinfo = &ldcssp->cinfo; 1965 1966 if (cinfo->dip == NULL) { 1967 DWARN(ldcp->id, 1968 "i_ldc_unregister_channel: cnex has not registered\n"); 1969 return (EAGAIN); 1970 } 1971 1972 if (ldcp->tstate & TS_CNEX_RDY) { 1973 1974 /* Remove the Rx interrupt */ 1975 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR); 1976 if (rv) { 1977 if (rv != EAGAIN) { 1978 DWARN(ldcp->id, 1979 "i_ldc_unregister_channel: err removing " 1980 "Rx intr\n"); 1981 return (rv); 1982 } 1983 1984 /* 1985 * If interrupts are pending and handler has 1986 * finished running, clear interrupt and try 1987 * again 1988 */ 1989 if (ldcp->rx_intr_state != LDC_INTR_PEND) 1990 return (rv); 1991 1992 (void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 1993 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, 1994 CNEX_RX_INTR); 1995 if (rv) { 1996 DWARN(ldcp->id, "i_ldc_unregister_channel: " 1997 "err removing Rx interrupt\n"); 1998 return (rv); 1999 } 2000 } 2001 2002 /* Remove the Tx interrupt */ 2003 rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); 2004 if (rv) { 2005 DWARN(ldcp->id, 2006 "i_ldc_unregister_channel: err removing Tx intr\n"); 2007 return (rv); 2008 } 2009 2010 /* Unregister the channel */ 2011 rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id); 2012 if (rv) { 2013 DWARN(ldcp->id, 2014 "i_ldc_unregister_channel: cannot unreg channel\n"); 2015 return (rv); 2016 } 2017 2018 ldcp->tstate &= ~TS_CNEX_RDY; 2019 } 2020 2021 return (0); 2022 } 2023 2024 2025 /* 2026 * LDC transmit interrupt handler 2027 * triggered for chanel up/down/reset events 2028 * and Tx queue content changes 2029 */ 2030 static uint_t 2031 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2) 2032 { 2033 _NOTE(ARGUNUSED(arg2)) 2034 2035 int rv; 2036 ldc_chan_t *ldcp; 2037 boolean_t notify_client = B_FALSE; 2038 uint64_t notify_event = 0, link_state; 2039 2040 /* Get the channel for which interrupt was received */ 2041 ASSERT(arg1 != NULL); 2042 ldcp = (ldc_chan_t *)arg1; 2043 2044 D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n", 2045 ldcp->id, ldcp); 2046 2047 /* Lock channel */ 2048 mutex_enter(&ldcp->lock); 2049 2050 /* Obtain Tx lock */ 2051 mutex_enter(&ldcp->tx_lock); 2052 2053 /* mark interrupt as pending */ 2054 ldcp->tx_intr_state = LDC_INTR_ACTIVE; 2055 2056 /* save current link state */ 2057 link_state = ldcp->link_state; 2058 2059 rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, 2060 &ldcp->link_state); 2061 if (rv) { 2062 cmn_err(CE_WARN, 2063 "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n", 2064 ldcp->id, rv); 2065 i_ldc_clear_intr(ldcp, CNEX_TX_INTR); 2066 
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (DDI_INTR_CLAIMED);
	}

	/*
	 * reset the channel state if the channel went down
	 * (other side unconfigured queue) or channel was reset
	 * (other side reconfigured its queue)
	 */
	if (link_state != ldcp->link_state &&
	    ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
		i_ldc_reset(ldcp, B_FALSE);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_DOWN;
	}

	if (link_state != ldcp->link_state &&
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
		i_ldc_reset(ldcp, B_FALSE);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_RESET;
	}

	if (link_state != ldcp->link_state &&
	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
	    ldcp->link_state == LDC_CHANNEL_UP) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_RESET;
		ldcp->tstate |= TS_LINK_READY;
		ldcp->status = LDC_READY;
	}

	/* if callbacks are disabled, do not notify */
	if (!ldcp->cb_enabled)
		notify_client = B_FALSE;

	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	mutex_exit(&ldcp->tx_lock);

	if (notify_client) {
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(notify_event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
			    "failure", ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}
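
/*
 * Summary of the link state transitions handled above (a reading aid,
 * not from the original source): a transition to LDC_CHANNEL_DOWN
 * resets the channel and raises LDC_EVT_DOWN; a transition to
 * LDC_CHANNEL_RESET resets the channel and raises LDC_EVT_RESET; a
 * transition to LDC_CHANNEL_UP while the endpoint is still in
 * TS_OPEN raises LDC_EVT_RESET and marks the link ready, letting the
 * client restart the handshake.
 */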
2154 */ 2155 static uint_t 2156 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client, 2157 uint64_t *notify_event) 2158 { 2159 int rv; 2160 uint64_t rx_head, rx_tail; 2161 ldc_msg_t *msg; 2162 uint64_t link_state, first_fragment = 0; 2163 boolean_t trace_length = B_TRUE; 2164 2165 ASSERT(MUTEX_HELD(&ldcp->lock)); 2166 *notify_client = B_FALSE; 2167 *notify_event = 0; 2168 2169 /* 2170 * Read packet(s) from the queue 2171 */ 2172 for (;;) { 2173 2174 link_state = ldcp->link_state; 2175 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 2176 &ldcp->link_state); 2177 if (rv) { 2178 cmn_err(CE_WARN, 2179 "i_ldc_rx_process_hvq: (0x%lx) cannot read " 2180 "queue ptrs, rv=0x%d\n", ldcp->id, rv); 2181 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 2182 return (EIO); 2183 } 2184 2185 /* 2186 * reset the channel state if the channel went down 2187 * (other side unconfigured queue) or channel was reset 2188 * (other side reconfigured its queue) 2189 */ 2190 2191 if (link_state != ldcp->link_state) { 2192 2193 switch (ldcp->link_state) { 2194 case LDC_CHANNEL_DOWN: 2195 D1(ldcp->id, "i_ldc_rx_process_hvq: channel " 2196 "link down\n", ldcp->id); 2197 mutex_enter(&ldcp->tx_lock); 2198 i_ldc_reset(ldcp, B_FALSE); 2199 mutex_exit(&ldcp->tx_lock); 2200 *notify_client = B_TRUE; 2201 *notify_event = LDC_EVT_DOWN; 2202 goto loop_exit; 2203 2204 case LDC_CHANNEL_UP: 2205 D1(ldcp->id, "i_ldc_rx_process_hvq: " 2206 "channel link up\n", ldcp->id); 2207 2208 if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) { 2209 *notify_client = B_TRUE; 2210 *notify_event = LDC_EVT_RESET; 2211 ldcp->tstate |= TS_LINK_READY; 2212 ldcp->status = LDC_READY; 2213 } 2214 break; 2215 2216 case LDC_CHANNEL_RESET: 2217 default: 2218 #ifdef DEBUG 2219 force_reset: 2220 #endif 2221 D1(ldcp->id, "i_ldc_rx_process_hvq: channel " 2222 "link reset\n", ldcp->id); 2223 mutex_enter(&ldcp->tx_lock); 2224 i_ldc_reset(ldcp, B_FALSE); 2225 mutex_exit(&ldcp->tx_lock); 2226 *notify_client = B_TRUE; 2227 *notify_event = LDC_EVT_RESET; 2228 break; 2229 } 2230 } 2231 2232 #ifdef DEBUG 2233 if (LDC_INJECT_RESET(ldcp)) 2234 goto force_reset; 2235 #endif 2236 if (trace_length) { 2237 TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail); 2238 trace_length = B_FALSE; 2239 } 2240 2241 if (rx_head == rx_tail) { 2242 D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) " 2243 "No packets\n", ldcp->id); 2244 break; 2245 } 2246 2247 D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, " 2248 "tail=0x%llx\n", rx_head, rx_tail); 2249 DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd", 2250 ldcp->rx_q_va + rx_head); 2251 2252 /* get the message */ 2253 msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head); 2254 2255 /* if channel is in RAW mode or data pkt, notify and return */ 2256 if (ldcp->mode == LDC_MODE_RAW) { 2257 *notify_client = B_TRUE; 2258 *notify_event |= LDC_EVT_READ; 2259 break; 2260 } 2261 2262 if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { 2263 2264 /* discard packet if channel is not up */ 2265 if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) { 2266 2267 /* move the head one position */ 2268 rx_head = (rx_head + LDC_PACKET_SIZE) % 2269 (ldcp->rx_q_entries << LDC_PACKET_SHIFT); 2270 2271 if (rv = i_ldc_set_rx_head(ldcp, rx_head)) 2272 break; 2273 2274 continue; 2275 } else { 2276 uint64_t dq_head, dq_tail; 2277 2278 /* process only STREAM mode data packets */ 2279 if (ldcp->mode != LDC_MODE_STREAM) { 2280 if ((ldcp->tstate & TS_IN_RESET) == 0) 2281 *notify_client = B_TRUE; 2282 *notify_event |= LDC_EVT_READ; 2283 break; 2284 } 2285 2286 /* don't process packet if queue full */ 
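/*
 * (The data queue is a ring with one slot always left open: it is
 * treated as full when advancing the tail by one packet would make it
 * equal to the head. For example, in an 8-entry queue, a head at
 * entry 0 and a tail at entry 7 means the queue is full.)
 */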
2287 (void) i_ldc_dq_rx_get_state(ldcp, &dq_head, 2288 &dq_tail, NULL); 2289 dq_tail = (dq_tail + LDC_PACKET_SIZE) % 2290 (ldcp->rx_dq_entries << LDC_PACKET_SHIFT); 2291 if (dq_tail == dq_head || 2292 LDC_INJECT_DQFULL(ldcp)) { 2293 rv = ENOSPC; 2294 break; 2295 } 2296 } 2297 } 2298 2299 /* Check the sequence ID for the message received */ 2300 rv = i_ldc_check_seqid(ldcp, msg); 2301 if (rv != 0) { 2302 2303 DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) " 2304 "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id, 2305 rx_head, rx_tail); 2306 2307 /* Reset last_msg_rcd to start of message */ 2308 if (first_fragment != 0) { 2309 ldcp->last_msg_rcd = first_fragment - 1; 2310 first_fragment = 0; 2311 } 2312 2313 /* 2314 * Send a NACK due to seqid mismatch 2315 */ 2316 rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK, 2317 (msg->ctrl & LDC_CTRL_MASK)); 2318 2319 if (rv) { 2320 cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: " 2321 "(0x%lx) err sending CTRL/DATA NACK msg\n", 2322 ldcp->id); 2323 2324 /* if cannot send NACK - reset channel */ 2325 mutex_enter(&ldcp->tx_lock); 2326 i_ldc_reset(ldcp, B_TRUE); 2327 mutex_exit(&ldcp->tx_lock); 2328 2329 *notify_client = B_TRUE; 2330 *notify_event = LDC_EVT_RESET; 2331 break; 2332 } 2333 2334 /* purge receive queue */ 2335 (void) i_ldc_set_rx_head(ldcp, rx_tail); 2336 break; 2337 } 2338 2339 /* record the message ID */ 2340 ldcp->last_msg_rcd = msg->seqid; 2341 2342 /* process control messages */ 2343 if (msg->type & LDC_CTRL) { 2344 /* save current internal state */ 2345 uint64_t tstate = ldcp->tstate; 2346 2347 rv = i_ldc_ctrlmsg(ldcp, msg); 2348 if (rv == EAGAIN) { 2349 /* re-process pkt - state was adjusted */ 2350 continue; 2351 } 2352 if (rv == ECONNRESET) { 2353 *notify_client = B_TRUE; 2354 *notify_event = LDC_EVT_RESET; 2355 break; 2356 } 2357 2358 /* 2359 * control message processing was successful 2360 * channel transitioned to ready for communication 2361 */ 2362 if (rv == 0 && ldcp->tstate == TS_UP && 2363 (tstate & ~TS_IN_RESET) != 2364 (ldcp->tstate & ~TS_IN_RESET)) { 2365 *notify_client = B_TRUE; 2366 *notify_event = LDC_EVT_UP; 2367 } 2368 } 2369 2370 /* process data NACKs */ 2371 if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) { 2372 DWARN(ldcp->id, 2373 "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK", 2374 ldcp->id); 2375 mutex_enter(&ldcp->tx_lock); 2376 i_ldc_reset(ldcp, B_TRUE); 2377 mutex_exit(&ldcp->tx_lock); 2378 *notify_client = B_TRUE; 2379 *notify_event = LDC_EVT_RESET; 2380 break; 2381 } 2382 2383 /* process data ACKs */ 2384 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 2385 if (rv = i_ldc_process_data_ACK(ldcp, msg)) { 2386 *notify_client = B_TRUE; 2387 *notify_event = LDC_EVT_RESET; 2388 break; 2389 } 2390 } 2391 2392 if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { 2393 ASSERT(ldcp->mode == LDC_MODE_STREAM); 2394 2395 /* 2396 * Copy the data packet to the data queue. Note 2397 * that the copy routine updates the rx_head pointer. 
2398 */ 2399 i_ldc_rxdq_copy(ldcp, &rx_head); 2400 2401 if ((ldcp->tstate & TS_IN_RESET) == 0) 2402 *notify_client = B_TRUE; 2403 *notify_event |= LDC_EVT_READ; 2404 } else { 2405 rx_head = (rx_head + LDC_PACKET_SIZE) % 2406 (ldcp->rx_q_entries << LDC_PACKET_SHIFT); 2407 } 2408 2409 /* move the head one position */ 2410 if (rv = i_ldc_set_rx_head(ldcp, rx_head)) { 2411 *notify_client = B_TRUE; 2412 *notify_event = LDC_EVT_RESET; 2413 break; 2414 } 2415 2416 } /* for */ 2417 2418 loop_exit: 2419 2420 if (ldcp->mode == LDC_MODE_STREAM) { 2421 /* ACK data packets */ 2422 if ((*notify_event & 2423 (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) { 2424 int ack_rv; 2425 ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0); 2426 if (ack_rv && ack_rv != EWOULDBLOCK) { 2427 cmn_err(CE_NOTE, 2428 "i_ldc_rx_process_hvq: (0x%lx) cannot " 2429 "send ACK\n", ldcp->id); 2430 2431 mutex_enter(&ldcp->tx_lock); 2432 i_ldc_reset(ldcp, B_FALSE); 2433 mutex_exit(&ldcp->tx_lock); 2434 2435 *notify_client = B_TRUE; 2436 *notify_event = LDC_EVT_RESET; 2437 goto skip_ackpeek; 2438 } 2439 } 2440 2441 /* 2442 * If we have no more space on the data queue, make sure 2443 * there are no ACKs on the rx queue waiting to be processed. 2444 */ 2445 if (rv == ENOSPC) { 2446 if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) { 2447 ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID; 2448 *notify_client = B_TRUE; 2449 *notify_event = LDC_EVT_RESET; 2450 } 2451 } else { 2452 ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID; 2453 } 2454 } 2455 2456 skip_ackpeek: 2457 2458 /* Return, indicating whether or not data packets were found */ 2459 if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) 2460 return (0); 2461 2462 return (ENOMSG); 2463 } 2464 2465 /* 2466 * Process any ACK packets on the HV receive queue. 2467 * 2468 * This function is only used by STREAMING mode channels when the 2469 * secondary data queue fills up and there are packets remaining on 2470 * the HV receive queue. 2471 */ 2472 int 2473 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail) 2474 { 2475 int rv = 0; 2476 ldc_msg_t *msg; 2477 2478 if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID) 2479 ldcp->rx_ack_head = rx_head; 2480 2481 while (ldcp->rx_ack_head != rx_tail) { 2482 msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head); 2483 2484 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 2485 if (rv = i_ldc_process_data_ACK(ldcp, msg)) 2486 break; 2487 msg->stype &= ~LDC_ACK; 2488 } 2489 2490 ldcp->rx_ack_head = 2491 (ldcp->rx_ack_head + LDC_PACKET_SIZE) % 2492 (ldcp->rx_q_entries << LDC_PACKET_SHIFT); 2493 } 2494 return (rv); 2495 } 2496 2497 /* -------------------------------------------------------------------------- */ 2498 2499 /* 2500 * LDC API functions 2501 */ 2502 2503 /* 2504 * Initialize the channel. Allocate internal structure and memory for 2505 * TX/RX queues, and initialize locks. 
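 *
 * Illustrative usage (a hedged sketch, not part of the original
 * source; the attribute values and channel id are hypothetical):
 *
 *	ldc_attr_t attr;
 *	ldc_handle_t handle;
 *
 *	attr.devclass = LDC_DEV_GENERIC;
 *	attr.instance = 0;
 *	attr.mode = LDC_MODE_UNRELIABLE;
 *	attr.mtu = 0;		(zero selects LDC_DEFAULT_MTU)
 *	rv = ldc_init(chan_id, &attr, &handle);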
2506 */ 2507 int 2508 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle) 2509 { 2510 ldc_chan_t *ldcp; 2511 int rv, exit_val; 2512 uint64_t ra_base, nentries; 2513 uint64_t qlen; 2514 2515 exit_val = EINVAL; /* guarantee an error if exit on failure */ 2516 2517 if (attr == NULL) { 2518 DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id); 2519 return (EINVAL); 2520 } 2521 if (handle == NULL) { 2522 DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id); 2523 return (EINVAL); 2524 } 2525 2526 /* check if channel is valid */ 2527 rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries); 2528 if (rv == H_ECHANNEL) { 2529 DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id); 2530 return (EINVAL); 2531 } 2532 2533 /* check if the channel has already been initialized */ 2534 mutex_enter(&ldcssp->lock); 2535 ldcp = ldcssp->chan_list; 2536 while (ldcp != NULL) { 2537 if (ldcp->id == id) { 2538 DWARN(id, "ldc_init: (0x%llx) already initialized\n", 2539 id); 2540 mutex_exit(&ldcssp->lock); 2541 return (EADDRINUSE); 2542 } 2543 ldcp = ldcp->next; 2544 } 2545 mutex_exit(&ldcssp->lock); 2546 2547 ASSERT(ldcp == NULL); 2548 2549 *handle = 0; 2550 2551 /* Allocate an ldcp structure */ 2552 ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP); 2553 2554 /* 2555 * Initialize the channel and Tx lock 2556 * 2557 * The channel 'lock' protects the entire channel and 2558 * should be acquired before initializing, resetting, 2559 * destroying or reading from a channel. 2560 * 2561 * The 'tx_lock' should be acquired prior to transmitting 2562 * data over the channel. The lock should also be acquired 2563 * prior to channel reconfiguration (in order to prevent 2564 * concurrent writes). 2565 * 2566 * ORDERING: When both locks are being acquired, to prevent 2567 * deadlocks, the channel lock should be always acquired prior 2568 * to the tx_lock. 2569 */ 2570 mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL); 2571 mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL); 2572 2573 /* Initialize the channel */ 2574 ldcp->id = id; 2575 ldcp->cb = NULL; 2576 ldcp->cb_arg = NULL; 2577 ldcp->cb_inprogress = B_FALSE; 2578 ldcp->cb_enabled = B_FALSE; 2579 ldcp->next = NULL; 2580 2581 /* Read attributes */ 2582 ldcp->mode = attr->mode; 2583 ldcp->devclass = attr->devclass; 2584 ldcp->devinst = attr->instance; 2585 ldcp->mtu = (attr->mtu > 0) ? 
attr->mtu : LDC_DEFAULT_MTU; 2586 2587 D1(ldcp->id, 2588 "ldc_init: (0x%llx) channel attributes, class=0x%x, " 2589 "instance=0x%llx, mode=%d, mtu=%d\n", 2590 ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu); 2591 2592 ldcp->next_vidx = 0; 2593 ldcp->tstate = TS_IN_RESET; 2594 ldcp->hstate = 0; 2595 ldcp->last_msg_snt = LDC_INIT_SEQID; 2596 ldcp->last_ack_rcd = 0; 2597 ldcp->last_msg_rcd = 0; 2598 ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID; 2599 2600 ldcp->stream_bufferp = NULL; 2601 ldcp->exp_dring_list = NULL; 2602 ldcp->imp_dring_list = NULL; 2603 ldcp->mhdl_list = NULL; 2604 2605 ldcp->tx_intr_state = LDC_INTR_NONE; 2606 ldcp->rx_intr_state = LDC_INTR_NONE; 2607 2608 /* Initialize payload size depending on whether channel is reliable */ 2609 switch (ldcp->mode) { 2610 case LDC_MODE_RAW: 2611 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW; 2612 ldcp->read_p = i_ldc_read_raw; 2613 ldcp->write_p = i_ldc_write_raw; 2614 break; 2615 case LDC_MODE_UNRELIABLE: 2616 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE; 2617 ldcp->read_p = i_ldc_read_packet; 2618 ldcp->write_p = i_ldc_write_packet; 2619 break; 2620 case LDC_MODE_RELIABLE: 2621 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; 2622 ldcp->read_p = i_ldc_read_packet; 2623 ldcp->write_p = i_ldc_write_packet; 2624 break; 2625 case LDC_MODE_STREAM: 2626 ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; 2627 2628 ldcp->stream_remains = 0; 2629 ldcp->stream_offset = 0; 2630 ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP); 2631 ldcp->read_p = i_ldc_read_stream; 2632 ldcp->write_p = i_ldc_write_stream; 2633 break; 2634 default: 2635 exit_val = EINVAL; 2636 goto cleanup_on_exit; 2637 } 2638 2639 /* 2640 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this 2641 * value is smaller than default length of ldc_queue_entries, 2642 * qlen is set to ldc_queue_entries. Ensure that computed 2643 * length is a power-of-two value. 2644 */ 2645 qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload; 2646 if (!ISP2(qlen)) { 2647 uint64_t tmp = 1; 2648 while (qlen) { 2649 qlen >>= 1; tmp <<= 1; 2650 } 2651 qlen = tmp; 2652 } 2653 2654 ldcp->rx_q_entries = 2655 (qlen < ldc_queue_entries) ? 
ldc_queue_entries : qlen; 2656 ldcp->tx_q_entries = ldcp->rx_q_entries; 2657 2658 D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries); 2659 2660 /* Create a transmit queue */ 2661 ldcp->tx_q_va = (uint64_t) 2662 contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT); 2663 if (ldcp->tx_q_va == NULL) { 2664 cmn_err(CE_WARN, 2665 "ldc_init: (0x%lx) TX queue allocation failed\n", 2666 ldcp->id); 2667 exit_val = ENOMEM; 2668 goto cleanup_on_exit; 2669 } 2670 ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va); 2671 2672 D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n", 2673 ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries); 2674 2675 ldcp->tstate |= TS_TXQ_RDY; 2676 2677 /* Create a receive queue */ 2678 ldcp->rx_q_va = (uint64_t) 2679 contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT); 2680 if (ldcp->rx_q_va == NULL) { 2681 cmn_err(CE_WARN, 2682 "ldc_init: (0x%lx) RX queue allocation failed\n", 2683 ldcp->id); 2684 exit_val = ENOMEM; 2685 goto cleanup_on_exit; 2686 } 2687 ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va); 2688 2689 D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n", 2690 ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries); 2691 2692 ldcp->tstate |= TS_RXQ_RDY; 2693 2694 /* Setup a separate read data queue */ 2695 if (ldcp->mode == LDC_MODE_STREAM) { 2696 ldcp->readq_get_state = i_ldc_dq_rx_get_state; 2697 ldcp->readq_set_head = i_ldc_set_rxdq_head; 2698 2699 /* Make sure the data queue multiplier is a power of 2 */ 2700 if (!ISP2(ldc_rxdq_multiplier)) { 2701 D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier " 2702 "not a power of 2, resetting", ldcp->id); 2703 ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER; 2704 } 2705 2706 ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries; 2707 ldcp->rx_dq_va = (uint64_t) 2708 kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT, 2709 KM_SLEEP); 2710 if (ldcp->rx_dq_va == NULL) { 2711 cmn_err(CE_WARN, 2712 "ldc_init: (0x%lx) RX data queue " 2713 "allocation failed\n", ldcp->id); 2714 exit_val = ENOMEM; 2715 goto cleanup_on_exit; 2716 } 2717 2718 ldcp->rx_dq_head = ldcp->rx_dq_tail = 0; 2719 2720 D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, " 2721 "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va, 2722 ldcp->rx_dq_entries); 2723 } else { 2724 ldcp->readq_get_state = i_ldc_hvq_rx_get_state; 2725 ldcp->readq_set_head = i_ldc_set_rx_head; 2726 } 2727 2728 /* Init descriptor ring and memory handle list lock */ 2729 mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL); 2730 mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL); 2731 mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL); 2732 2733 /* mark status as INITialized */ 2734 ldcp->status = LDC_INIT; 2735 2736 /* Add to channel list */ 2737 mutex_enter(&ldcssp->lock); 2738 ldcp->next = ldcssp->chan_list; 2739 ldcssp->chan_list = ldcp; 2740 ldcssp->channel_count++; 2741 mutex_exit(&ldcssp->lock); 2742 2743 /* set the handle */ 2744 *handle = (ldc_handle_t)ldcp; 2745 2746 D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id); 2747 2748 return (0); 2749 2750 cleanup_on_exit: 2751 2752 if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) 2753 kmem_free(ldcp->stream_bufferp, ldcp->mtu); 2754 2755 if (ldcp->tstate & TS_TXQ_RDY) 2756 contig_mem_free((caddr_t)ldcp->tx_q_va, 2757 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 2758 2759 if (ldcp->tstate & TS_RXQ_RDY) 2760 contig_mem_free((caddr_t)ldcp->rx_q_va, 2761 (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); 2762 2763 mutex_destroy(&ldcp->tx_lock); 2764 
mutex_destroy(&ldcp->lock); 2765 2766 if (ldcp) 2767 kmem_free(ldcp, sizeof (ldc_chan_t)); 2768 2769 return (exit_val); 2770 } 2771 2772 /* 2773 * Finalizes the LDC connection. It will return EBUSY if the 2774 * channel is open. An ldc_close() has to be done prior to 2775 * an ldc_fini() operation. It frees the TX/RX queues 2776 * associated with the channel 2777 */ 2778 int 2779 ldc_fini(ldc_handle_t handle) 2780 { 2781 ldc_chan_t *ldcp; 2782 ldc_chan_t *tmp_ldcp; 2783 uint64_t id; 2784 2785 if (handle == NULL) { 2786 DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n"); 2787 return (EINVAL); 2788 } 2789 ldcp = (ldc_chan_t *)handle; 2790 id = ldcp->id; 2791 2792 mutex_enter(&ldcp->lock); 2793 2794 if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) { 2795 DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n", 2796 ldcp->id); 2797 mutex_exit(&ldcp->lock); 2798 return (EBUSY); 2799 } 2800 2801 /* Remove from the channel list */ 2802 mutex_enter(&ldcssp->lock); 2803 tmp_ldcp = ldcssp->chan_list; 2804 if (tmp_ldcp == ldcp) { 2805 ldcssp->chan_list = ldcp->next; 2806 ldcp->next = NULL; 2807 } else { 2808 while (tmp_ldcp != NULL) { 2809 if (tmp_ldcp->next == ldcp) { 2810 tmp_ldcp->next = ldcp->next; 2811 ldcp->next = NULL; 2812 break; 2813 } 2814 tmp_ldcp = tmp_ldcp->next; 2815 } 2816 if (tmp_ldcp == NULL) { 2817 DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n"); 2818 mutex_exit(&ldcssp->lock); 2819 mutex_exit(&ldcp->lock); 2820 return (EINVAL); 2821 } 2822 } 2823 2824 ldcssp->channel_count--; 2825 2826 mutex_exit(&ldcssp->lock); 2827 2828 /* Free the map table for this channel */ 2829 if (ldcp->mtbl) { 2830 (void) hv_ldc_set_map_table(ldcp->id, NULL, NULL); 2831 if (ldcp->mtbl->contigmem) 2832 contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size); 2833 else 2834 kmem_free(ldcp->mtbl->table, ldcp->mtbl->size); 2835 mutex_destroy(&ldcp->mtbl->lock); 2836 kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t)); 2837 } 2838 2839 /* Destroy descriptor ring and memory handle list lock */ 2840 mutex_destroy(&ldcp->exp_dlist_lock); 2841 mutex_destroy(&ldcp->imp_dlist_lock); 2842 mutex_destroy(&ldcp->mlist_lock); 2843 2844 /* Free the stream buffer for STREAM_MODE */ 2845 if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) 2846 kmem_free(ldcp->stream_bufferp, ldcp->mtu); 2847 2848 /* Free the RX queue */ 2849 contig_mem_free((caddr_t)ldcp->rx_q_va, 2850 (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); 2851 ldcp->tstate &= ~TS_RXQ_RDY; 2852 2853 /* Free the RX data queue */ 2854 if (ldcp->mode == LDC_MODE_STREAM) { 2855 kmem_free((caddr_t)ldcp->rx_dq_va, 2856 (ldcp->rx_dq_entries << LDC_PACKET_SHIFT)); 2857 } 2858 2859 /* Free the TX queue */ 2860 contig_mem_free((caddr_t)ldcp->tx_q_va, 2861 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 2862 ldcp->tstate &= ~TS_TXQ_RDY; 2863 2864 mutex_exit(&ldcp->lock); 2865 2866 /* Destroy mutex */ 2867 mutex_destroy(&ldcp->tx_lock); 2868 mutex_destroy(&ldcp->lock); 2869 2870 /* free channel structure */ 2871 kmem_free(ldcp, sizeof (ldc_chan_t)); 2872 2873 D1(id, "ldc_fini: (0x%llx) channel finalized\n", id); 2874 2875 return (0); 2876 } 2877 2878 /* 2879 * Open the LDC channel for use. It registers the TX/RX queues 2880 * with the Hypervisor.
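 * (A typical bring-up sequence, as an illustrative sketch only -- not
 * from the original source; my_cb and my_arg are hypothetical:
 *
 *	(void) ldc_init(id, &attr, &handle);
 *	(void) ldc_reg_callback(handle, my_cb, my_arg);
 *	(void) ldc_open(handle);
 *	(void) ldc_up(handle);
 *
 * with error checking at each step in real code.)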
It also specifies the interrupt number 2881 * and target CPU for this channel 2882 */ 2883 int 2884 ldc_open(ldc_handle_t handle) 2885 { 2886 ldc_chan_t *ldcp; 2887 int rv; 2888 2889 if (handle == NULL) { 2890 DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n"); 2891 return (EINVAL); 2892 } 2893 2894 ldcp = (ldc_chan_t *)handle; 2895 2896 mutex_enter(&ldcp->lock); 2897 2898 if (ldcp->tstate < TS_INIT) { 2899 DWARN(ldcp->id, 2900 "ldc_open: (0x%llx) channel not initialized\n", ldcp->id); 2901 mutex_exit(&ldcp->lock); 2902 return (EFAULT); 2903 } 2904 if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) { 2905 DWARN(ldcp->id, 2906 "ldc_open: (0x%llx) channel is already open\n", ldcp->id); 2907 mutex_exit(&ldcp->lock); 2908 return (EFAULT); 2909 } 2910 2911 /* 2912 * Unregister/Register the tx queue with the hypervisor 2913 */ 2914 rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2915 if (rv) { 2916 cmn_err(CE_WARN, 2917 "ldc_open: (0x%lx) channel tx queue unconf failed\n", 2918 ldcp->id); 2919 mutex_exit(&ldcp->lock); 2920 return (EIO); 2921 } 2922 2923 rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries); 2924 if (rv) { 2925 cmn_err(CE_WARN, 2926 "ldc_open: (0x%lx) channel tx queue conf failed\n", 2927 ldcp->id); 2928 mutex_exit(&ldcp->lock); 2929 return (EIO); 2930 } 2931 2932 D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n", 2933 ldcp->id); 2934 2935 /* 2936 * Unregister/Register the rx queue with the hypervisor 2937 */ 2938 rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2939 if (rv) { 2940 cmn_err(CE_WARN, 2941 "ldc_open: (0x%lx) channel rx queue unconf failed\n", 2942 ldcp->id); 2943 mutex_exit(&ldcp->lock); 2944 return (EIO); 2945 } 2946 2947 rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries); 2948 if (rv) { 2949 cmn_err(CE_WARN, 2950 "ldc_open: (0x%lx) channel rx queue conf failed\n", 2951 ldcp->id); 2952 mutex_exit(&ldcp->lock); 2953 return (EIO); 2954 } 2955 2956 D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n", 2957 ldcp->id); 2958 2959 ldcp->tstate |= TS_QCONF_RDY; 2960 2961 /* Register the channel with the channel nexus */ 2962 rv = i_ldc_register_channel(ldcp); 2963 if (rv && rv != EAGAIN) { 2964 cmn_err(CE_WARN, 2965 "ldc_open: (0x%lx) channel register failed\n", ldcp->id); 2966 (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2967 (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2968 mutex_exit(&ldcp->lock); 2969 return (EIO); 2970 } 2971 2972 /* mark channel in OPEN state */ 2973 ldcp->status = LDC_OPEN; 2974 2975 /* Read channel state */ 2976 rv = hv_ldc_tx_get_state(ldcp->id, 2977 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 2978 if (rv) { 2979 cmn_err(CE_WARN, 2980 "ldc_open: (0x%lx) cannot read channel state\n", 2981 ldcp->id); 2982 (void) i_ldc_unregister_channel(ldcp); 2983 (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 2984 (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 2985 mutex_exit(&ldcp->lock); 2986 return (EIO); 2987 } 2988 2989 /* 2990 * set the ACKd head to current head location for reliable & 2991 * streaming mode 2992 */ 2993 ldcp->tx_ackd_head = ldcp->tx_head; 2994 2995 /* mark channel ready if HV reports link is UP (peer alloc'd Rx queue) */ 2996 if (ldcp->link_state == LDC_CHANNEL_UP || 2997 ldcp->link_state == LDC_CHANNEL_RESET) { 2998 ldcp->tstate |= TS_LINK_READY; 2999 ldcp->status = LDC_READY; 3000 } 3001 3002 /* 3003 * if channel is being opened in RAW mode - no handshake is needed; 3004 * switch the channel to the READY and UP states 3005 */ 3006 if (ldcp->mode == LDC_MODE_RAW) { 3007 ldcp->tstate = TS_UP; /*
set bits associated with LDC UP */ 3008 ldcp->status = LDC_UP; 3009 } 3010 3011 mutex_exit(&ldcp->lock); 3012 3013 /* 3014 * Increment number of open channels 3015 */ 3016 mutex_enter(&ldcssp->lock); 3017 ldcssp->channels_open++; 3018 mutex_exit(&ldcssp->lock); 3019 3020 D1(ldcp->id, 3021 "ldc_open: (0x%llx) channel (0x%p) open for use " 3022 "(tstate=0x%x, status=0x%x)\n", 3023 ldcp->id, ldcp, ldcp->tstate, ldcp->status); 3024 3025 return (0); 3026 } 3027 3028 /* 3029 * Close the LDC connection. It will return EBUSY if there 3030 * are memory segments or descriptor rings either bound to or 3031 * mapped over the channel 3032 */ 3033 int 3034 ldc_close(ldc_handle_t handle) 3035 { 3036 ldc_chan_t *ldcp; 3037 int rv = 0, retries = 0; 3038 boolean_t chk_done = B_FALSE; 3039 3040 if (handle == NULL) { 3041 DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n"); 3042 return (EINVAL); 3043 } 3044 ldcp = (ldc_chan_t *)handle; 3045 3046 mutex_enter(&ldcp->lock); 3047 3048 /* return error if channel is not open */ 3049 if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) { 3050 DWARN(ldcp->id, 3051 "ldc_close: (0x%llx) channel is not open\n", ldcp->id); 3052 mutex_exit(&ldcp->lock); 3053 return (EFAULT); 3054 } 3055 3056 /* if any memory handles or drings are bound or mapped, cannot close */ 3057 if (ldcp->mhdl_list != NULL) { 3058 DWARN(ldcp->id, 3059 "ldc_close: (0x%llx) channel has bound memory handles\n", 3060 ldcp->id); 3061 mutex_exit(&ldcp->lock); 3062 return (EBUSY); 3063 } 3064 if (ldcp->exp_dring_list != NULL) { 3065 DWARN(ldcp->id, 3066 "ldc_close: (0x%llx) channel has bound descriptor rings\n", 3067 ldcp->id); 3068 mutex_exit(&ldcp->lock); 3069 return (EBUSY); 3070 } 3071 if (ldcp->imp_dring_list != NULL) { 3072 DWARN(ldcp->id, 3073 "ldc_close: (0x%llx) channel has mapped descriptor rings\n", 3074 ldcp->id); 3075 mutex_exit(&ldcp->lock); 3076 return (EBUSY); 3077 } 3078 3079 if (ldcp->cb_inprogress) { 3080 DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n", 3081 ldcp->id); 3082 mutex_exit(&ldcp->lock); 3083 return (EWOULDBLOCK); 3084 } 3085 3086 /* Obtain Tx lock */ 3087 mutex_enter(&ldcp->tx_lock); 3088 3089 /* 3090 * Wait for pending transmits to complete, i.e., the Tx queue to drain; 3091 * if there are pending pkts - wait 1 ms and retry 3092 */ 3093 for (;;) { 3094 3095 rv = hv_ldc_tx_get_state(ldcp->id, 3096 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 3097 if (rv) { 3098 cmn_err(CE_WARN, 3099 "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id); 3100 mutex_exit(&ldcp->tx_lock); 3101 mutex_exit(&ldcp->lock); 3102 return (EIO); 3103 } 3104 3105 if (ldcp->tx_head == ldcp->tx_tail || 3106 ldcp->link_state != LDC_CHANNEL_UP) { 3107 break; 3108 } 3109 3110 if (chk_done) { 3111 DWARN(ldcp->id, 3112 "ldc_close: (0x%llx) Tx queue drain timeout\n", 3113 ldcp->id); 3114 break; 3115 } 3116 3117 /* wait for one ms and try again */ 3118 delay(drv_usectohz(1000)); 3119 chk_done = B_TRUE; 3120 } 3121 3122 /* 3123 * Drain the Tx and Rx queues as we are closing the 3124 * channel. We don't care about any pending packets. 3125 * We have to also drain the queue prior to clearing 3126 * pending interrupts, otherwise the HV will trigger 3127 * an interrupt the moment the interrupt state is 3128 * cleared.
3129 */ 3130 (void) i_ldc_txq_reconf(ldcp); 3131 (void) i_ldc_rxq_drain(ldcp); 3132 3133 /* 3134 * Unregister the channel with the nexus 3135 */ 3136 while ((rv = i_ldc_unregister_channel(ldcp)) != 0) { 3137 3138 mutex_exit(&ldcp->tx_lock); 3139 mutex_exit(&ldcp->lock); 3140 3141 /* if any error other than EAGAIN return back */ 3142 if (rv != EAGAIN || retries >= ldc_max_retries) { 3143 cmn_err(CE_WARN, 3144 "ldc_close: (0x%lx) unregister failed, %d\n", 3145 ldcp->id, rv); 3146 return (rv); 3147 } 3148 3149 /* 3150 * As there could be pending interrupts we need 3151 * to wait and try again 3152 */ 3153 drv_usecwait(ldc_close_delay); 3154 mutex_enter(&ldcp->lock); 3155 mutex_enter(&ldcp->tx_lock); 3156 retries++; 3157 } 3158 3159 /* 3160 * Unregister queues 3161 */ 3162 rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL); 3163 if (rv) { 3164 cmn_err(CE_WARN, 3165 "ldc_close: (0x%lx) channel TX queue unconf failed\n", 3166 ldcp->id); 3167 mutex_exit(&ldcp->tx_lock); 3168 mutex_exit(&ldcp->lock); 3169 return (EIO); 3170 } 3171 rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL); 3172 if (rv) { 3173 cmn_err(CE_WARN, 3174 "ldc_close: (0x%lx) channel RX queue unconf failed\n", 3175 ldcp->id); 3176 mutex_exit(&ldcp->tx_lock); 3177 mutex_exit(&ldcp->lock); 3178 return (EIO); 3179 } 3180 3181 ldcp->tstate &= ~TS_QCONF_RDY; 3182 3183 /* Reset channel state information */ 3184 i_ldc_reset_state(ldcp); 3185 3186 /* Mark channel as down and in initialized state */ 3187 ldcp->tx_ackd_head = 0; 3188 ldcp->tx_head = 0; 3189 ldcp->tstate = TS_IN_RESET|TS_INIT; 3190 ldcp->status = LDC_INIT; 3191 3192 mutex_exit(&ldcp->tx_lock); 3193 mutex_exit(&ldcp->lock); 3194 3195 /* Decrement number of open channels */ 3196 mutex_enter(&ldcssp->lock); 3197 ldcssp->channels_open--; 3198 mutex_exit(&ldcssp->lock); 3199 3200 D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id); 3201 3202 return (0); 3203 } 3204 3205 /* 3206 * Register channel callback 3207 */ 3208 int 3209 ldc_reg_callback(ldc_handle_t handle, 3210 uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg) 3211 { 3212 ldc_chan_t *ldcp; 3213 3214 if (handle == NULL) { 3215 DWARN(DBG_ALL_LDCS, 3216 "ldc_reg_callback: invalid channel handle\n"); 3217 return (EINVAL); 3218 } 3219 if (((uint64_t)cb) < KERNELBASE) { 3220 DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n"); 3221 return (EINVAL); 3222 } 3223 ldcp = (ldc_chan_t *)handle; 3224 3225 mutex_enter(&ldcp->lock); 3226 3227 if (ldcp->cb) { 3228 DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n", 3229 ldcp->id); 3230 mutex_exit(&ldcp->lock); 3231 return (EIO); 3232 } 3233 if (ldcp->cb_inprogress) { 3234 DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n", 3235 ldcp->id); 3236 mutex_exit(&ldcp->lock); 3237 return (EWOULDBLOCK); 3238 } 3239 3240 ldcp->cb = cb; 3241 ldcp->cb_arg = arg; 3242 ldcp->cb_enabled = B_TRUE; 3243 3244 D1(ldcp->id, 3245 "ldc_reg_callback: (0x%llx) registered callback for channel\n", 3246 ldcp->id); 3247 3248 mutex_exit(&ldcp->lock); 3249 3250 return (0); 3251 } 3252 3253 /* 3254 * Unregister channel callback 3255 */ 3256 int 3257 ldc_unreg_callback(ldc_handle_t handle) 3258 { 3259 ldc_chan_t *ldcp; 3260 3261 if (handle == NULL) { 3262 DWARN(DBG_ALL_LDCS, 3263 "ldc_unreg_callback: invalid channel handle\n"); 3264 return (EINVAL); 3265 } 3266 ldcp = (ldc_chan_t *)handle; 3267 3268 mutex_enter(&ldcp->lock); 3269 3270 if (ldcp->cb == NULL) { 3271 DWARN(ldcp->id, 3272 "ldc_unreg_callback: (0x%llx) no callback exists\n", 3273 ldcp->id); 3274 mutex_exit(&ldcp->lock); 3275 
return (EIO); 3276 } 3277 if (ldcp->cb_inprogress) { 3278 DWARN(ldcp->id, 3279 "ldc_unreg_callback: (0x%llx) callback active\n", 3280 ldcp->id); 3281 mutex_exit(&ldcp->lock); 3282 return (EWOULDBLOCK); 3283 } 3284 3285 ldcp->cb = NULL; 3286 ldcp->cb_arg = NULL; 3287 ldcp->cb_enabled = B_FALSE; 3288 3289 D1(ldcp->id, 3290 "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n", 3291 ldcp->id); 3292 3293 mutex_exit(&ldcp->lock); 3294 3295 return (0); 3296 } 3297 3298 3299 /* 3300 * Bring a channel up by initiating a handshake with the peer. 3301 * This call is asynchronous. It will complete at a later point 3302 * in time when the peer responds with an RTR. 3303 */ 3304 int 3305 ldc_up(ldc_handle_t handle) 3306 { 3307 int rv; 3308 ldc_chan_t *ldcp; 3309 ldc_msg_t *ldcmsg; 3310 uint64_t tx_tail, tstate, link_state; 3311 3312 if (handle == NULL) { 3313 DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n"); 3314 return (EINVAL); 3315 } 3316 ldcp = (ldc_chan_t *)handle; 3317 3318 mutex_enter(&ldcp->lock); 3319 3320 D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id); 3321 3322 /* clear the reset state */ 3323 tstate = ldcp->tstate; 3324 ldcp->tstate &= ~TS_IN_RESET; 3325 3326 if (ldcp->tstate == TS_UP) { 3327 DWARN(ldcp->id, 3328 "ldc_up: (0x%llx) channel is already in UP state\n", 3329 ldcp->id); 3330 3331 /* mark channel as up */ 3332 ldcp->status = LDC_UP; 3333 3334 /* 3335 * if the channel was in reset state and there was 3336 * pending data, clear the interrupt state. This will 3337 * trigger an interrupt, causing the RX handler 3338 * to invoke the client's callback 3339 */ 3340 if ((tstate & TS_IN_RESET) && 3341 ldcp->rx_intr_state == LDC_INTR_PEND) { 3342 D1(ldcp->id, 3343 "ldc_up: (0x%llx) channel has pending data, " 3344 "clearing interrupt\n", ldcp->id); 3345 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 3346 } 3347 3348 mutex_exit(&ldcp->lock); 3349 return (0); 3350 } 3351 3352 /* if the channel is in RAW mode - mark it as UP, if READY */ 3353 if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) { 3354 ldcp->tstate = TS_UP; 3355 mutex_exit(&ldcp->lock); 3356 return (0); 3357 } 3358 3359 /* Don't start another handshake if there is one in progress */ 3360 if (ldcp->hstate) { 3361 D1(ldcp->id, 3362 "ldc_up: (0x%llx) channel handshake in progress\n", 3363 ldcp->id); 3364 mutex_exit(&ldcp->lock); 3365 return (0); 3366 } 3367 3368 mutex_enter(&ldcp->tx_lock); 3369 3370 /* save current link state */ 3371 link_state = ldcp->link_state; 3372 3373 /* get the current tail for the LDC msg */ 3374 rv = i_ldc_get_tx_tail(ldcp, &tx_tail); 3375 if (rv) { 3376 D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n", 3377 ldcp->id); 3378 mutex_exit(&ldcp->tx_lock); 3379 mutex_exit(&ldcp->lock); 3380 return (ECONNREFUSED); 3381 } 3382 3383 /* 3384 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP, 3385 * from a previous state of DOWN, then mark the channel as 3386 * being ready for handshake.
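 * The handshake itself is started just below by queueing a
 * CTRL/INFO/VER packet that carries this driver's supported version
 * list (ldc_versions).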
3387 */ 3388 if ((link_state == LDC_CHANNEL_DOWN) && 3389 (link_state != ldcp->link_state)) { 3390 3391 ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) || 3392 (ldcp->link_state == LDC_CHANNEL_UP)); 3393 3394 if (ldcp->mode == LDC_MODE_RAW) { 3395 ldcp->status = LDC_UP; 3396 ldcp->tstate = TS_UP; 3397 mutex_exit(&ldcp->tx_lock); 3398 mutex_exit(&ldcp->lock); 3399 return (0); 3400 } else { 3401 ldcp->status = LDC_READY; 3402 ldcp->tstate |= TS_LINK_READY; 3403 } 3404 3405 } 3406 3407 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 3408 ZERO_PKT(ldcmsg); 3409 3410 ldcmsg->type = LDC_CTRL; 3411 ldcmsg->stype = LDC_INFO; 3412 ldcmsg->ctrl = LDC_VER; 3413 ldcp->next_vidx = 0; 3414 bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0])); 3415 3416 DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg); 3417 3418 /* initiate the send by calling into HV and set the new tail */ 3419 tx_tail = (tx_tail + LDC_PACKET_SIZE) % 3420 (ldcp->tx_q_entries << LDC_PACKET_SHIFT); 3421 3422 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 3423 if (rv) { 3424 DWARN(ldcp->id, 3425 "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n", 3426 ldcp->id, rv); 3427 mutex_exit(&ldcp->tx_lock); 3428 mutex_exit(&ldcp->lock); 3429 return (rv); 3430 } 3431 3432 ldcp->hstate |= TS_SENT_VER; 3433 ldcp->tx_tail = tx_tail; 3434 D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id); 3435 3436 mutex_exit(&ldcp->tx_lock); 3437 mutex_exit(&ldcp->lock); 3438 3439 return (rv); 3440 } 3441 3442 3443 /* 3444 * Bring a channel down by resetting its state and queues 3445 */ 3446 int 3447 ldc_down(ldc_handle_t handle) 3448 { 3449 ldc_chan_t *ldcp; 3450 3451 if (handle == NULL) { 3452 DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n"); 3453 return (EINVAL); 3454 } 3455 ldcp = (ldc_chan_t *)handle; 3456 mutex_enter(&ldcp->lock); 3457 mutex_enter(&ldcp->tx_lock); 3458 i_ldc_reset(ldcp, B_TRUE); 3459 mutex_exit(&ldcp->tx_lock); 3460 mutex_exit(&ldcp->lock); 3461 3462 return (0); 3463 } 3464 3465 /* 3466 * Get the current channel status 3467 */ 3468 int 3469 ldc_status(ldc_handle_t handle, ldc_status_t *status) 3470 { 3471 ldc_chan_t *ldcp; 3472 3473 if (handle == NULL || status == NULL) { 3474 DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n"); 3475 return (EINVAL); 3476 } 3477 ldcp = (ldc_chan_t *)handle; 3478 3479 *status = ((ldc_chan_t *)handle)->status; 3480 3481 D1(ldcp->id, 3482 "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status); 3483 return (0); 3484 } 3485 3486 3487 /* 3488 * Set the channel's callback mode - enable/disable callbacks 3489 */ 3490 int 3491 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode) 3492 { 3493 ldc_chan_t *ldcp; 3494 3495 if (handle == NULL) { 3496 DWARN(DBG_ALL_LDCS, 3497 "ldc_set_intr_mode: invalid channel handle\n"); 3498 return (EINVAL); 3499 } 3500 ldcp = (ldc_chan_t *)handle; 3501 3502 /* 3503 * Record no callbacks should be invoked 3504 */ 3505 mutex_enter(&ldcp->lock); 3506 3507 switch (cmode) { 3508 case LDC_CB_DISABLE: 3509 if (!ldcp->cb_enabled) { 3510 DWARN(ldcp->id, 3511 "ldc_set_cb_mode: (0x%llx) callbacks disabled\n", 3512 ldcp->id); 3513 break; 3514 } 3515 ldcp->cb_enabled = B_FALSE; 3516 3517 D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n", 3518 ldcp->id); 3519 break; 3520 3521 case LDC_CB_ENABLE: 3522 if (ldcp->cb_enabled) { 3523 DWARN(ldcp->id, 3524 "ldc_set_cb_mode: (0x%llx) callbacks enabled\n", 3525 ldcp->id); 3526 break; 3527 } 3528 ldcp->cb_enabled = B_TRUE; 3529 3530 D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n", 3531 
ldcp->id); 3532 break; 3533 } 3534 3535 mutex_exit(&ldcp->lock); 3536 3537 return (0); 3538 } 3539 3540 /* 3541 * Check to see if there are packets on the incoming queue. 3542 * Will return hasdata = B_FALSE if there are no packets 3543 */ 3544 int 3545 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata) 3546 { 3547 int rv; 3548 uint64_t rx_head, rx_tail; 3549 ldc_chan_t *ldcp; 3550 3551 if (handle == NULL) { 3552 DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n"); 3553 return (EINVAL); 3554 } 3555 ldcp = (ldc_chan_t *)handle; 3556 3557 *hasdata = B_FALSE; 3558 3559 mutex_enter(&ldcp->lock); 3560 3561 if (ldcp->tstate != TS_UP) { 3562 D1(ldcp->id, 3563 "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id); 3564 mutex_exit(&ldcp->lock); 3565 return (ECONNRESET); 3566 } 3567 3568 /* Read packet(s) from the queue */ 3569 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3570 &ldcp->link_state); 3571 if (rv != 0) { 3572 cmn_err(CE_WARN, 3573 "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id); 3574 mutex_exit(&ldcp->lock); 3575 return (EIO); 3576 } 3577 3578 /* reset the channel state if the channel went down */ 3579 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3580 ldcp->link_state == LDC_CHANNEL_RESET) { 3581 mutex_enter(&ldcp->tx_lock); 3582 i_ldc_reset(ldcp, B_FALSE); 3583 mutex_exit(&ldcp->tx_lock); 3584 mutex_exit(&ldcp->lock); 3585 return (ECONNRESET); 3586 } 3587 3588 switch (ldcp->mode) { 3589 case LDC_MODE_RAW: 3590 /* 3591 * In raw mode, there are no ctrl packets, so checking 3592 * if the queue is non-empty is sufficient. 3593 */ 3594 *hasdata = (rx_head != rx_tail); 3595 break; 3596 3597 case LDC_MODE_UNRELIABLE: 3598 /* 3599 * In unreliable mode, if the queue is non-empty, we need 3600 * to check if it actually contains unread data packets. 3601 * The queue may just contain ctrl packets. 3602 */ 3603 if (rx_head != rx_tail) 3604 *hasdata = (i_ldc_chkq(ldcp) == 0); 3605 break; 3606 3607 case LDC_MODE_STREAM: 3608 /* 3609 * In stream mode, first check for 'stream_remains' > 0. 3610 * Otherwise, if the data queue head and tail pointers 3611 * differ, there must be data to read. 3612 */ 3613 if (ldcp->stream_remains > 0) 3614 *hasdata = B_TRUE; 3615 else 3616 *hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail); 3617 break; 3618 3619 default: 3620 cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode " 3621 "(0x%x)", ldcp->id, ldcp->mode); 3622 mutex_exit(&ldcp->lock); 3623 return (EIO); 3624 } 3625 3626 mutex_exit(&ldcp->lock); 3627 3628 return (0); 3629 } 3630 3631 3632 /* 3633 * Read up to 'size' bytes. If the incoming buffer 3634 * holds more than 'size' bytes, ENOBUFS is returned. 3635 * 3636 * On return, size contains the number of bytes read.
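 *
 * Illustrative usage (a hedged sketch, not part of the original
 * source; the buffer size is the caller's choice):
 *
 *	char buf[512];
 *	size_t len = sizeof (buf);
 *	rv = ldc_read(handle, (caddr_t)buf, &len);
 *	if (rv == 0 && len > 0)
 *		(process 'len' bytes)
 *	else if (rv == ECONNRESET)
 *		(channel was reset; wait for a callback before retrying)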
3637 */ 3638 int 3639 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep) 3640 { 3641 ldc_chan_t *ldcp; 3642 uint64_t rx_head = 0, rx_tail = 0; 3643 int rv = 0, exit_val; 3644 3645 if (handle == NULL) { 3646 DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n"); 3647 return (EINVAL); 3648 } 3649 3650 ldcp = (ldc_chan_t *)handle; 3651 3652 /* channel lock */ 3653 mutex_enter(&ldcp->lock); 3654 3655 if (ldcp->tstate != TS_UP) { 3656 DWARN(ldcp->id, 3657 "ldc_read: (0x%llx) channel is not in UP state\n", 3658 ldcp->id); 3659 exit_val = ECONNRESET; 3660 } else if (ldcp->mode == LDC_MODE_STREAM) { 3661 TRACE_RXDQ_LENGTH(ldcp); 3662 exit_val = ldcp->read_p(ldcp, bufp, sizep); 3663 mutex_exit(&ldcp->lock); 3664 return (exit_val); 3665 } else { 3666 exit_val = ldcp->read_p(ldcp, bufp, sizep); 3667 } 3668 3669 /* 3670 * if queue has been drained - clear interrupt 3671 */ 3672 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3673 &ldcp->link_state); 3674 if (rv != 0) { 3675 cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", 3676 ldcp->id); 3677 mutex_enter(&ldcp->tx_lock); 3678 i_ldc_reset(ldcp, B_TRUE); 3679 mutex_exit(&ldcp->tx_lock); 3680 mutex_exit(&ldcp->lock); 3681 return (ECONNRESET); 3682 } 3683 3684 if (exit_val == 0) { 3685 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3686 ldcp->link_state == LDC_CHANNEL_RESET) { 3687 mutex_enter(&ldcp->tx_lock); 3688 i_ldc_reset(ldcp, B_FALSE); 3689 exit_val = ECONNRESET; 3690 mutex_exit(&ldcp->tx_lock); 3691 } 3692 if ((rv == 0) && 3693 (ldcp->rx_intr_state == LDC_INTR_PEND) && 3694 (rx_head == rx_tail)) { 3695 i_ldc_clear_intr(ldcp, CNEX_RX_INTR); 3696 } 3697 } 3698 3699 mutex_exit(&ldcp->lock); 3700 return (exit_val); 3701 } 3702 3703 /* 3704 * Basic raw mondo read - 3705 * no interpretation of mondo contents at all. 
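 * (A "mondo" here is one fixed-size LDC queue entry; in raw mode the
 * entry payload is handed to the caller unchanged, so a successful
 * read returns exactly LDC_PAYLOAD_SIZE_RAW bytes, or zero if the
 * queue is empty, and smaller target buffers fail with ENOBUFS.)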
3706 * 3707 * Enter and exit with ldcp->lock held by caller 3708 */ 3709 static int 3710 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3711 { 3712 uint64_t q_size_mask; 3713 ldc_msg_t *msgp; 3714 uint8_t *msgbufp; 3715 int rv = 0, space; 3716 uint64_t rx_head, rx_tail; 3717 3718 space = *sizep; 3719 3720 if (space < LDC_PAYLOAD_SIZE_RAW) 3721 return (ENOBUFS); 3722 3723 ASSERT(mutex_owned(&ldcp->lock)); 3724 3725 /* compute mask for increment */ 3726 q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT; 3727 3728 /* 3729 * Read packet(s) from the queue 3730 */ 3731 rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, 3732 &ldcp->link_state); 3733 if (rv != 0) { 3734 cmn_err(CE_WARN, 3735 "ldc_read_raw: (0x%lx) unable to read queue ptrs", 3736 ldcp->id); 3737 return (EIO); 3738 } 3739 D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx," 3740 " rxt=0x%llx, st=0x%llx\n", 3741 ldcp->id, rx_head, rx_tail, ldcp->link_state); 3742 3743 /* reset the channel state if the channel went down */ 3744 if (ldcp->link_state == LDC_CHANNEL_DOWN || 3745 ldcp->link_state == LDC_CHANNEL_RESET) { 3746 mutex_enter(&ldcp->tx_lock); 3747 i_ldc_reset(ldcp, B_FALSE); 3748 mutex_exit(&ldcp->tx_lock); 3749 return (ECONNRESET); 3750 } 3751 3752 /* 3753 * Check for empty queue 3754 */ 3755 if (rx_head == rx_tail) { 3756 *sizep = 0; 3757 return (0); 3758 } 3759 3760 /* get the message */ 3761 msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head); 3762 3763 /* if channel is in RAW mode, copy data and return */ 3764 msgbufp = (uint8_t *)&(msgp->raw[0]); 3765 3766 bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW); 3767 3768 DUMP_PAYLOAD(ldcp->id, msgbufp); 3769 3770 *sizep = LDC_PAYLOAD_SIZE_RAW; 3771 3772 rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask; 3773 rv = i_ldc_set_rx_head(ldcp, rx_head); 3774 3775 return (rv); 3776 } 3777 3778 /* 3779 * Process LDC mondos to build larger packets 3780 * with either un-reliable or reliable delivery. 
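 *
 * Messages are reassembled using the envelope bits of each packet:
 * the first fragment carries LDC_FRAG_START, the last carries
 * LDC_FRAG_STOP (a single-packet message carries both), and the low
 * bits of 'env' (LDC_LEN_MASK) hold the payload length of each
 * fragment.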
3781 * 3782 * Enter and exit with ldcp->lock held by caller 3783 */ 3784 static int 3785 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 3786 { 3787 int rv = 0; 3788 uint64_t rx_head = 0, rx_tail = 0; 3789 uint64_t curr_head = 0; 3790 ldc_msg_t *msg; 3791 caddr_t target; 3792 size_t len = 0, bytes_read = 0; 3793 int retries = 0; 3794 uint64_t q_va, q_size_mask; 3795 uint64_t first_fragment = 0; 3796 3797 target = target_bufp; 3798 3799 ASSERT(mutex_owned(&ldcp->lock)); 3800 3801 /* check if the buffer and size are valid */ 3802 if (target_bufp == NULL || *sizep == 0) { 3803 DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n", 3804 ldcp->id); 3805 return (EINVAL); 3806 } 3807 3808 /* Set q_va and compute increment mask for the appropriate queue */ 3809 if (ldcp->mode == LDC_MODE_STREAM) { 3810 q_va = ldcp->rx_dq_va; 3811 q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT; 3812 } else { 3813 q_va = ldcp->rx_q_va; 3814 q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT; 3815 } 3816 3817 /* 3818 * Read packet(s) from the queue 3819 */ 3820 rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail, 3821 &ldcp->link_state); 3822 if (rv != 0) { 3823 cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", 3824 ldcp->id); 3825 mutex_enter(&ldcp->tx_lock); 3826 i_ldc_reset(ldcp, B_TRUE); 3827 mutex_exit(&ldcp->tx_lock); 3828 return (ECONNRESET); 3829 } 3830 D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n", 3831 ldcp->id, curr_head, rx_tail, ldcp->link_state); 3832 3833 /* reset the channel state if the channel went down */ 3834 if (ldcp->link_state != LDC_CHANNEL_UP) 3835 goto channel_is_reset; 3836 3837 for (;;) { 3838 3839 if (curr_head == rx_tail) { 3840 /* 3841 * If a data queue is being used, check the Rx HV 3842 * queue. This will copy over any new data packets 3843 * that have arrived. 
3844 */ 3845 if (ldcp->mode == LDC_MODE_STREAM) 3846 (void) i_ldc_chkq(ldcp); 3847 3848 rv = ldcp->readq_get_state(ldcp, 3849 &rx_head, &rx_tail, &ldcp->link_state); 3850 if (rv != 0) { 3851 cmn_err(CE_WARN, 3852 "ldc_read: (0x%lx) cannot read queue ptrs", 3853 ldcp->id); 3854 mutex_enter(&ldcp->tx_lock); 3855 i_ldc_reset(ldcp, B_TRUE); 3856 mutex_exit(&ldcp->tx_lock); 3857 return (ECONNRESET); 3858 } 3859 3860 if (ldcp->link_state != LDC_CHANNEL_UP) 3861 goto channel_is_reset; 3862 3863 if (curr_head == rx_tail) { 3864 3865 /* If in the middle of a fragmented xfer */ 3866 if (first_fragment != 0) { 3867 3868 /* wait for ldc_delay usecs */ 3869 drv_usecwait(ldc_delay); 3870 3871 if (++retries < ldc_max_retries) 3872 continue; 3873 3874 *sizep = 0; 3875 if (ldcp->mode != LDC_MODE_STREAM) 3876 ldcp->last_msg_rcd = 3877 first_fragment - 1; 3878 DWARN(DBG_ALL_LDCS, "ldc_read: " 3879 "(0x%llx) read timeout", ldcp->id); 3880 return (EAGAIN); 3881 } 3882 *sizep = 0; 3883 break; 3884 } 3885 } 3886 retries = 0; 3887 3888 D2(ldcp->id, 3889 "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n", 3890 ldcp->id, curr_head, rx_head, rx_tail); 3891 3892 /* get the message */ 3893 msg = (ldc_msg_t *)(q_va + curr_head); 3894 3895 DUMP_LDC_PKT(ldcp, "ldc_read received pkt", 3896 ldcp->rx_q_va + curr_head); 3897 3898 /* Check the message ID for the message received */ 3899 if (ldcp->mode != LDC_MODE_STREAM) { 3900 if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) { 3901 3902 DWARN(ldcp->id, "ldc_read: (0x%llx) seqid " 3903 "error, q_ptrs=0x%lx,0x%lx", 3904 ldcp->id, rx_head, rx_tail); 3905 3906 /* throw away data */ 3907 bytes_read = 0; 3908 3909 /* Reset last_msg_rcd to start of message */ 3910 if (first_fragment != 0) { 3911 ldcp->last_msg_rcd = first_fragment - 1; 3912 first_fragment = 0; 3913 } 3914 /* 3915 * Send a NACK -- invalid seqid 3916 * get the current tail for the response 3917 */ 3918 rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK, 3919 (msg->ctrl & LDC_CTRL_MASK)); 3920 if (rv) { 3921 cmn_err(CE_NOTE, 3922 "ldc_read: (0x%lx) err sending " 3923 "NACK msg\n", ldcp->id); 3924 3925 /* if cannot send NACK - reset chan */ 3926 mutex_enter(&ldcp->tx_lock); 3927 i_ldc_reset(ldcp, B_FALSE); 3928 mutex_exit(&ldcp->tx_lock); 3929 rv = ECONNRESET; 3930 break; 3931 } 3932 3933 /* purge receive queue */ 3934 rv = i_ldc_set_rx_head(ldcp, rx_tail); 3935 3936 break; 3937 } 3938 3939 /* 3940 * Process any messages of type CTRL messages 3941 * Future implementations should try to pass these 3942 * to LDC link by resetting the intr state. 
* 3944 * NOTE: not done as a switch() as type can be 3945 * both ctrl+data 3946 */ 3947 if (msg->type & LDC_CTRL) { 3948 if (rv = i_ldc_ctrlmsg(ldcp, msg)) { 3949 if (rv == EAGAIN) 3950 continue; 3951 rv = i_ldc_set_rx_head(ldcp, rx_tail); 3952 *sizep = 0; 3953 bytes_read = 0; 3954 break; 3955 } 3956 } 3957 3958 /* process data ACKs */ 3959 if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { 3960 if (rv = i_ldc_process_data_ACK(ldcp, msg)) { 3961 *sizep = 0; 3962 bytes_read = 0; 3963 break; 3964 } 3965 } 3966 3967 /* process data NACKs */ 3968 if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) { 3969 DWARN(ldcp->id, 3970 "ldc_read: (0x%llx) received DATA/NACK", 3971 ldcp->id); 3972 mutex_enter(&ldcp->tx_lock); 3973 i_ldc_reset(ldcp, B_TRUE); 3974 mutex_exit(&ldcp->tx_lock); 3975 return (ECONNRESET); 3976 } 3977 } 3978 3979 /* process data messages */ 3980 if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { 3981 3982 uint8_t *msgbuf = (uint8_t *)( 3983 (ldcp->mode == LDC_MODE_RELIABLE || 3984 ldcp->mode == LDC_MODE_STREAM) ? 3985 msg->rdata : msg->udata); 3986 3987 D2(ldcp->id, 3988 "ldc_read: (0x%llx) received data msg\n", ldcp->id); 3989 3990 /* get the packet length */ 3991 len = (msg->env & LDC_LEN_MASK); 3992 3993 /* 3994 * FUTURE OPTIMIZATION: 3995 * don't need to set q head for every 3996 * packet we read; just need to do this when 3997 * we are done or need to wait for more 3998 * mondos to make a full packet - this is 3999 * currently expensive. 4000 */ 4001 4002 if (first_fragment == 0) { 4003 4004 /* 4005 * first packets should always have the start 4006 * bit set (even for a single packet). If not, 4007 * throw away the packet 4008 */ 4009 if (!(msg->env & LDC_FRAG_START)) { 4010 4011 DWARN(DBG_ALL_LDCS, 4012 "ldc_read: (0x%llx) not start - " 4013 "frag=%x\n", ldcp->id, 4014 (msg->env) & LDC_FRAG_MASK); 4015 4016 /* toss pkt, inc head, cont reading */ 4017 bytes_read = 0; 4018 target = target_bufp; 4019 curr_head = 4020 (curr_head + LDC_PACKET_SIZE) 4021 & q_size_mask; 4022 if (rv = ldcp->readq_set_head(ldcp, 4023 curr_head)) 4024 break; 4025 4026 continue; 4027 } 4028 4029 first_fragment = msg->seqid; 4030 } else { 4031 /* check to see if this is a pkt w/ START bit */ 4032 if (msg->env & LDC_FRAG_START) { 4033 DWARN(DBG_ALL_LDCS, 4034 "ldc_read:(0x%llx) unexpected pkt" 4035 " env=0x%x discarding %d bytes," 4036 " lastmsg=%d, currentmsg=%d\n", 4037 ldcp->id, msg->env&LDC_FRAG_MASK, 4038 bytes_read, ldcp->last_msg_rcd, 4039 msg->seqid); 4040 4041 /* throw away data we have read so far */ 4042 bytes_read = 0; 4043 target = target_bufp; 4044 first_fragment = msg->seqid; 4045 4046 if (rv = ldcp->readq_set_head(ldcp, 4047 curr_head)) 4048 break; 4049 } 4050 } 4051 4052 /* copy (next) pkt into buffer */ 4053 if (len <= (*sizep - bytes_read)) { 4054 bcopy(msgbuf, target, len); 4055 target += len; 4056 bytes_read += len; 4057 } else { 4058 /* 4059 * there is not enough space in the buffer to 4060 * read this pkt.
throw message away & continue 4061 * reading data from queue 4062 */ 4063 DWARN(DBG_ALL_LDCS, 4064 "ldc_read: (0x%llx) buffer too small, " 4065 "head=0x%lx, expect=%d, got=%d\n", ldcp->id, 4066 curr_head, *sizep, bytes_read+len); 4067 4068 first_fragment = 0; 4069 target = target_bufp; 4070 bytes_read = 0; 4071 4072 /* throw away everything received so far */ 4073 if (rv = ldcp->readq_set_head(ldcp, curr_head)) 4074 break; 4075 4076 /* continue reading remaining pkts */ 4077 continue; 4078 } 4079 } 4080 4081 /* set the message id */ 4082 if (ldcp->mode != LDC_MODE_STREAM) 4083 ldcp->last_msg_rcd = msg->seqid; 4084 4085 /* move the head one position */ 4086 curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask; 4087 4088 if (msg->env & LDC_FRAG_STOP) { 4089 4090 /* 4091 * All pkts that are part of this fragmented transfer 4092 * have been read, or this was a single pkt read, 4093 * or there was an error 4094 */ 4095 4096 /* set the queue head */ 4097 if (rv = ldcp->readq_set_head(ldcp, curr_head)) 4098 bytes_read = 0; 4099 4100 *sizep = bytes_read; 4101 4102 break; 4103 } 4104 4105 /* advance head if it is a CTRL packet or a DATA ACK packet */ 4106 if ((msg->type & LDC_CTRL) || 4107 ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) { 4108 4109 /* set the queue head */ 4110 if (rv = ldcp->readq_set_head(ldcp, curr_head)) { 4111 bytes_read = 0; 4112 break; 4113 } 4114 4115 D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx", 4116 ldcp->id, curr_head); 4117 } 4118 4119 } /* for (;;) */ 4120 4121 D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep); 4122 4123 return (rv); 4124 4125 channel_is_reset: 4126 mutex_enter(&ldcp->tx_lock); 4127 i_ldc_reset(ldcp, B_FALSE); 4128 mutex_exit(&ldcp->tx_lock); 4129 return (ECONNRESET); 4130 } 4131 4132 /* 4133 * Use underlying reliable packet mechanism to fetch 4134 * and buffer incoming packets so we can hand them back as 4135 * a basic byte stream. 4136 * 4137 * Enter and exit with ldcp->lock held by caller 4138 */ 4139 static int 4140 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) 4141 { 4142 int rv; 4143 size_t size; 4144 4145 ASSERT(mutex_owned(&ldcp->lock)); 4146 4147 D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d", 4148 ldcp->id, *sizep); 4149 4150 if (ldcp->stream_remains == 0) { 4151 size = ldcp->mtu; 4152 rv = i_ldc_read_packet(ldcp, 4153 (caddr_t)ldcp->stream_bufferp, &size); 4154 D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d", 4155 ldcp->id, size); 4156 4157 if (rv != 0) 4158 return (rv); 4159 4160 ldcp->stream_remains = size; 4161 ldcp->stream_offset = 0; 4162 } 4163 4164 size = MIN(ldcp->stream_remains, *sizep); 4165 4166 bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size); 4167 ldcp->stream_offset += size; 4168 ldcp->stream_remains -= size; 4169 4170 D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d", 4171 ldcp->id, size); 4172 4173 *sizep = size; 4174 return (0); 4175 } 4176 4177 /* 4178 * Write the specified number of bytes to the channel 4179 * in multiple pkts of pkt_payload size. Each 4180 * packet is tagged with a unique packet ID in 4181 * the case of a reliable link. 4182 * 4183 * On return, size contains the number of bytes written.
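 *
 * Illustrative usage (a hedged sketch, not part of the original
 * source; 'msg' and 'msg_len' are hypothetical):
 *
 *	size_t len = msg_len;
 *	rv = ldc_write(handle, (caddr_t)&msg, &len);
 *	if (rv == 0)
 *		('len' bytes were queued for transmission)
 *	else if (rv == EWOULDBLOCK)
 *		(Tx queue is full; back off and retry)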
4184 */ 4185 int 4186 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep) 4187 { 4188 ldc_chan_t *ldcp; 4189 int rv = 0; 4190 4191 if (handle == NULL) { 4192 DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n"); 4193 return (EINVAL); 4194 } 4195 ldcp = (ldc_chan_t *)handle; 4196 4197 /* check if writes can occur */ 4198 if (!mutex_tryenter(&ldcp->tx_lock)) { 4199 /* 4200 * Could not get the lock - channel could 4201 * be in the process of being unconfigured 4202 * or reader has encountered an error 4203 */ 4204 return (EAGAIN); 4205 } 4206 4207 /* check if non-zero data to write */ 4208 if (buf == NULL || sizep == NULL) { 4209 DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n", 4210 ldcp->id); 4211 mutex_exit(&ldcp->tx_lock); 4212 return (EINVAL); 4213 } 4214 4215 if (*sizep == 0) { 4216 DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n", 4217 ldcp->id); 4218 mutex_exit(&ldcp->tx_lock); 4219 return (0); 4220 } 4221 4222 /* Check if channel is UP for data exchange */ 4223 if (ldcp->tstate != TS_UP) { 4224 DWARN(ldcp->id, 4225 "ldc_write: (0x%llx) channel is not in UP state\n", 4226 ldcp->id); 4227 *sizep = 0; 4228 rv = ECONNRESET; 4229 } else { 4230 rv = ldcp->write_p(ldcp, buf, sizep); 4231 } 4232 4233 mutex_exit(&ldcp->tx_lock); 4234 4235 return (rv); 4236 } 4237 4238 /* 4239 * Write a raw packet to the channel 4240 * On return, size contains the number of bytes written. 4241 */ 4242 static int 4243 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) 4244 { 4245 ldc_msg_t *ldcmsg; 4246 uint64_t tx_head, tx_tail, new_tail; 4247 int rv = 0; 4248 size_t size; 4249 4250 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 4251 ASSERT(ldcp->mode == LDC_MODE_RAW); 4252 4253 size = *sizep; 4254 4255 /* 4256 * Check to see if the packet size is less than or 4257 * equal to the packet size supported in raw mode 4258 */ 4259 if (size > ldcp->pkt_payload) { 4260 DWARN(ldcp->id, 4261 "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n", 4262 ldcp->id, *sizep); 4263 *sizep = 0; 4264 return (EMSGSIZE); 4265 } 4266 4267 /* get the qptrs for the tx queue */ 4268 rv = hv_ldc_tx_get_state(ldcp->id, 4269 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 4270 if (rv != 0) { 4271 cmn_err(CE_WARN, 4272 "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); 4273 *sizep = 0; 4274 return (EIO); 4275 } 4276 4277 if (ldcp->link_state == LDC_CHANNEL_DOWN || 4278 ldcp->link_state == LDC_CHANNEL_RESET) { 4279 DWARN(ldcp->id, 4280 "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); 4281 4282 *sizep = 0; 4283 if (mutex_tryenter(&ldcp->lock)) { 4284 i_ldc_reset(ldcp, B_FALSE); 4285 mutex_exit(&ldcp->lock); 4286 } else { 4287 /* 4288 * Release Tx lock, and then reacquire channel 4289 * and Tx lock in correct order 4290 */ 4291 mutex_exit(&ldcp->tx_lock); 4292 mutex_enter(&ldcp->lock); 4293 mutex_enter(&ldcp->tx_lock); 4294 i_ldc_reset(ldcp, B_FALSE); 4295 mutex_exit(&ldcp->lock); 4296 } 4297 return (ECONNRESET); 4298 } 4299 4300 tx_tail = ldcp->tx_tail; 4301 tx_head = ldcp->tx_head; 4302 new_tail = (tx_tail + LDC_PACKET_SIZE) & 4303 ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT); 4304 4305 if (new_tail == tx_head) { 4306 DWARN(DBG_ALL_LDCS, 4307 "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); 4308 *sizep = 0; 4309 return (EWOULDBLOCK); 4310 } 4311 4312 D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", 4313 ldcp->id, size); 4314 4315 /* Send the data now */ 4316 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 4317 4318 /* copy the data into pkt */ 4319 bcopy((uint8_t *)buf, ldcmsg,
size); 4320 4321 /* increment tail */ 4322 tx_tail = new_tail; 4323 4324 /* 4325 * All packets have been copied into the TX queue 4326 * update the tail ptr in the HV 4327 */ 4328 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 4329 if (rv) { 4330 if (rv == EWOULDBLOCK) { 4331 DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n", 4332 ldcp->id); 4333 *sizep = 0; 4334 return (EWOULDBLOCK); 4335 } 4336 4337 *sizep = 0; 4338 if (mutex_tryenter(&ldcp->lock)) { 4339 i_ldc_reset(ldcp, B_FALSE); 4340 mutex_exit(&ldcp->lock); 4341 } else { 4342 /* 4343 * Release Tx lock, and then reacquire channel 4344 * and Tx lock in correct order 4345 */ 4346 mutex_exit(&ldcp->tx_lock); 4347 mutex_enter(&ldcp->lock); 4348 mutex_enter(&ldcp->tx_lock); 4349 i_ldc_reset(ldcp, B_FALSE); 4350 mutex_exit(&ldcp->lock); 4351 } 4352 return (ECONNRESET); 4353 } 4354 4355 ldcp->tx_tail = tx_tail; 4356 *sizep = size; 4357 4358 D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size); 4359 4360 return (rv); 4361 } 4362 4363 4364 /* 4365 * Write specified amount of bytes to the channel 4366 * in multiple pkts of pkt_payload size. Each 4367 * packet is tagged with an unique packet ID in 4368 * the case of a reliable link. 4369 * 4370 * On return, size contains the number of bytes written. 4371 * This function needs to ensure that the write size is < MTU size 4372 */ 4373 static int 4374 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size) 4375 { 4376 ldc_msg_t *ldcmsg; 4377 uint64_t tx_head, tx_tail, new_tail, start; 4378 uint64_t txq_size_mask, numavail; 4379 uint8_t *msgbuf, *source = (uint8_t *)buf; 4380 size_t len, bytes_written = 0, remaining; 4381 int rv; 4382 uint32_t curr_seqid; 4383 4384 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 4385 4386 ASSERT(ldcp->mode == LDC_MODE_RELIABLE || 4387 ldcp->mode == LDC_MODE_UNRELIABLE || 4388 ldcp->mode == LDC_MODE_STREAM); 4389 4390 /* compute mask for increment */ 4391 txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT; 4392 4393 /* get the qptrs for the tx queue */ 4394 rv = hv_ldc_tx_get_state(ldcp->id, 4395 &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); 4396 if (rv != 0) { 4397 cmn_err(CE_WARN, 4398 "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); 4399 *size = 0; 4400 return (EIO); 4401 } 4402 4403 if (ldcp->link_state == LDC_CHANNEL_DOWN || 4404 ldcp->link_state == LDC_CHANNEL_RESET) { 4405 DWARN(ldcp->id, 4406 "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); 4407 *size = 0; 4408 if (mutex_tryenter(&ldcp->lock)) { 4409 i_ldc_reset(ldcp, B_FALSE); 4410 mutex_exit(&ldcp->lock); 4411 } else { 4412 /* 4413 * Release Tx lock, and then reacquire channel 4414 * and Tx lock in correct order 4415 */ 4416 mutex_exit(&ldcp->tx_lock); 4417 mutex_enter(&ldcp->lock); 4418 mutex_enter(&ldcp->tx_lock); 4419 i_ldc_reset(ldcp, B_FALSE); 4420 mutex_exit(&ldcp->lock); 4421 } 4422 return (ECONNRESET); 4423 } 4424 4425 tx_tail = ldcp->tx_tail; 4426 new_tail = (tx_tail + LDC_PACKET_SIZE) % 4427 (ldcp->tx_q_entries << LDC_PACKET_SHIFT); 4428 4429 /* 4430 * Check to see if the queue is full. The check is done using 4431 * the appropriate head based on the link mode. 
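 *
 * As a worked example (hypothetical values): with tx_q_entries = 128,
 * head at slot 8 and tail at slot 16, the computation below yields
 * numavail = (8 - 16 + 128 - 1) % 128 = 119 free slots. One slot is
 * always left unused so that a full queue (new_tail == head) can be
 * distinguished from an empty one (tail == head).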
4432 */ 4433 i_ldc_get_tx_head(ldcp, &tx_head); 4434 4435 if (new_tail == tx_head) { 4436 DWARN(DBG_ALL_LDCS, 4437 "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); 4438 *size = 0; 4439 return (EWOULDBLOCK); 4440 } 4441 4442 /* 4443 * Make sure that the LDC Tx queue has enough space 4444 */ 4445 numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT) 4446 + ldcp->tx_q_entries - 1; 4447 numavail %= ldcp->tx_q_entries; 4448 4449 if (*size > (numavail * ldcp->pkt_payload)) { 4450 DWARN(DBG_ALL_LDCS, 4451 "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id); 4452 return (EWOULDBLOCK); 4453 } 4454 4455 D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", 4456 ldcp->id, *size); 4457 4458 /* Send the data now */ 4459 bytes_written = 0; 4460 curr_seqid = ldcp->last_msg_snt; 4461 start = tx_tail; 4462 4463 while (*size > bytes_written) { 4464 4465 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); 4466 4467 msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE || 4468 ldcp->mode == LDC_MODE_STREAM) ? 4469 ldcmsg->rdata : ldcmsg->udata); 4470 4471 ldcmsg->type = LDC_DATA; 4472 ldcmsg->stype = LDC_INFO; 4473 ldcmsg->ctrl = 0; 4474 4475 remaining = *size - bytes_written; 4476 len = min(ldcp->pkt_payload, remaining); 4477 ldcmsg->env = (uint8_t)len; 4478 4479 curr_seqid++; 4480 ldcmsg->seqid = curr_seqid; 4481 4482 /* copy the data into pkt */ 4483 bcopy(source, msgbuf, len); 4484 4485 source += len; 4486 bytes_written += len; 4487 4488 /* increment tail */ 4489 tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask; 4490 4491 ASSERT(tx_tail != tx_head); 4492 } 4493 4494 /* Set the start and stop bits */ 4495 ldcmsg->env |= LDC_FRAG_STOP; 4496 ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start); 4497 ldcmsg->env |= LDC_FRAG_START; 4498 4499 /* 4500 * All packets have been copied into the TX queue 4501 * update the tail ptr in the HV 4502 */ 4503 rv = i_ldc_set_tx_tail(ldcp, tx_tail); 4504 if (rv == 0) { 4505 ldcp->tx_tail = tx_tail; 4506 ldcp->last_msg_snt = curr_seqid; 4507 *size = bytes_written; 4508 } else { 4509 int rv2; 4510 4511 if (rv != EWOULDBLOCK) { 4512 *size = 0; 4513 if (mutex_tryenter(&ldcp->lock)) { 4514 i_ldc_reset(ldcp, B_FALSE); 4515 mutex_exit(&ldcp->lock); 4516 } else { 4517 /* 4518 * Release Tx lock, and then reacquire channel 4519 * and Tx lock in correct order 4520 */ 4521 mutex_exit(&ldcp->tx_lock); 4522 mutex_enter(&ldcp->lock); 4523 mutex_enter(&ldcp->tx_lock); 4524 i_ldc_reset(ldcp, B_FALSE); 4525 mutex_exit(&ldcp->lock); 4526 } 4527 return (ECONNRESET); 4528 } 4529 4530 D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, " 4531 "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n", 4532 rv, ldcp->tx_head, ldcp->tx_tail, tx_tail, 4533 (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); 4534 4535 rv2 = hv_ldc_tx_get_state(ldcp->id, 4536 &tx_head, &tx_tail, &ldcp->link_state); 4537 4538 D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x " 4539 "(head 0x%x, tail 0x%x state 0x%x)\n", 4540 rv2, tx_head, tx_tail, ldcp->link_state); 4541 4542 *size = 0; 4543 } 4544 4545 D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size); 4546 4547 return (rv); 4548 } 4549 4550 /* 4551 * Write specified amount of bytes to the channel 4552 * in multiple pkts of pkt_payload size. Each 4553 * packet is tagged with an unique packet ID in 4554 * the case of a reliable link. 4555 * 4556 * On return, size contains the number of bytes written. 
4557 * This function needs to ensure that the write size is < MTU size 4558 */ 4559 static int 4560 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) 4561 { 4562 ASSERT(MUTEX_HELD(&ldcp->tx_lock)); 4563 ASSERT(ldcp->mode == LDC_MODE_STREAM); 4564 4565 /* Truncate packet to max of MTU size */ 4566 if (*sizep > ldcp->mtu) *sizep = ldcp->mtu; 4567 return (i_ldc_write_packet(ldcp, buf, sizep)); 4568 } 4569 4570 4571 /* 4572 * Interfaces for channel nexus to register/unregister with LDC module 4573 * The nexus will register functions to be used to register individual 4574 * channels with the nexus and enable interrupts for the channels 4575 */ 4576 int 4577 ldc_register(ldc_cnex_t *cinfo) 4578 { 4579 ldc_chan_t *ldcp; 4580 4581 if (cinfo == NULL || cinfo->dip == NULL || 4582 cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL || 4583 cinfo->add_intr == NULL || cinfo->rem_intr == NULL || 4584 cinfo->clr_intr == NULL) { 4585 4586 DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n"); 4587 return (EINVAL); 4588 } 4589 4590 mutex_enter(&ldcssp->lock); 4591 4592 /* nexus registration */ 4593 ldcssp->cinfo.dip = cinfo->dip; 4594 ldcssp->cinfo.reg_chan = cinfo->reg_chan; 4595 ldcssp->cinfo.unreg_chan = cinfo->unreg_chan; 4596 ldcssp->cinfo.add_intr = cinfo->add_intr; 4597 ldcssp->cinfo.rem_intr = cinfo->rem_intr; 4598 ldcssp->cinfo.clr_intr = cinfo->clr_intr; 4599 4600 /* register any channels that might have been previously initialized */ 4601 ldcp = ldcssp->chan_list; 4602 while (ldcp) { 4603 if ((ldcp->tstate & TS_QCONF_RDY) && 4604 (ldcp->tstate & TS_CNEX_RDY) == 0) 4605 (void) i_ldc_register_channel(ldcp); 4606 4607 ldcp = ldcp->next; 4608 } 4609 4610 mutex_exit(&ldcssp->lock); 4611 4612 return (0); 4613 } 4614 4615 int 4616 ldc_unregister(ldc_cnex_t *cinfo) 4617 { 4618 if (cinfo == NULL || cinfo->dip == NULL) { 4619 DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n"); 4620 return (EINVAL); 4621 } 4622 4623 mutex_enter(&ldcssp->lock); 4624 4625 if (cinfo->dip != ldcssp->cinfo.dip) { 4626 DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n"); 4627 mutex_exit(&ldcssp->lock); 4628 return (EINVAL); 4629 } 4630 4631 /* nexus unregister */ 4632 ldcssp->cinfo.dip = NULL; 4633 ldcssp->cinfo.reg_chan = NULL; 4634 ldcssp->cinfo.unreg_chan = NULL; 4635 ldcssp->cinfo.add_intr = NULL; 4636 ldcssp->cinfo.rem_intr = NULL; 4637 ldcssp->cinfo.clr_intr = NULL; 4638 4639 mutex_exit(&ldcssp->lock); 4640 4641 return (0); 4642 } 4643 4644 4645 /* ------------------------------------------------------------------------- */ 4646 4647 /* 4648 * Allocate a memory handle for the channel and link it into the list 4649 * Also choose which memory table to use if this is the first handle 4650 * being assigned to this channel 4651 */ 4652 int 4653 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle) 4654 { 4655 ldc_chan_t *ldcp; 4656 ldc_mhdl_t *mhdl; 4657 4658 if (handle == NULL) { 4659 DWARN(DBG_ALL_LDCS, 4660 "ldc_mem_alloc_handle: invalid channel handle\n"); 4661 return (EINVAL); 4662 } 4663 ldcp = (ldc_chan_t *)handle; 4664 4665 mutex_enter(&ldcp->lock); 4666 4667 /* check to see if channel is initalized */ 4668 if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) { 4669 DWARN(ldcp->id, 4670 "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n", 4671 ldcp->id); 4672 mutex_exit(&ldcp->lock); 4673 return (EINVAL); 4674 } 4675 4676 /* allocate handle for channel */ 4677 mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP); 4678 4679 /* initialize the lock */ 4680 
mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL); 4681 4682 mhdl->myshadow = B_FALSE; 4683 mhdl->memseg = NULL; 4684 mhdl->ldcp = ldcp; 4685 mhdl->status = LDC_UNBOUND; 4686 4687 /* insert memory handle (@ head) into list */ 4688 if (ldcp->mhdl_list == NULL) { 4689 ldcp->mhdl_list = mhdl; 4690 mhdl->next = NULL; 4691 } else { 4692 /* insert @ head */ 4693 mhdl->next = ldcp->mhdl_list; 4694 ldcp->mhdl_list = mhdl; 4695 } 4696 4697 /* return the handle */ 4698 *mhandle = (ldc_mem_handle_t)mhdl; 4699 4700 mutex_exit(&ldcp->lock); 4701 4702 D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n", 4703 ldcp->id, mhdl); 4704 4705 return (0); 4706 } 4707 4708 /* 4709 * Free memory handle for the channel and unlink it from the list 4710 */ 4711 int 4712 ldc_mem_free_handle(ldc_mem_handle_t mhandle) 4713 { 4714 ldc_mhdl_t *mhdl, *phdl; 4715 ldc_chan_t *ldcp; 4716 4717 if (mhandle == NULL) { 4718 DWARN(DBG_ALL_LDCS, 4719 "ldc_mem_free_handle: invalid memory handle\n"); 4720 return (EINVAL); 4721 } 4722 mhdl = (ldc_mhdl_t *)mhandle; 4723 4724 mutex_enter(&mhdl->lock); 4725 4726 ldcp = mhdl->ldcp; 4727 4728 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { 4729 DWARN(ldcp->id, 4730 "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n", 4731 mhdl); 4732 mutex_exit(&mhdl->lock); 4733 return (EINVAL); 4734 } 4735 mutex_exit(&mhdl->lock); 4736 4737 mutex_enter(&ldcp->mlist_lock); 4738 4739 phdl = ldcp->mhdl_list; 4740 4741 /* first handle */ 4742 if (phdl == mhdl) { 4743 ldcp->mhdl_list = mhdl->next; 4744 mutex_destroy(&mhdl->lock); 4745 kmem_cache_free(ldcssp->memhdl_cache, mhdl); 4746 4747 D1(ldcp->id, 4748 "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n", 4749 ldcp->id, mhdl); 4750 } else { 4751 /* walk the list - unlink and free */ 4752 while (phdl != NULL) { 4753 if (phdl->next == mhdl) { 4754 phdl->next = mhdl->next; 4755 mutex_destroy(&mhdl->lock); 4756 kmem_cache_free(ldcssp->memhdl_cache, mhdl); 4757 D1(ldcp->id, 4758 "ldc_mem_free_handle: (0x%llx) freed " 4759 "handle 0x%llx\n", ldcp->id, mhdl); 4760 break; 4761 } 4762 phdl = phdl->next; 4763 } 4764 } 4765 4766 if (phdl == NULL) { 4767 DWARN(ldcp->id, 4768 "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl); 4769 mutex_exit(&ldcp->mlist_lock); 4770 return (EINVAL); 4771 } 4772 4773 mutex_exit(&ldcp->mlist_lock); 4774 4775 return (0); 4776 } 4777 4778 /* 4779 * Bind a memory handle to a virtual address. 4780 * The virtual address is converted to the corresponding real addresses. 4781 * Returns pointer to the first ldc_mem_cookie and the total number 4782 * of cookies for this virtual address. Other cookies can be obtained 4783 * using the ldc_mem_nextcookie() call. If the pages are stored in 4784 * consecutive locations in the table, a single cookie corresponding to 4785 * the first location is returned. The cookie size spans all the entries. 4786 * 4787 * If the VA corresponds to a page that is already being exported, reuse 4788 * the page and do not export it again. Bump the page's use count. 
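 *
 * A binding sketch (hypothetical caller; 'chan' is an initialized
 * channel handle and 'buf'/'len' are 8-byte aligned):
 *
 *	ldc_mem_handle_t mh;
 *	ldc_mem_cookie_t cookie;
 *	uint32_t ccount;
 *
 *	(void) ldc_mem_alloc_handle(chan, &mh);
 *	if (ldc_mem_bind_handle(mh, buf, len, LDC_SHADOW_MAP,
 *	    LDC_MEM_RW, &cookie, &ccount) == 0)
 *		(send the cookie(s) to the peer, e.g. via ldc_write)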
4789 */ 4790 int 4791 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, 4792 uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) 4793 { 4794 ldc_mhdl_t *mhdl; 4795 ldc_chan_t *ldcp; 4796 ldc_mtbl_t *mtbl; 4797 ldc_memseg_t *memseg; 4798 ldc_mte_t tmp_mte; 4799 uint64_t index, prev_index = 0; 4800 int64_t cookie_idx; 4801 uintptr_t raddr, ra_aligned; 4802 uint64_t psize, poffset, v_offset; 4803 uint64_t pg_shift, pg_size, pg_size_code, pg_mask; 4804 pgcnt_t npages; 4805 caddr_t v_align, addr; 4806 int i, rv; 4807 4808 if (mhandle == NULL) { 4809 DWARN(DBG_ALL_LDCS, 4810 "ldc_mem_bind_handle: invalid memory handle\n"); 4811 return (EINVAL); 4812 } 4813 mhdl = (ldc_mhdl_t *)mhandle; 4814 ldcp = mhdl->ldcp; 4815 4816 /* clear count */ 4817 *ccount = 0; 4818 4819 mutex_enter(&mhdl->lock); 4820 4821 if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) { 4822 DWARN(ldcp->id, 4823 "ldc_mem_bind_handle: (0x%x) handle already bound\n", 4824 mhandle); 4825 mutex_exit(&mhdl->lock); 4826 return (EINVAL); 4827 } 4828 4829 /* Force address and size to be 8-byte aligned */ 4830 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 4831 DWARN(ldcp->id, 4832 "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n"); 4833 mutex_exit(&mhdl->lock); 4834 return (EINVAL); 4835 } 4836 4837 /* 4838 * If this channel is binding a memory handle for the 4839 * first time allocate it a memory map table and initialize it 4840 */ 4841 if ((mtbl = ldcp->mtbl) == NULL) { 4842 4843 mutex_enter(&ldcp->lock); 4844 4845 /* Allocate and initialize the map table structure */ 4846 mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP); 4847 mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries; 4848 mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t); 4849 mtbl->next_entry = NULL; 4850 mtbl->contigmem = B_TRUE; 4851 4852 /* Allocate the table itself */ 4853 mtbl->table = (ldc_mte_slot_t *) 4854 contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE); 4855 if (mtbl->table == NULL) { 4856 4857 /* allocate a page of memory using kmem_alloc */ 4858 mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP); 4859 mtbl->size = MMU_PAGESIZE; 4860 mtbl->contigmem = B_FALSE; 4861 mtbl->num_entries = mtbl->num_avail = 4862 mtbl->size / sizeof (ldc_mte_slot_t); 4863 DWARN(ldcp->id, 4864 "ldc_mem_bind_handle: (0x%llx) reduced tbl size " 4865 "to %lx entries\n", ldcp->id, mtbl->num_entries); 4866 } 4867 4868 /* zero out the memory */ 4869 bzero(mtbl->table, mtbl->size); 4870 4871 /* initialize the lock */ 4872 mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL); 4873 4874 /* register table for this channel */ 4875 rv = hv_ldc_set_map_table(ldcp->id, 4876 va_to_pa(mtbl->table), mtbl->num_entries); 4877 if (rv != 0) { 4878 cmn_err(CE_WARN, 4879 "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl", 4880 ldcp->id, rv); 4881 if (mtbl->contigmem) 4882 contig_mem_free(mtbl->table, mtbl->size); 4883 else 4884 kmem_free(mtbl->table, mtbl->size); 4885 mutex_destroy(&mtbl->lock); 4886 kmem_free(mtbl, sizeof (ldc_mtbl_t)); 4887 mutex_exit(&ldcp->lock); 4888 mutex_exit(&mhdl->lock); 4889 return (EIO); 4890 } 4891 4892 ldcp->mtbl = mtbl; 4893 mutex_exit(&ldcp->lock); 4894 4895 D1(ldcp->id, 4896 "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n", 4897 ldcp->id, ldcp->mtbl->table); 4898 } 4899 4900 /* FUTURE: get the page size, pgsz code, and shift */ 4901 pg_size = MMU_PAGESIZE; 4902 pg_size_code = page_szc(pg_size); 4903 pg_shift = page_get_shift(pg_size_code); 4904 pg_mask = ~(pg_size - 1); 4905 4906 D1(ldcp->id, 
"ldc_mem_bind_handle: (0x%llx) binding " 4907 "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 4908 ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); 4909 4910 /* aligned VA and its offset */ 4911 v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1)); 4912 v_offset = ((uintptr_t)vaddr) & (pg_size - 1); 4913 4914 npages = (len+v_offset)/pg_size; 4915 npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1; 4916 4917 D1(ldcp->id, "ldc_mem_bind_handle: binding " 4918 "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", 4919 ldcp->id, vaddr, v_align, v_offset, npages); 4920 4921 /* lock the memory table - exclusive access to channel */ 4922 mutex_enter(&mtbl->lock); 4923 4924 if (npages > mtbl->num_avail) { 4925 D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n", 4926 ldcp->id); 4927 mutex_exit(&mtbl->lock); 4928 mutex_exit(&mhdl->lock); 4929 return (ENOMEM); 4930 } 4931 4932 /* Allocate a memseg structure */ 4933 memseg = mhdl->memseg = 4934 kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); 4935 4936 /* Allocate memory to store all pages and cookies */ 4937 memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); 4938 memseg->cookies = 4939 kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP); 4940 4941 D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n", 4942 ldcp->id, npages); 4943 4944 addr = v_align; 4945 4946 /* 4947 * Check if direct shared memory map is enabled, if not change 4948 * the mapping type to include SHADOW_MAP. 4949 */ 4950 if (ldc_shmem_enabled == 0) 4951 mtype = LDC_SHADOW_MAP; 4952 4953 /* 4954 * Table slots are used in a round-robin manner. The algorithm permits 4955 * inserting duplicate entries. Slots allocated earlier will typically 4956 * get freed before we get back to reusing the slot.Inserting duplicate 4957 * entries should be OK as we only lookup entries using the cookie addr 4958 * i.e. tbl index, during export, unexport and copy operation. 4959 * 4960 * One implementation what was tried was to search for a duplicate 4961 * page entry first and reuse it. The search overhead is very high and 4962 * in the vnet case dropped the perf by almost half, 50 to 24 mbps. 4963 * So it does make sense to avoid searching for duplicates. 4964 * 4965 * But during the process of searching for a free slot, if we find a 4966 * duplicate entry we will go ahead and use it, and bump its use count. 4967 */ 4968 4969 /* index to start searching from */ 4970 index = mtbl->next_entry; 4971 cookie_idx = -1; 4972 4973 tmp_mte.ll = 0; /* initialise fields to 0 */ 4974 4975 if (mtype & LDC_DIRECT_MAP) { 4976 tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0; 4977 tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0; 4978 tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0; 4979 } 4980 4981 if (mtype & LDC_SHADOW_MAP) { 4982 tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0; 4983 tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0; 4984 } 4985 4986 if (mtype & LDC_IO_MAP) { 4987 tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0; 4988 tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 
1 : 0; 4989 } 4990 4991 D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); 4992 4993 tmp_mte.mte_pgszc = pg_size_code; 4994 4995 /* initialize each mem table entry */ 4996 for (i = 0; i < npages; i++) { 4997 4998 /* check if slot is available in the table */ 4999 while (mtbl->table[index].entry.ll != 0) { 5000 5001 index = (index + 1) % mtbl->num_entries; 5002 5003 if (index == mtbl->next_entry) { 5004 /* we have looped around */ 5005 DWARN(DBG_ALL_LDCS, 5006 "ldc_mem_bind_handle: (0x%llx) cannot find " 5007 "entry\n", ldcp->id); 5008 *ccount = 0; 5009 5010 /* NOTE: free memory, remove previous entries */ 5011 /* this shouldnt happen as num_avail was ok */ 5012 5013 mutex_exit(&mtbl->lock); 5014 mutex_exit(&mhdl->lock); 5015 return (ENOMEM); 5016 } 5017 } 5018 5019 /* get the real address */ 5020 raddr = va_to_pa((void *)addr); 5021 ra_aligned = ((uintptr_t)raddr & pg_mask); 5022 5023 /* build the mte */ 5024 tmp_mte.mte_rpfn = ra_aligned >> pg_shift; 5025 5026 D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); 5027 5028 /* update entry in table */ 5029 mtbl->table[index].entry = tmp_mte; 5030 5031 D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx" 5032 " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index); 5033 5034 /* calculate the size and offset for this export range */ 5035 if (i == 0) { 5036 /* first page */ 5037 psize = min((pg_size - v_offset), len); 5038 poffset = v_offset; 5039 5040 } else if (i == (npages - 1)) { 5041 /* last page */ 5042 psize = (((uintptr_t)(vaddr + len)) & 5043 ((uint64_t)(pg_size-1))); 5044 if (psize == 0) 5045 psize = pg_size; 5046 poffset = 0; 5047 5048 } else { 5049 /* middle pages */ 5050 psize = pg_size; 5051 poffset = 0; 5052 } 5053 5054 /* store entry for this page */ 5055 memseg->pages[i].index = index; 5056 memseg->pages[i].raddr = raddr; 5057 memseg->pages[i].offset = poffset; 5058 memseg->pages[i].size = psize; 5059 memseg->pages[i].mte = &(mtbl->table[index]); 5060 5061 /* create the cookie */ 5062 if (i == 0 || (index != prev_index + 1)) { 5063 cookie_idx++; 5064 memseg->cookies[cookie_idx].addr = 5065 IDX2COOKIE(index, pg_size_code, pg_shift); 5066 memseg->cookies[cookie_idx].addr |= poffset; 5067 memseg->cookies[cookie_idx].size = psize; 5068 5069 } else { 5070 memseg->cookies[cookie_idx].size += psize; 5071 } 5072 5073 D1(ldcp->id, "ldc_mem_bind_handle: bound " 5074 "(0x%llx) va=0x%llx, idx=0x%llx, " 5075 "ra=0x%llx(sz=0x%x,off=0x%x)\n", 5076 ldcp->id, addr, index, raddr, psize, poffset); 5077 5078 /* decrement number of available entries */ 5079 mtbl->num_avail--; 5080 5081 /* increment va by page size */ 5082 addr += pg_size; 5083 5084 /* increment index */ 5085 prev_index = index; 5086 index = (index + 1) % mtbl->num_entries; 5087 5088 /* save the next slot */ 5089 mtbl->next_entry = index; 5090 } 5091 5092 mutex_exit(&mtbl->lock); 5093 5094 /* memory handle = bound */ 5095 mhdl->mtype = mtype; 5096 mhdl->perm = perm; 5097 mhdl->status = LDC_BOUND; 5098 5099 /* update memseg_t */ 5100 memseg->vaddr = vaddr; 5101 memseg->raddr = memseg->pages[0].raddr; 5102 memseg->size = len; 5103 memseg->npages = npages; 5104 memseg->ncookies = cookie_idx + 1; 5105 memseg->next_cookie = (memseg->ncookies > 1) ? 
1 : 0; 5106
5107 /* return count and first cookie */ 5108 *ccount = memseg->ncookies; 5109 cookie->addr = memseg->cookies[0].addr; 5110 cookie->size = memseg->cookies[0].size; 5111
5112 D1(ldcp->id, 5113 "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, " 5114 "pgs=0x%llx cookies=0x%llx\n", 5115 ldcp->id, mhdl, vaddr, npages, memseg->ncookies); 5116
5117 mutex_exit(&mhdl->lock); 5118 return (0); 5119 } 5120
5121 /* 5122 * Return the next cookie associated with the specified memory handle 5123 */ 5124 int 5125 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie) 5126 { 5127 ldc_mhdl_t *mhdl; 5128 ldc_chan_t *ldcp; 5129 ldc_memseg_t *memseg; 5130
5131 if (mhandle == NULL) { 5132 DWARN(DBG_ALL_LDCS, 5133 "ldc_mem_nextcookie: invalid memory handle\n"); 5134 return (EINVAL); 5135 } 5136 mhdl = (ldc_mhdl_t *)mhandle; 5137
5138 mutex_enter(&mhdl->lock); 5139
5140 ldcp = mhdl->ldcp; 5141 memseg = mhdl->memseg; 5142
5143 if (cookie == 0) { 5144 DWARN(ldcp->id, 5145 "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n", 5146 ldcp->id); 5147 mutex_exit(&mhdl->lock); 5148 return (EINVAL); 5149 } 5150
5151 if (memseg->next_cookie != 0) { 5152 cookie->addr = memseg->cookies[memseg->next_cookie].addr; 5153 cookie->size = memseg->cookies[memseg->next_cookie].size; 5154 memseg->next_cookie++; 5155 if (memseg->next_cookie == memseg->ncookies) 5156 memseg->next_cookie = 0; 5157
5158 } else { 5159 DWARN(ldcp->id, 5160 "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id); 5161 cookie->addr = 0; 5162 cookie->size = 0; 5163 mutex_exit(&mhdl->lock); 5164 return (EINVAL); 5165 } 5166
5167 D1(ldcp->id, 5168 "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n", 5169 ldcp->id, cookie->addr, cookie->size); 5170
5171 mutex_exit(&mhdl->lock); 5172 return (0); 5173 } 5174
5175 /* 5176 * Unbind the virtual memory region associated with the specified 5177 * memory handle. All associated cookies are freed and the corresponding 5178 * RA space is no longer exported.
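 *
 * Teardown sketch (hypothetical caller): typically unbind after the
 * peer has unmapped the exported range, then release the handle:
 *
 *	(void) ldc_mem_unbind_handle(mh);
 *	(void) ldc_mem_free_handle(mh);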
5179 */ 5180 int 5181 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle) 5182 { 5183 ldc_mhdl_t *mhdl; 5184 ldc_chan_t *ldcp; 5185 ldc_mtbl_t *mtbl; 5186 ldc_memseg_t *memseg; 5187 uint64_t cookie_addr; 5188 uint64_t pg_shift, pg_size_code; 5189 int i, rv; 5190
5191 if (mhandle == NULL) { 5192 DWARN(DBG_ALL_LDCS, 5193 "ldc_mem_unbind_handle: invalid memory handle\n"); 5194 return (EINVAL); 5195 } 5196 mhdl = (ldc_mhdl_t *)mhandle; 5197
5198 mutex_enter(&mhdl->lock); 5199
5200 if (mhdl->status == LDC_UNBOUND) { 5201 DWARN(DBG_ALL_LDCS, 5202 "ldc_mem_unbind_handle: (0x%x) handle is not bound\n", 5203 mhandle); 5204 mutex_exit(&mhdl->lock); 5205 return (EINVAL); 5206 } 5207
5208 ldcp = mhdl->ldcp; 5209 mtbl = ldcp->mtbl; 5210
5211 memseg = mhdl->memseg; 5212
5213 /* lock the memory table - exclusive access to channel */ 5214 mutex_enter(&mtbl->lock); 5215
5216 /* undo the pages exported */ 5217 for (i = 0; i < memseg->npages; i++) { 5218
5219 /* check for mapped pages, revocation cookie != 0 */ 5220 if (memseg->pages[i].mte->cookie) { 5221
5222 pg_size_code = page_szc(memseg->pages[i].size); 5223 pg_shift = page_get_shift(memseg->pages[i].size); 5224 cookie_addr = IDX2COOKIE(memseg->pages[i].index, 5225 pg_size_code, pg_shift); 5226
5227 D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke " 5228 "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id, 5229 cookie_addr, memseg->pages[i].mte->cookie); 5230 rv = hv_ldc_revoke(ldcp->id, cookie_addr, 5231 memseg->pages[i].mte->cookie); 5232 if (rv) { 5233 DWARN(ldcp->id, 5234 "ldc_mem_unbind_handle: (0x%llx) cannot " 5235 "revoke mapping, cookie %llx\n", ldcp->id, 5236 cookie_addr); 5237 } 5238 } 5239
5240 /* clear the entry from the table */ 5241 memseg->pages[i].mte->entry.ll = 0; 5242 mtbl->num_avail++; 5243 } 5244 mutex_exit(&mtbl->lock); 5245
5246 /* free the allocated memseg and page structures */ 5247 kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); 5248 kmem_free(memseg->cookies, 5249 (sizeof (ldc_mem_cookie_t) * memseg->npages)); 5250 kmem_cache_free(ldcssp->memseg_cache, memseg); 5251
5252 /* uninitialize the memory handle */ 5253 mhdl->memseg = NULL; 5254 mhdl->status = LDC_UNBOUND; 5255
5256 D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n", 5257 ldcp->id, mhdl); 5258
5259 mutex_exit(&mhdl->lock); 5260 return (0); 5261 } 5262
5263 /* 5264 * Get information about the memory handle. The base address of the 5265 * underlying memory segment along with the type and permission are returned back. 5266 */ 5267 int 5268 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo) 5269 { 5270 ldc_mhdl_t *mhdl; 5271
5272 if (mhandle == NULL) { 5273 DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n"); 5274 return (EINVAL); 5275 } 5276 mhdl = (ldc_mhdl_t *)mhandle; 5277
5278 if (minfo == NULL) { 5279 DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n"); 5280 return (EINVAL); 5281 } 5282
5283 mutex_enter(&mhdl->lock); 5284
5285 minfo->status = mhdl->status; 5286 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { 5287 minfo->vaddr = mhdl->memseg->vaddr; 5288 minfo->raddr = mhdl->memseg->raddr; 5289 minfo->mtype = mhdl->mtype; 5290 minfo->perm = mhdl->perm; 5291 } 5292 mutex_exit(&mhdl->lock); 5293
5294 return (0); 5295 } 5296
5297 /* 5298 * Copy data either from or to the client specified virtual address 5299 * space to or from the exported memory associated with the cookies. 5300 * The direction argument determines whether the data is read from or 5301 * written to exported memory.
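 *
 * An inbound copy sketch (hypothetical caller; 'cookies'/'ccount'
 * were obtained from the exporting peer, and 'xfer_len' is 8-byte
 * aligned):
 *
 *	size_t len = xfer_len;
 *	rv = ldc_mem_copy(chan, (caddr_t)local_buf, 0, &len,
 *	    cookies, ccount, LDC_COPY_IN);
 *
 * On return 'len' reflects the number of bytes actually copied.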
5302 */ 5303 int 5304 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size, 5305 ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction) 5306 { 5307 ldc_chan_t *ldcp; 5308 uint64_t local_voff, local_valign; 5309 uint64_t cookie_addr, cookie_size; 5310 uint64_t pg_shift, pg_size, pg_size_code; 5311 uint64_t export_caddr, export_poff, export_psize, export_size; 5312 uint64_t local_ra, local_poff, local_psize; 5313 uint64_t copy_size, copied_len = 0, total_bal = 0, idx = 0; 5314 pgcnt_t npages; 5315 size_t len = *size; 5316 int i, rv = 0; 5317 5318 uint64_t chid; 5319 5320 if (handle == NULL) { 5321 DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n"); 5322 return (EINVAL); 5323 } 5324 ldcp = (ldc_chan_t *)handle; 5325 chid = ldcp->id; 5326 5327 /* check to see if channel is UP */ 5328 if (ldcp->tstate != TS_UP) { 5329 DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n", 5330 chid); 5331 return (ECONNRESET); 5332 } 5333 5334 /* Force address and size to be 8-byte aligned */ 5335 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 5336 DWARN(chid, 5337 "ldc_mem_copy: addr/sz is not 8-byte aligned\n"); 5338 return (EINVAL); 5339 } 5340 5341 /* Find the size of the exported memory */ 5342 export_size = 0; 5343 for (i = 0; i < ccount; i++) 5344 export_size += cookies[i].size; 5345 5346 /* check to see if offset is valid */ 5347 if (off > export_size) { 5348 DWARN(chid, 5349 "ldc_mem_copy: (0x%llx) start offset > export mem size\n", 5350 chid); 5351 return (EINVAL); 5352 } 5353 5354 /* 5355 * Check to see if the export size is smaller than the size we 5356 * are requesting to copy - if so flag an error 5357 */ 5358 if ((export_size - off) < *size) { 5359 DWARN(chid, 5360 "ldc_mem_copy: (0x%llx) copy size > export mem size\n", 5361 chid); 5362 return (EINVAL); 5363 } 5364 5365 total_bal = min(export_size, *size); 5366 5367 /* FUTURE: get the page size, pgsz code, and shift */ 5368 pg_size = MMU_PAGESIZE; 5369 pg_size_code = page_szc(pg_size); 5370 pg_shift = page_get_shift(pg_size_code); 5371 5372 D1(chid, "ldc_mem_copy: copying data " 5373 "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 5374 chid, vaddr, pg_size, pg_size_code, pg_shift); 5375 5376 /* aligned VA and its offset */ 5377 local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1)); 5378 local_voff = ((uintptr_t)vaddr) & (pg_size - 1); 5379 5380 npages = (len+local_voff)/pg_size; 5381 npages = ((len+local_voff)%pg_size == 0) ? 
npages : npages+1; 5382 5383 D1(chid, 5384 "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", 5385 chid, vaddr, local_valign, local_voff, npages); 5386 5387 local_ra = va_to_pa((void *)local_valign); 5388 local_poff = local_voff; 5389 local_psize = min(len, (pg_size - local_voff)); 5390 5391 len -= local_psize; 5392 5393 /* 5394 * find the first cookie in the list of cookies 5395 * if the offset passed in is not zero 5396 */ 5397 for (idx = 0; idx < ccount; idx++) { 5398 cookie_size = cookies[idx].size; 5399 if (off < cookie_size) 5400 break; 5401 off -= cookie_size; 5402 } 5403 5404 cookie_addr = cookies[idx].addr + off; 5405 cookie_size = cookies[idx].size - off; 5406 5407 export_caddr = cookie_addr & ~(pg_size - 1); 5408 export_poff = cookie_addr & (pg_size - 1); 5409 export_psize = min(cookie_size, (pg_size - export_poff)); 5410 5411 for (;;) { 5412 5413 copy_size = min(export_psize, local_psize); 5414 5415 D1(chid, 5416 "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx," 5417 " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx," 5418 " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," 5419 " total_bal=0x%llx\n", 5420 chid, direction, export_caddr, local_ra, export_poff, 5421 local_poff, export_psize, local_psize, copy_size, 5422 total_bal); 5423 5424 rv = hv_ldc_copy(chid, direction, 5425 (export_caddr + export_poff), (local_ra + local_poff), 5426 copy_size, &copied_len); 5427 5428 if (rv != 0) { 5429 int error = EIO; 5430 uint64_t rx_hd, rx_tl; 5431 5432 DWARN(chid, 5433 "ldc_mem_copy: (0x%llx) err %d during copy\n", 5434 (unsigned long long)chid, rv); 5435 DWARN(chid, 5436 "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, " 5437 "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx," 5438 " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx," 5439 " copied_len=0x%lx, total_bal=0x%lx\n", 5440 chid, direction, export_caddr, local_ra, 5441 export_poff, local_poff, export_psize, local_psize, 5442 copy_size, copied_len, total_bal); 5443 5444 *size = *size - total_bal; 5445 5446 /* 5447 * check if reason for copy error was due to 5448 * a channel reset. we need to grab the lock 5449 * just in case we have to do a reset. 
5450 */ 5451 mutex_enter(&ldcp->lock); 5452 mutex_enter(&ldcp->tx_lock); 5453 5454 rv = hv_ldc_rx_get_state(ldcp->id, 5455 &rx_hd, &rx_tl, &(ldcp->link_state)); 5456 if (ldcp->link_state == LDC_CHANNEL_DOWN || 5457 ldcp->link_state == LDC_CHANNEL_RESET) { 5458 i_ldc_reset(ldcp, B_FALSE); 5459 error = ECONNRESET; 5460 } 5461 5462 mutex_exit(&ldcp->tx_lock); 5463 mutex_exit(&ldcp->lock); 5464 5465 return (error); 5466 } 5467 5468 ASSERT(copied_len <= copy_size); 5469 5470 D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len); 5471 export_poff += copied_len; 5472 local_poff += copied_len; 5473 export_psize -= copied_len; 5474 local_psize -= copied_len; 5475 cookie_size -= copied_len; 5476 5477 total_bal -= copied_len; 5478 5479 if (copy_size != copied_len) 5480 continue; 5481 5482 if (export_psize == 0 && total_bal != 0) { 5483 5484 if (cookie_size == 0) { 5485 idx++; 5486 cookie_addr = cookies[idx].addr; 5487 cookie_size = cookies[idx].size; 5488 5489 export_caddr = cookie_addr & ~(pg_size - 1); 5490 export_poff = cookie_addr & (pg_size - 1); 5491 export_psize = 5492 min(cookie_size, (pg_size-export_poff)); 5493 } else { 5494 export_caddr += pg_size; 5495 export_poff = 0; 5496 export_psize = min(cookie_size, pg_size); 5497 } 5498 } 5499 5500 if (local_psize == 0 && total_bal != 0) { 5501 local_valign += pg_size; 5502 local_ra = va_to_pa((void *)local_valign); 5503 local_poff = 0; 5504 local_psize = min(pg_size, len); 5505 len -= local_psize; 5506 } 5507 5508 /* check if we are all done */ 5509 if (total_bal == 0) 5510 break; 5511 } 5512 5513 5514 D1(chid, 5515 "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n", 5516 chid, *size); 5517 5518 return (0); 5519 } 5520 5521 /* 5522 * Copy data either from or to the client specified virtual address 5523 * space to or from HV physical memory. 5524 * 5525 * The direction argument determines whether the data is read from or 5526 * written to HV memory. 
direction values are LDC_COPY_IN/OUT similar 5527 * to the ldc_mem_copy interface 5528 */ 5529 int 5530 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size, 5531 caddr_t paddr, uint8_t direction) 5532 { 5533 ldc_chan_t *ldcp; 5534 uint64_t local_voff, local_valign; 5535 uint64_t pg_shift, pg_size, pg_size_code; 5536 uint64_t target_pa, target_poff, target_psize, target_size; 5537 uint64_t local_ra, local_poff, local_psize; 5538 uint64_t copy_size, copied_len = 0; 5539 pgcnt_t npages; 5540 size_t len = *size; 5541 int rv = 0; 5542 5543 if (handle == NULL) { 5544 DWARN(DBG_ALL_LDCS, 5545 "ldc_mem_rdwr_cookie: invalid channel handle\n"); 5546 return (EINVAL); 5547 } 5548 ldcp = (ldc_chan_t *)handle; 5549 5550 mutex_enter(&ldcp->lock); 5551 5552 /* check to see if channel is UP */ 5553 if (ldcp->tstate != TS_UP) { 5554 DWARN(ldcp->id, 5555 "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n", 5556 ldcp->id); 5557 mutex_exit(&ldcp->lock); 5558 return (ECONNRESET); 5559 } 5560 5561 /* Force address and size to be 8-byte aligned */ 5562 if ((((uintptr_t)vaddr | len) & 0x7) != 0) { 5563 DWARN(ldcp->id, 5564 "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n"); 5565 mutex_exit(&ldcp->lock); 5566 return (EINVAL); 5567 } 5568 5569 target_size = *size; 5570 5571 /* FUTURE: get the page size, pgsz code, and shift */ 5572 pg_size = MMU_PAGESIZE; 5573 pg_size_code = page_szc(pg_size); 5574 pg_shift = page_get_shift(pg_size_code); 5575 5576 D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data " 5577 "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", 5578 ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); 5579 5580 /* aligned VA and its offset */ 5581 local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1); 5582 local_voff = ((uintptr_t)vaddr) & (pg_size - 1); 5583 5584 npages = (len + local_voff) / pg_size; 5585 npages = ((len + local_voff) % pg_size == 0) ? 
npages : npages+1; 5586 5587 D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, " 5588 "val=0x%llx,off=0x%x,pgs=0x%x\n", 5589 ldcp->id, vaddr, local_valign, local_voff, npages); 5590 5591 local_ra = va_to_pa((void *)local_valign); 5592 local_poff = local_voff; 5593 local_psize = min(len, (pg_size - local_voff)); 5594 5595 len -= local_psize; 5596 5597 target_pa = ((uintptr_t)paddr) & ~(pg_size - 1); 5598 target_poff = ((uintptr_t)paddr) & (pg_size - 1); 5599 target_psize = pg_size - target_poff; 5600 5601 for (;;) { 5602 5603 copy_size = min(target_psize, local_psize); 5604 5605 D1(ldcp->id, 5606 "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx," 5607 " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx," 5608 " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," 5609 " total_bal=0x%llx\n", 5610 ldcp->id, direction, target_pa, local_ra, target_poff, 5611 local_poff, target_psize, local_psize, copy_size, 5612 target_size); 5613 5614 rv = hv_ldc_copy(ldcp->id, direction, 5615 (target_pa + target_poff), (local_ra + local_poff), 5616 copy_size, &copied_len); 5617 5618 if (rv != 0) { 5619 DWARN(DBG_ALL_LDCS, 5620 "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n", 5621 ldcp->id, rv); 5622 DWARN(DBG_ALL_LDCS, 5623 "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, " 5624 "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, " 5625 "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, " 5626 "copy_sz=0x%llx, total_bal=0x%llx\n", 5627 ldcp->id, direction, target_pa, local_ra, 5628 target_poff, local_poff, target_psize, local_psize, 5629 copy_size, target_size); 5630 5631 *size = *size - target_size; 5632 mutex_exit(&ldcp->lock); 5633 return (i_ldc_h2v_error(rv)); 5634 } 5635 5636 D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n", 5637 copied_len); 5638 target_poff += copied_len; 5639 local_poff += copied_len; 5640 target_psize -= copied_len; 5641 local_psize -= copied_len; 5642 5643 target_size -= copied_len; 5644 5645 if (copy_size != copied_len) 5646 continue; 5647 5648 if (target_psize == 0 && target_size != 0) { 5649 target_pa += pg_size; 5650 target_poff = 0; 5651 target_psize = min(pg_size, target_size); 5652 } 5653 5654 if (local_psize == 0 && target_size != 0) { 5655 local_valign += pg_size; 5656 local_ra = va_to_pa((void *)local_valign); 5657 local_poff = 0; 5658 local_psize = min(pg_size, len); 5659 len -= local_psize; 5660 } 5661 5662 /* check if we are all done */ 5663 if (target_size == 0) 5664 break; 5665 } 5666 5667 mutex_exit(&ldcp->lock); 5668 5669 D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n", 5670 ldcp->id, *size); 5671 5672 return (0); 5673 } 5674 5675 /* 5676 * Map an exported memory segment into the local address space. If the 5677 * memory range was exported for direct map access, a HV call is made 5678 * to allocate a RA range. If the map is done via a shadow copy, local 5679 * shadow memory is allocated and the base VA is returned in 'vaddr'. If 5680 * the mapping is a direct map then the RA is returned in 'raddr'. 
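 *
 * A mapping sketch (hypothetical importer; cookies were received from
 * the exporting peer):
 *
 *	caddr_t va = NULL;
 *	rv = ldc_mem_map(mh, cookies, ccount, LDC_SHADOW_MAP,
 *	    LDC_MEM_R, &va, NULL);
 *
 * With a shadow map, 'va' points at local shadow pages; use
 * ldc_mem_acquire()/ldc_mem_release() to synchronize their contents
 * with the remote segment.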
5681 */ 5682 int 5683 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount, 5684 uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr) 5685 { 5686 int i, j, idx, rv, retries; 5687 ldc_chan_t *ldcp; 5688 ldc_mhdl_t *mhdl; 5689 ldc_memseg_t *memseg; 5690 caddr_t tmpaddr; 5691 uint64_t map_perm = perm; 5692 uint64_t pg_size, pg_shift, pg_size_code, pg_mask; 5693 uint64_t exp_size = 0, base_off, map_size, npages; 5694 uint64_t cookie_addr, cookie_off, cookie_size; 5695 tte_t ldc_tte; 5696
5697 if (mhandle == NULL) { 5698 DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n"); 5699 return (EINVAL); 5700 } 5701 mhdl = (ldc_mhdl_t *)mhandle; 5702
5703 mutex_enter(&mhdl->lock); 5704
5705 if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED || 5706 mhdl->memseg != NULL) { 5707 DWARN(DBG_ALL_LDCS, 5708 "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle); 5709 mutex_exit(&mhdl->lock); 5710 return (EINVAL); 5711 } 5712
5713 ldcp = mhdl->ldcp; 5714
5715 mutex_enter(&ldcp->lock); 5716
5717 if (ldcp->tstate != TS_UP) { 5718 DWARN(ldcp->id, 5719 "ldc_mem_map: (0x%llx) channel is not UP\n", 5720 ldcp->id); 5721 mutex_exit(&ldcp->lock); 5722 mutex_exit(&mhdl->lock); 5723 return (ECONNRESET); 5724 } 5725
5726 if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { 5727 DWARN(ldcp->id, "ldc_mem_map: invalid map type\n"); 5728 mutex_exit(&ldcp->lock); 5729 mutex_exit(&mhdl->lock); 5730 return (EINVAL); 5731 } 5732
5733 D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n", 5734 ldcp->id, cookie->addr, cookie->size); 5735
5736 /* FUTURE: get the page size, pgsz code, and shift */ 5737 pg_size = MMU_PAGESIZE; 5738 pg_size_code = page_szc(pg_size); 5739 pg_shift = page_get_shift(pg_size_code); 5740 pg_mask = ~(pg_size - 1); 5741
5742 /* calculate the number of pages in the exported cookie */ 5743 base_off = cookie[0].addr & (pg_size - 1); 5744 for (idx = 0; idx < ccount; idx++) 5745 exp_size += cookie[idx].size; 5746 map_size = P2ROUNDUP((exp_size + base_off), pg_size); 5747 npages = (map_size >> pg_shift); 5748
5749 /* Allocate memseg structure */ 5750 memseg = mhdl->memseg = 5751 kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); 5752
5753 /* Allocate memory to store all pages and cookies */ 5754 memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); 5755 memseg->cookies = 5756 kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP); 5757
5758 D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx," 5759 "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages); 5760
5761 /* 5762 * Check if direct map over shared memory is enabled, if not change 5763 * the mapping type to SHADOW_MAP.
5764 */ 5765 if (ldc_shmem_enabled == 0) 5766 mtype = LDC_SHADOW_MAP; 5767 5768 /* 5769 * Check to see if the client is requesting direct or shadow map 5770 * If direct map is requested, try to map remote memory first, 5771 * and if that fails, revert to shadow map 5772 */ 5773 if (mtype == LDC_DIRECT_MAP) { 5774 5775 /* Allocate kernel virtual space for mapping */ 5776 memseg->vaddr = vmem_xalloc(heap_arena, map_size, 5777 pg_size, 0, 0, NULL, NULL, VM_NOSLEEP); 5778 if (memseg->vaddr == NULL) { 5779 cmn_err(CE_WARN, 5780 "ldc_mem_map: (0x%lx) memory map failed\n", 5781 ldcp->id); 5782 kmem_free(memseg->cookies, 5783 (sizeof (ldc_mem_cookie_t) * ccount)); 5784 kmem_free(memseg->pages, 5785 (sizeof (ldc_page_t) * npages)); 5786 kmem_cache_free(ldcssp->memseg_cache, memseg); 5787 5788 mutex_exit(&ldcp->lock); 5789 mutex_exit(&mhdl->lock); 5790 return (ENOMEM); 5791 } 5792 5793 /* Unload previous mapping */ 5794 hat_unload(kas.a_hat, memseg->vaddr, map_size, 5795 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 5796 5797 /* for each cookie passed in - map into address space */ 5798 idx = 0; 5799 cookie_size = 0; 5800 tmpaddr = memseg->vaddr; 5801 5802 for (i = 0; i < npages; i++) { 5803 5804 if (cookie_size == 0) { 5805 ASSERT(idx < ccount); 5806 cookie_addr = cookie[idx].addr & pg_mask; 5807 cookie_off = cookie[idx].addr & (pg_size - 1); 5808 cookie_size = 5809 P2ROUNDUP((cookie_off + cookie[idx].size), 5810 pg_size); 5811 idx++; 5812 } 5813 5814 D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping " 5815 "cookie 0x%llx, bal=0x%llx\n", ldcp->id, 5816 cookie_addr, cookie_size); 5817 5818 /* map the cookie into address space */ 5819 for (retries = 0; retries < ldc_max_retries; 5820 retries++) { 5821 5822 rv = hv_ldc_mapin(ldcp->id, cookie_addr, 5823 &memseg->pages[i].raddr, &map_perm); 5824 if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY) 5825 break; 5826 5827 drv_usecwait(ldc_delay); 5828 } 5829 5830 if (rv || memseg->pages[i].raddr == 0) { 5831 DWARN(ldcp->id, 5832 "ldc_mem_map: (0x%llx) hv mapin err %d\n", 5833 ldcp->id, rv); 5834 5835 /* remove previous mapins */ 5836 hat_unload(kas.a_hat, memseg->vaddr, map_size, 5837 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 5838 for (j = 0; j < i; j++) { 5839 rv = hv_ldc_unmap( 5840 memseg->pages[j].raddr); 5841 if (rv) { 5842 DWARN(ldcp->id, 5843 "ldc_mem_map: (0x%llx) " 5844 "cannot unmap ra=0x%llx\n", 5845 ldcp->id, 5846 memseg->pages[j].raddr); 5847 } 5848 } 5849 5850 /* free kernel virtual space */ 5851 vmem_free(heap_arena, (void *)memseg->vaddr, 5852 map_size); 5853 5854 /* direct map failed - revert to shadow map */ 5855 mtype = LDC_SHADOW_MAP; 5856 break; 5857 5858 } else { 5859 5860 D1(ldcp->id, 5861 "ldc_mem_map: (0x%llx) vtop map 0x%llx -> " 5862 "0x%llx, cookie=0x%llx, perm=0x%llx\n", 5863 ldcp->id, tmpaddr, memseg->pages[i].raddr, 5864 cookie_addr, perm); 5865 5866 /* 5867 * NOTE: Calling hat_devload directly, causes it 5868 * to look for page_t using the pfn. 
Since this 5869 * addr is greater than the memlist, it treats 5870 * it as non-memory 5871 */ 5872 sfmmu_memtte(&ldc_tte, 5873 (pfn_t)(memseg->pages[i].raddr >> pg_shift), 5874 PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K); 5875
5876 D1(ldcp->id, 5877 "ldc_mem_map: (0x%llx) ra 0x%llx -> " 5878 "tte 0x%llx\n", ldcp->id, 5879 memseg->pages[i].raddr, ldc_tte); 5880
5881 sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr, 5882 NULL, HAT_LOAD_LOCK); 5883
5884 cookie_size -= pg_size; 5885 cookie_addr += pg_size; 5886 tmpaddr += pg_size; 5887 } 5888 } 5889 } 5890
5891 if (mtype == LDC_SHADOW_MAP) { 5892 if (*vaddr == NULL) { 5893 memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP); 5894 mhdl->myshadow = B_TRUE; 5895
5896 D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated " 5897 "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr); 5898 } else { 5899 /* 5900 * Use client supplied memory for memseg->vaddr 5901 * WARNING: assuming that client mem is >= exp_size 5902 */ 5903 memseg->vaddr = *vaddr; 5904 } 5905
5906 /* Save all page and cookie information */ 5907 for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) { 5908 memseg->pages[i].raddr = va_to_pa(tmpaddr); 5909 memseg->pages[i].size = pg_size; 5910 tmpaddr += pg_size; 5911 } 5912
5913 } 5914
5915 /* save all cookies */ 5916 bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t)); 5917
5918 /* update memseg_t */ 5919 memseg->raddr = memseg->pages[0].raddr; 5920 memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size; 5921 memseg->npages = npages; 5922 memseg->ncookies = ccount; 5923 memseg->next_cookie = 0; 5924
5925 /* memory handle = mapped */ 5926 mhdl->mtype = mtype; 5927 mhdl->perm = perm; 5928 mhdl->status = LDC_MAPPED; 5929
5930 D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, " 5931 "va=0x%llx, pgs=0x%llx cookies=0x%llx\n", 5932 ldcp->id, mhdl, memseg->raddr, memseg->vaddr, 5933 memseg->npages, memseg->ncookies); 5934
5935 if (mtype == LDC_SHADOW_MAP) 5936 base_off = 0; 5937 if (raddr) 5938 *raddr = (caddr_t)(memseg->raddr | base_off); 5939 if (vaddr) 5940 *vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off); 5941
5942 mutex_exit(&ldcp->lock); 5943 mutex_exit(&mhdl->lock); 5944 return (0); 5945 } 5946
5947 /* 5948 * Unmap a memory segment. Free shadow memory (if any).
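 *
 * After a successful unmap the handle reverts to LDC_UNBOUND and may
 * be reused for another ldc_mem_map() or released with
 * ldc_mem_free_handle().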
5949 */ 5950 int 5951 ldc_mem_unmap(ldc_mem_handle_t mhandle) 5952 { 5953 int i, rv; 5954 ldc_mhdl_t *mhdl = (ldc_mhdl_t *)mhandle; 5955 ldc_chan_t *ldcp; 5956 ldc_memseg_t *memseg; 5957
5958 if (mhdl == 0 || mhdl->status != LDC_MAPPED) { 5959 DWARN(DBG_ALL_LDCS, 5960 "ldc_mem_unmap: (0x%llx) handle is not mapped\n", 5961 mhandle); 5962 return (EINVAL); 5963 } 5964
5965 mutex_enter(&mhdl->lock); 5966
5967 ldcp = mhdl->ldcp; 5968 memseg = mhdl->memseg; 5969
5970 D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n", 5971 ldcp->id, mhdl); 5972
5973 /* if we allocated shadow memory - free it */ 5974 if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) { 5975 kmem_free(memseg->vaddr, memseg->size); 5976 } else if (mhdl->mtype == LDC_DIRECT_MAP) { 5977
5978 /* unmap in the case of DIRECT_MAP */ 5979 hat_unload(kas.a_hat, memseg->vaddr, memseg->size, 5980 HAT_UNLOAD_UNLOCK); 5981
5982 for (i = 0; i < memseg->npages; i++) { 5983 rv = hv_ldc_unmap(memseg->pages[i].raddr); 5984 if (rv) { 5985 cmn_err(CE_WARN, 5986 "ldc_mem_unmap: (0x%lx) hv unmap err %d\n", 5987 ldcp->id, rv); 5988 } 5989 } 5990
5991 vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size); 5992 } 5993
5994 /* free the allocated memseg and page structures */ 5995 kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); 5996 kmem_free(memseg->cookies, 5997 (sizeof (ldc_mem_cookie_t) * memseg->ncookies)); 5998 kmem_cache_free(ldcssp->memseg_cache, memseg); 5999
6000 /* uninitialize the memory handle */ 6001 mhdl->memseg = NULL; 6002 mhdl->status = LDC_UNBOUND; 6003
6004 D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n", 6005 ldcp->id, mhdl); 6006
6007 mutex_exit(&mhdl->lock); 6008 return (0); 6009 } 6010
6011 /* 6012 * Internal entry point for LDC mapped memory entry consistency 6013 * semantics. Acquire copies the contents of the remote memory 6014 * into the local shadow copy. The release operation copies the local 6015 * contents into the remote memory. The offset and size specify the 6016 * bounds for the memory range being synchronized.
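 *
 * Typical shadow-map access pattern (hypothetical caller):
 *
 *	(void) ldc_mem_acquire(mh, off, size);	(pull remote contents)
 *	(read or modify the shadow pages)
 *	(void) ldc_mem_release(mh, off, size);	(push local contents)
 *
 * Both calls are no-ops for direct (non-shadow) mappings.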
6017 */ 6018 static int 6019 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction, 6020 uint64_t offset, size_t size) 6021 { 6022 int err; 6023 ldc_mhdl_t *mhdl; 6024 ldc_chan_t *ldcp; 6025 ldc_memseg_t *memseg; 6026 caddr_t local_vaddr; 6027 size_t copy_size; 6028
6029 if (mhandle == NULL) { 6030 DWARN(DBG_ALL_LDCS, 6031 "i_ldc_mem_acquire_release: invalid memory handle\n"); 6032 return (EINVAL); 6033 } 6034 mhdl = (ldc_mhdl_t *)mhandle; 6035
6036 mutex_enter(&mhdl->lock); 6037
6038 if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) { 6039 DWARN(DBG_ALL_LDCS, 6040 "i_ldc_mem_acquire_release: not mapped memory\n"); 6041 mutex_exit(&mhdl->lock); 6042 return (EINVAL); 6043 } 6044
6045 /* do nothing for direct map */ 6046 if (mhdl->mtype == LDC_DIRECT_MAP) { 6047 mutex_exit(&mhdl->lock); 6048 return (0); 6049 } 6050
6051 /* do nothing for COPY_IN without LDC_MEM_R or COPY_OUT without LDC_MEM_W */ 6052 if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) || 6053 (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) { 6054 mutex_exit(&mhdl->lock); 6055 return (0); 6056 } 6057
6058 if (offset >= mhdl->memseg->size || 6059 (offset + size) > mhdl->memseg->size) { 6060 DWARN(DBG_ALL_LDCS, 6061 "i_ldc_mem_acquire_release: memory out of range\n"); 6062 mutex_exit(&mhdl->lock); 6063 return (EINVAL); 6064 } 6065
6066 /* get the channel handle and memory segment */ 6067 ldcp = mhdl->ldcp; 6068 memseg = mhdl->memseg; 6069
6070 if (mhdl->mtype == LDC_SHADOW_MAP) { 6071
6072 local_vaddr = memseg->vaddr + offset; 6073 copy_size = size; 6074
6075 /* copy to/from remote from/to local memory */ 6076 err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset, 6077 &copy_size, memseg->cookies, memseg->ncookies, 6078 direction); 6079 if (err || copy_size != size) { 6080 DWARN(ldcp->id, 6081 "i_ldc_mem_acquire_release: copy failed\n"); 6082 mutex_exit(&mhdl->lock); 6083 return (err); 6084 } 6085 } 6086
6087 mutex_exit(&mhdl->lock); 6088
6089 return (0); 6090 } 6091
6092 /* 6093 * Ensure that the contents of the local memory seg are consistent 6094 * with the contents of the remote segment 6095 */ 6096 int 6097 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size) 6098 { 6099 return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size)); 6100 } 6101
6102
6103 /* 6104 * Ensure that the contents of the remote memory seg are consistent 6105 * with the contents of the local segment 6106 */ 6107 int 6108 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size) 6109 { 6110 return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size)); 6111 } 6112
6113 /* 6114 * Allocate a descriptor ring. The size of each descriptor 6115 * must be 8-byte aligned and the entire ring should be a multiple 6116 * of MMU_PAGESIZE.
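 *
 * A creation sketch (hypothetical exporter; my_desc_t stands in for
 * the caller's 8-byte aligned descriptor type):
 *
 *	ldc_dring_handle_t dh;
 *	rv = ldc_mem_dring_create(128, sizeof (my_desc_t), &dh);
 *
 * The backing memory (128 * sizeof (my_desc_t) bytes) is rounded up
 * to a multiple of MMU_PAGESIZE.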
6117 */ 6118 int 6119 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle) 6120 { 6121 ldc_dring_t *dringp; 6122 size_t size = (dsize * len); 6123 6124 D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n", 6125 len, dsize); 6126 6127 if (dhandle == NULL) { 6128 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n"); 6129 return (EINVAL); 6130 } 6131 6132 if (len == 0) { 6133 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n"); 6134 return (EINVAL); 6135 } 6136 6137 /* descriptor size should be 8-byte aligned */ 6138 if (dsize == 0 || (dsize & 0x7)) { 6139 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n"); 6140 return (EINVAL); 6141 } 6142 6143 *dhandle = 0; 6144 6145 /* Allocate a desc ring structure */ 6146 dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); 6147 6148 /* Initialize dring */ 6149 dringp->length = len; 6150 dringp->dsize = dsize; 6151 6152 /* round off to multiple of pagesize */ 6153 dringp->size = (size & MMU_PAGEMASK); 6154 if (size & MMU_PAGEOFFSET) 6155 dringp->size += MMU_PAGESIZE; 6156 6157 dringp->status = LDC_UNBOUND; 6158 6159 /* allocate descriptor ring memory */ 6160 dringp->base = kmem_zalloc(dringp->size, KM_SLEEP); 6161 6162 /* initialize the desc ring lock */ 6163 mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL); 6164 6165 /* Add descriptor ring to the head of global list */ 6166 mutex_enter(&ldcssp->lock); 6167 dringp->next = ldcssp->dring_list; 6168 ldcssp->dring_list = dringp; 6169 mutex_exit(&ldcssp->lock); 6170 6171 *dhandle = (ldc_dring_handle_t)dringp; 6172 6173 D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n"); 6174 6175 return (0); 6176 } 6177 6178 6179 /* 6180 * Destroy a descriptor ring. 6181 */ 6182 int 6183 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle) 6184 { 6185 ldc_dring_t *dringp; 6186 ldc_dring_t *tmp_dringp; 6187 6188 D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n"); 6189 6190 if (dhandle == NULL) { 6191 DWARN(DBG_ALL_LDCS, 6192 "ldc_mem_dring_destroy: invalid desc ring handle\n"); 6193 return (EINVAL); 6194 } 6195 dringp = (ldc_dring_t *)dhandle; 6196 6197 if (dringp->status == LDC_BOUND) { 6198 DWARN(DBG_ALL_LDCS, 6199 "ldc_mem_dring_destroy: desc ring is bound\n"); 6200 return (EACCES); 6201 } 6202 6203 mutex_enter(&dringp->lock); 6204 mutex_enter(&ldcssp->lock); 6205 6206 /* remove from linked list - if not bound */ 6207 tmp_dringp = ldcssp->dring_list; 6208 if (tmp_dringp == dringp) { 6209 ldcssp->dring_list = dringp->next; 6210 dringp->next = NULL; 6211 6212 } else { 6213 while (tmp_dringp != NULL) { 6214 if (tmp_dringp->next == dringp) { 6215 tmp_dringp->next = dringp->next; 6216 dringp->next = NULL; 6217 break; 6218 } 6219 tmp_dringp = tmp_dringp->next; 6220 } 6221 if (tmp_dringp == NULL) { 6222 DWARN(DBG_ALL_LDCS, 6223 "ldc_mem_dring_destroy: invalid descriptor\n"); 6224 mutex_exit(&ldcssp->lock); 6225 mutex_exit(&dringp->lock); 6226 return (EINVAL); 6227 } 6228 } 6229 6230 mutex_exit(&ldcssp->lock); 6231 6232 /* free the descriptor ring */ 6233 kmem_free(dringp->base, dringp->size); 6234 6235 mutex_exit(&dringp->lock); 6236 6237 /* destroy dring lock */ 6238 mutex_destroy(&dringp->lock); 6239 6240 /* free desc ring object */ 6241 kmem_free(dringp, sizeof (ldc_dring_t)); 6242 6243 return (0); 6244 } 6245 6246 /* 6247 * Bind a previously allocated dring to a channel. The channel should 6248 * be OPEN in order to bind the ring to the channel. Returns back a 6249 * descriptor ring cookie. 
The descriptor ring is exported for remote 6250 * access by the client at the other end of the channel. An entry for 6251 * dring pages is stored in map table (via call to ldc_mem_bind_handle). 6252 */ 6253 int 6254 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle, 6255 uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) 6256 { 6257 int err; 6258 ldc_chan_t *ldcp; 6259 ldc_dring_t *dringp; 6260 ldc_mem_handle_t mhandle; 6261 6262 /* check to see if channel is initalized */ 6263 if (handle == NULL) { 6264 DWARN(DBG_ALL_LDCS, 6265 "ldc_mem_dring_bind: invalid channel handle\n"); 6266 return (EINVAL); 6267 } 6268 ldcp = (ldc_chan_t *)handle; 6269 6270 if (dhandle == NULL) { 6271 DWARN(DBG_ALL_LDCS, 6272 "ldc_mem_dring_bind: invalid desc ring handle\n"); 6273 return (EINVAL); 6274 } 6275 dringp = (ldc_dring_t *)dhandle; 6276 6277 if (cookie == NULL) { 6278 DWARN(ldcp->id, 6279 "ldc_mem_dring_bind: invalid cookie arg\n"); 6280 return (EINVAL); 6281 } 6282 6283 mutex_enter(&dringp->lock); 6284 6285 if (dringp->status == LDC_BOUND) { 6286 DWARN(DBG_ALL_LDCS, 6287 "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n", 6288 ldcp->id); 6289 mutex_exit(&dringp->lock); 6290 return (EINVAL); 6291 } 6292 6293 if ((perm & LDC_MEM_RW) == 0) { 6294 DWARN(DBG_ALL_LDCS, 6295 "ldc_mem_dring_bind: invalid permissions\n"); 6296 mutex_exit(&dringp->lock); 6297 return (EINVAL); 6298 } 6299 6300 if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { 6301 DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n"); 6302 mutex_exit(&dringp->lock); 6303 return (EINVAL); 6304 } 6305 6306 dringp->ldcp = ldcp; 6307 6308 /* create an memory handle */ 6309 err = ldc_mem_alloc_handle(handle, &mhandle); 6310 if (err || mhandle == NULL) { 6311 DWARN(DBG_ALL_LDCS, 6312 "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n", 6313 ldcp->id); 6314 mutex_exit(&dringp->lock); 6315 return (err); 6316 } 6317 dringp->mhdl = mhandle; 6318 6319 /* bind the descriptor ring to channel */ 6320 err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size, 6321 mtype, perm, cookie, ccount); 6322 if (err) { 6323 DWARN(ldcp->id, 6324 "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n", 6325 ldcp->id); 6326 mutex_exit(&dringp->lock); 6327 return (err); 6328 } 6329 6330 /* 6331 * For now return error if we get more than one cookie 6332 * FUTURE: Return multiple cookies .. 

/*
 * Return the next cookie associated with the specified dring handle.
 */
int
ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
{
	int rv = 0;
	ldc_dring_t *dringp;
	ldc_chan_t *ldcp;

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;
	mutex_enter(&dringp->lock);

	if (dringp->status != LDC_BOUND) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
		    "is not bound\n", dringp);
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}

	ldcp = dringp->ldcp;

	if (cookie == NULL) {
		DWARN(ldcp->id,
		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
		    ldcp->id);
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}

	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
	mutex_exit(&dringp->lock);

	return (rv);
}

/*
 * Unbind a previously bound dring from a channel.
 */
int
ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
{
	ldc_dring_t *dringp;
	ldc_dring_t *tmp_dringp;
	ldc_chan_t *ldcp;

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;

	mutex_enter(&dringp->lock);

	if (dringp->status == LDC_UNBOUND) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_unbind: descriptor ring 0x%llx "
		    "is unbound\n", dringp);
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}
	ldcp = dringp->ldcp;

	mutex_enter(&ldcp->exp_dlist_lock);

	tmp_dringp = ldcp->exp_dring_list;
	if (tmp_dringp == dringp) {
		ldcp->exp_dring_list = dringp->ch_next;
		dringp->ch_next = NULL;

	} else {
		while (tmp_dringp != NULL) {
			if (tmp_dringp->ch_next == dringp) {
				tmp_dringp->ch_next = dringp->ch_next;
				dringp->ch_next = NULL;
				break;
			}
			tmp_dringp = tmp_dringp->ch_next;
		}
		if (tmp_dringp == NULL) {
			DWARN(DBG_ALL_LDCS,
			    "ldc_mem_dring_unbind: invalid descriptor\n");
			mutex_exit(&ldcp->exp_dlist_lock);
			mutex_exit(&dringp->lock);
			return (EINVAL);
		}
	}

	mutex_exit(&ldcp->exp_dlist_lock);

	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);

	dringp->ldcp = NULL;
	dringp->mhdl = NULL;
	dringp->status = LDC_UNBOUND;

	mutex_exit(&dringp->lock);

	return (0);
}
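
/*
 * Illustrative sketch (not part of the driver): exporter-side
 * teardown order. A bound ring must be unbound before it can be
 * destroyed; ldc_mem_dring_destroy() fails with EACCES while the
 * ring is still bound. Compiled only if the (hypothetical)
 * LDC_DRING_EXAMPLES symbol is defined.
 */
#ifdef LDC_DRING_EXAMPLES
static void
example_dring_teardown(ldc_dring_handle_t dh)
{
	/* unlink from the channel and release the memory handle */
	(void) ldc_mem_dring_unbind(dh);

	/* now safe to free the ring memory and its handle */
	(void) ldc_mem_dring_destroy(dh);
}
#endif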

/*
 * Get information about the dring. The base address of the descriptor
 * ring along with the type and permission are returned.
 */
int
ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
{
	ldc_dring_t *dringp;
	int rv;

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_info: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;

	mutex_enter(&dringp->lock);

	if (dringp->mhdl) {
		rv = ldc_mem_info(dringp->mhdl, minfo);
		if (rv) {
			DWARN(DBG_ALL_LDCS,
			    "ldc_mem_dring_info: error reading mem info\n");
			mutex_exit(&dringp->lock);
			return (rv);
		}
	} else {
		minfo->vaddr = dringp->base;
		minfo->raddr = NULL;
		minfo->status = dringp->status;
	}

	mutex_exit(&dringp->lock);

	return (0);
}

/*
 * Map an exported descriptor ring into the local address space. If the
 * descriptor ring was exported for direct map access, a HV call is made
 * to allocate a RA range. If the map is done via a shadow copy, local
 * shadow memory is allocated.
 */
int
ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
    uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
    ldc_dring_handle_t *dhandle)
{
	int err;
	ldc_chan_t *ldcp;
	ldc_mem_handle_t mhandle;
	ldc_dring_t *dringp;
	size_t dring_size;

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_map: invalid dhandle\n");
		return (EINVAL);
	}

	/* check to see if channel is initialized */
	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_map: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	if (cookie == NULL) {
		DWARN(ldcp->id,
		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
		    ldcp->id);
		return (EINVAL);
	}

	/* FUTURE: For now we support only one cookie per dring */
	ASSERT(ccount == 1);

	if (cookie->size < (dsize * len)) {
		DWARN(ldcp->id,
		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
		    ldcp->id);
		return (EINVAL);
	}

	*dhandle = 0;

	/* Allocate a dring structure */
	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);

	D1(ldcp->id,
	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
	    mtype, len, dsize, cookie->addr, cookie->size);

	/* Initialize dring */
	dringp->length = len;
	dringp->dsize = dsize;

	/* round off to multiple of page size */
	dring_size = len * dsize;
	dringp->size = (dring_size & MMU_PAGEMASK);
	if (dring_size & MMU_PAGEOFFSET)
		dringp->size += MMU_PAGESIZE;

	dringp->ldcp = ldcp;

	/* create a memory handle */
	err = ldc_mem_alloc_handle(handle, &mhandle);
	if (err || mhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
		    err);
		kmem_free(dringp, sizeof (ldc_dring_t));
		return (ENOMEM);
	}

	dringp->mhdl = mhandle;
	dringp->base = NULL;

	/* map the dring into local memory */
	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
	    &(dringp->base), NULL);
	if (err || dringp->base == NULL) {
		cmn_err(CE_WARN,
		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
		(void) ldc_mem_free_handle(mhandle);
		kmem_free(dringp, sizeof (ldc_dring_t));
		return (ENOMEM);
	}

	/* initialize the desc ring lock */
	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);

	/* Add descriptor ring to channel's imported dring list */
	mutex_enter(&ldcp->imp_dlist_lock);
	dringp->ch_next = ldcp->imp_dring_list;
	ldcp->imp_dring_list = dringp;
	mutex_exit(&ldcp->imp_dlist_lock);

	dringp->status = LDC_MAPPED;

	*dhandle = (ldc_dring_handle_t)dringp;

	return (0);
}
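
/*
 * Illustrative sketch (not part of the driver): importer-side mapping
 * of a ring exported by the peer. The cookie, length and descriptor
 * size are assumed to have been received over the channel and must
 * match the geometry the exporter used. LDC_SHADOW_MAP requests a
 * local shadow copy rather than a direct mapping. Compiled only if
 * the (hypothetical) LDC_DRING_EXAMPLES symbol is defined.
 */
#ifdef LDC_DRING_EXAMPLES
static int
example_dring_import(ldc_handle_t lh, ldc_mem_cookie_t *cookiep,
    uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhp)
{
	/* only a single cookie per ring is currently supported */
	return (ldc_mem_dring_map(lh, cookiep, 1, len, dsize,
	    LDC_SHADOW_MAP, dhp));
}
#endif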

/*
 * Unmap a descriptor ring. Free shadow memory (if any).
 */
int
ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
{
	ldc_dring_t *dringp;
	ldc_dring_t *tmp_dringp;
	ldc_chan_t *ldcp;

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;

	if (dringp->status != LDC_MAPPED) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
		return (EINVAL);
	}

	mutex_enter(&dringp->lock);

	ldcp = dringp->ldcp;

	mutex_enter(&ldcp->imp_dlist_lock);

	/* find and unlink the desc ring from channel import list */
	tmp_dringp = ldcp->imp_dring_list;
	if (tmp_dringp == dringp) {
		ldcp->imp_dring_list = dringp->ch_next;
		dringp->ch_next = NULL;

	} else {
		while (tmp_dringp != NULL) {
			if (tmp_dringp->ch_next == dringp) {
				tmp_dringp->ch_next = dringp->ch_next;
				dringp->ch_next = NULL;
				break;
			}
			tmp_dringp = tmp_dringp->ch_next;
		}
		if (tmp_dringp == NULL) {
			DWARN(DBG_ALL_LDCS,
			    "ldc_mem_dring_unmap: invalid descriptor\n");
			mutex_exit(&ldcp->imp_dlist_lock);
			mutex_exit(&dringp->lock);
			return (EINVAL);
		}
	}

	mutex_exit(&ldcp->imp_dlist_lock);

	/* do a LDC memory handle unmap and free */
	(void) ldc_mem_unmap(dringp->mhdl);
	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);

	dringp->status = 0;
	dringp->ldcp = NULL;

	mutex_exit(&dringp->lock);

	/* destroy dring lock */
	mutex_destroy(&dringp->lock);

	/* free desc ring object */
	kmem_free(dringp, sizeof (ldc_dring_t));

	return (0);
}

/*
 * Internal entry point for descriptor ring access entry consistency
 * semantics. Acquire copies the contents of the remote descriptor ring
 * into the local shadow copy. The release operation copies the local
 * contents into the remote dring. The start and end locations specify
 * bounds for the entries being synchronized.
 */
static int
i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
    uint8_t direction, uint64_t start, uint64_t end)
{
	int err;
	ldc_dring_t *dringp;
	ldc_chan_t *ldcp;
	uint64_t soff;
	size_t copy_size;

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;
	mutex_enter(&dringp->lock);

	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}

	if (start >= dringp->length || end >= dringp->length) {
		DWARN(DBG_ALL_LDCS,
		    "i_ldc_dring_acquire_release: index out of range\n");
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}

	/* get the channel handle */
	ldcp = dringp->ldcp;

	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
	    ((dringp->length - start) * dringp->dsize);

	/* Calculate the relative offset for the first desc */
	soff = (start * dringp->dsize);

	/* copy to/from remote from/to local memory */
	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
	    soff, copy_size);
	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
	    direction, soff, copy_size);
	if (err) {
		DWARN(ldcp->id,
		    "i_ldc_dring_acquire_release: copy failed\n");
		mutex_exit(&dringp->lock);
		return (err);
	}

	/* do the balance */
	if (start > end) {
		copy_size = ((end + 1) * dringp->dsize);
		soff = 0;

		/* copy to/from remote from/to local memory */
		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
		    direction, soff, copy_size);
		if (err) {
			DWARN(ldcp->id,
			    "i_ldc_dring_acquire_release: copy failed\n");
			mutex_exit(&dringp->lock);
			return (err);
		}
	}

	mutex_exit(&dringp->lock);

	return (0);
}

/*
 * Ensure that the contents of the local dring are consistent
 * with the contents of the remote dring.
 */
int
ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
{
	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
}

/*
 * Ensure that the contents of the remote dring are consistent
 * with the contents of the local dring.
 */
int
ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
{
	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
}
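
/*
 * Illustrative sketch (not part of the driver): an importer of a
 * mapped ring synchronizing a range of entries with the exporter.
 * Indices may wrap around the ring: when start > end, the copy covers
 * [start, length - 1] followed by [0, end]. Compiled only if the
 * (hypothetical) LDC_DRING_EXAMPLES symbol is defined.
 */
#ifdef LDC_DRING_EXAMPLES
static int
example_dring_sync(ldc_dring_handle_t dh, uint64_t start, uint64_t end)
{
	int rv;

	/* pull the exporter's copy of entries [start..end] into the shadow */
	rv = ldc_mem_dring_acquire(dh, start, end);
	if (rv != 0)
		return (rv);

	/* ... inspect or update the shadow descriptors here ... */

	/* push any local updates back out to the exporter */
	return (ldc_mem_dring_release(dh, start, end));
}
#endif

/* ------------------------------------------------------------------------- */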