/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/machsystm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/strsun.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/ethernet.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/vnet.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_common.h>
#include <sys/vnet_common.h>
#include <sys/vnet_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_gen.h>

/*
 * This file contains the implementation of the RxDringData transfer mode of
 * the VIO Protocol in vnet. The functions in this file are invoked from
 * vnet_gen.c after RxDringData mode is negotiated with the peer during the
 * attribute phase of the handshake. This file contains functions that set up
 * the transmit and receive descriptor rings, and associated resources in
 * RxDringData mode. It also contains the transmit and receive data processing
 * functions that are invoked in RxDringData mode. The data processing
 * routines in this file have the suffix '_shm' to indicate the shared memory
 * mechanism used in RxDringData mode.
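 *
 * A rough overview of how these entry points are used (see vnet_gen.c for
 * the exact call sites): vgen_create_rx_dring() exports our receive ring and
 * buffer area during handshake; vgen_map_tx_dring() and vgen_map_data() map
 * the ring and buffer area exported by the peer; vgen_dringsend_shm() copies
 * transmit frames into the peer's buffers; vgen_handle_dringdata_shm() and
 * vgen_poll_rcv_shm() deliver received frames in interrupt and polling mode
 * respectively; vgen_unmap_tx_dring() and vgen_destroy_rx_dring() release
 * these resources when the channel is torn down.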
 */

/* Functions exported to vnet_gen.c */
int vgen_create_rx_dring(vgen_ldc_t *ldcp);
void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
int vgen_dringsend_shm(void *arg, mblk_t *mp);
int vgen_handle_dringdata_shm(void *arg1, void *arg2);
mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    uint32_t start, int32_t end, uint8_t pstate);

/* Internal functions */
static int vgen_handle_dringdata_info_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tp);
static int vgen_handle_dringdata_ack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dringdata_nack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tp);
static int vgen_intr_rcv_shm(vgen_ldc_t *ldcp);
static int vgen_receive_packet(vgen_ldc_t *ldcp, mblk_t **bp, uint_t *size);
static int vgen_send_dringdata_shm(vgen_ldc_t *ldcp, uint32_t start,
    int32_t end);
static int vgen_sendmsg_shm(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen);

/* Functions imported from vnet_gen.c */
extern int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
extern void vgen_destroy_rxpools(void *arg);

/* Tunables */
extern uint32_t vnet_num_descriptors;
extern uint32_t vgen_chain_len;
extern uint32_t vgen_ldcwr_retries;
extern uint32_t vgen_recv_delay;
extern uint32_t vgen_recv_retries;
extern uint32_t vgen_nrbufs_factor;

#ifdef DEBUG

#define	DEBUG_PRINTF	vgen_debug_printf

extern int vnet_dbglevel;
extern int vgen_inject_err_flag;

extern void vgen_debug_printf(const char *fname, vgen_t *vgenp,
    vgen_ldc_t *ldcp, const char *fmt, ...);
extern boolean_t vgen_inject_error(vgen_ldc_t *ldcp, int error);

#endif

/*
 * Allocate receive resources for the channel. The resources consist of a
 * receive descriptor ring and an associated receive buffer area.
 */
int
vgen_create_rx_dring(vgen_ldc_t *ldcp)
{
	int	i, j;
	int	rv;
	uint32_t	ncookies;
	ldc_mem_info_t	minfo;
	vnet_rx_dringdata_desc_t *rxdp;
	size_t	data_sz;
	vio_mblk_t	*vmp;
	vio_mblk_t	**rxdp_to_vmp;
	uint32_t	rxdsize;
	caddr_t	datap = NULL;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	rxdsize = sizeof (vnet_rx_dringdata_desc_t);
	ldcp->num_rxds = vnet_num_descriptors;
	ldcp->num_rbufs = vnet_num_descriptors * vgen_nrbufs_factor;

	/* Create the receive descriptor ring */
	rv = ldc_mem_dring_create(ldcp->num_rxds, rxdsize,
	    &ldcp->rx_dring_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
		goto fail;
	}

	/* Get the addr of descriptor ring */
	rv = ldc_mem_dring_info(ldcp->rx_dring_handle, &minfo);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
		goto fail;
	}
	ldcp->rxdp = (vnet_rx_dringdata_desc_t *)(minfo.vaddr);
	bzero(ldcp->rxdp, sizeof (*rxdp) * (ldcp->num_rxds));

	/*
	 * Allocate a table that maps descriptor to its associated buffer;
	 * used while receiving to validate that the peer has not changed the
	 * buffer offset provided in the descriptor.
	 */
	rxdp_to_vmp = kmem_zalloc(ldcp->num_rxds * sizeof (uintptr_t),
	    KM_SLEEP);
	ldcp->rxdp_to_vmp = rxdp_to_vmp;

	/*
	 * Allocate a single large buffer that serves as the rx buffer area.
	 * We allocate a ldc memory handle and export the buffer area as
	 * shared memory. We send the ldc memcookie for this buffer space to
	 * the peer, as part of the dring registration phase during handshake.
	 * We manage this buffer area as individual buffers of max_frame_size
	 * and provide specific buffer offsets in each descriptor to the peer.
	 * Note that the factor used to compute the # of buffers (above) must
	 * be > 1 to ensure that there are more buffers than the # of
	 * descriptors. This is needed because, while the shared memory
	 * buffers are sent up our stack during receive, the sender needs
	 * additional buffers that can be used for further transmits. This
	 * also means there is no one-to-one correspondence between the
	 * descriptor index and buffer offset. The sender has to read the
	 * buffer offset in the descriptor and use the specified offset to
	 * copy the tx data into the shared buffer. We (receiver) manage the
	 * individual buffers and their state (see VIO_MBLK_STATEs in
	 * vio_util.h).
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	data_sz = VNET_ROUNDUP_2K(data_sz);

	ldcp->rx_data_sz = data_sz * ldcp->num_rbufs;
	ldcp->rx_dblk_sz = data_sz;
	datap = kmem_zalloc(ldcp->rx_data_sz, KM_SLEEP);
	ldcp->rx_datap = datap;

	/* Allocate a ldc memhandle for the entire rx data area */
	rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &ldcp->rx_data_handle);
	if (rv) {
		ldcp->rx_data_handle = 0;
		goto fail;
	}

	/* Allocate memory for the data cookies */
	ldcp->rx_data_cookie = kmem_zalloc(VNET_DATA_AREA_COOKIES *
	    sizeof (ldc_mem_cookie_t), KM_SLEEP);

	/*
	 * Bind ldc memhandle to the corresponding rx data area.
	 */
	ncookies = 0;
	rv = ldc_mem_bind_handle(ldcp->rx_data_handle, (caddr_t)datap,
	    ldcp->rx_data_sz, LDC_DIRECT_MAP, LDC_MEM_W,
	    ldcp->rx_data_cookie, &ncookies);
	if (rv != 0) {
		goto fail;
	}
	if ((ncookies == 0) || (ncookies > VNET_DATA_AREA_COOKIES)) {
		goto fail;
	}
	ldcp->rx_data_ncookies = ncookies;

	for (j = 1; j < ncookies; j++) {
		rv = ldc_mem_nextcookie(ldcp->rx_data_handle,
		    &(ldcp->rx_data_cookie[j]));
		if (rv != 0) {
			DERR(vgenp, ldcp, "ldc_mem_nextcookie "
			    "failed rv (%d)", rv);
			goto fail;
		}
	}

	/*
	 * Successful in binding the handle to the rx data area. Now set up
	 * mblks around each data buffer and set up the descriptors to point
	 * to these rx data buffers. We associate each descriptor with a
	 * buffer by specifying the buffer offset in the descriptor. When the
	 * peer needs to transmit data, this offset is read by the peer to
	 * determine the buffer in the mapped buffer area where the data to be
	 * transmitted should be copied, for a specific descriptor.
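	 *
	 * The transmit side of this arrangement can be seen in
	 * vgen_dringsend_shm() below, which (for the ring mapped from the
	 * peer) roughly does:
	 *	buf_offset = txdp->data_buf_offset;
	 *	dst = (caddr_t)ldcp->tx_datap + buf_offset;
	 *	bcopy(bp->b_rptr, dst, MBLKL(bp));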
	 */
	rv = vio_create_mblks(ldcp->num_rbufs, data_sz, (uint8_t *)datap,
	    &ldcp->rx_vmp);
	if (rv != 0) {
		goto fail;
	}

	for (i = 0; i < ldcp->num_rxds; i++) {
		rxdp = &(ldcp->rxdp[i]);
		/* allocate an mblk around this data buffer */
		vmp = vio_allocb(ldcp->rx_vmp);
		ASSERT(vmp != NULL);
		rxdp->data_buf_offset = VIO_MBLK_DATA_OFF(vmp) + VNET_IPALIGN;
		rxdp->dstate = VIO_DESC_FREE;
		rxdp_to_vmp[i] = vmp;
	}

	/*
	 * The descriptors and the associated buffers are all ready;
	 * now bind descriptor ring to the channel.
	 */
	rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->rx_dring_handle,
	    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
	    &ldcp->rx_dring_cookie, &ncookies);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
		    "rv(%x)\n", rv);
		goto fail;
	}
	ASSERT(ncookies == 1);
	ldcp->rx_dring_ncookies = ncookies;

	/* initialize rx seqnum and index */
	ldcp->next_rxseq = VNET_ISS;
	ldcp->next_rxi = 0;

	return (VGEN_SUCCESS);

fail:
	vgen_destroy_rx_dring(ldcp);
	return (VGEN_FAILURE);
}

/*
 * Free receive resources for the channel.
 */
void
vgen_destroy_rx_dring(vgen_ldc_t *ldcp)
{
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	/* We first unbind the descriptor ring */
	if (ldcp->rx_dring_ncookies != 0) {
		(void) ldc_mem_dring_unbind(ldcp->rx_dring_handle);
		ldcp->rx_dring_ncookies = 0;
	}

	/* Destroy the mblks that are wrapped around the rx data buffers */
	if (ldcp->rx_vmp != NULL) {
		vio_clobber_pool(ldcp->rx_vmp);
		if (vio_destroy_mblks(ldcp->rx_vmp) != 0) {
			/*
			 * If we can't destroy the rx pool for this channel,
			 * dispatch a task to retry and clean up. Note that we
			 * don't need to wait for the task to complete. If the
			 * vnet device itself gets detached, it will wait for
			 * the task to complete implicitly in
			 * ddi_taskq_destroy().
			 */
			(void) ddi_taskq_dispatch(vgenp->rxp_taskq,
			    vgen_destroy_rxpools, ldcp->rx_vmp, DDI_SLEEP);
		}
		ldcp->rx_vmp = NULL;
	}

	/* Free rx data area cookies */
	if (ldcp->rx_data_cookie != NULL) {
		kmem_free(ldcp->rx_data_cookie, VNET_DATA_AREA_COOKIES *
		    sizeof (ldc_mem_cookie_t));
		ldcp->rx_data_cookie = NULL;
	}

	/* Unbind rx data area memhandle */
	if (ldcp->rx_data_ncookies != 0) {
		(void) ldc_mem_unbind_handle(ldcp->rx_data_handle);
		ldcp->rx_data_ncookies = 0;
	}

	/* Free rx data area memhandle */
	if (ldcp->rx_data_handle != 0) {
		(void) ldc_mem_free_handle(ldcp->rx_data_handle);
		ldcp->rx_data_handle = 0;
	}

	/* Now free the rx data area itself */
	if (ldcp->rx_datap != NULL) {
		/* prealloc'd rx data buffer */
		kmem_free(ldcp->rx_datap, ldcp->rx_data_sz);
		ldcp->rx_datap = NULL;
		ldcp->rx_data_sz = 0;
	}

	/* Finally, free the receive descriptor ring */
	if (ldcp->rx_dring_handle != 0) {
		(void) ldc_mem_dring_destroy(ldcp->rx_dring_handle);
		ldcp->rx_dring_handle = 0;
		ldcp->rxdp = NULL;
	}

	if (ldcp->rxdp_to_vmp != NULL) {
		kmem_free(ldcp->rxdp_to_vmp,
		    ldcp->num_rxds * sizeof (uintptr_t));
		ldcp->rxdp_to_vmp = NULL;
	}

	/* Reset rx index and seqnum */
	ldcp->next_rxi = 0;
	ldcp->next_rxseq = VNET_ISS;
}

/*
 * Map the receive descriptor ring exported
 * by the peer, as our transmit descriptor ring.
 */
int
vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt)
{
	int	i;
	int	rv;
	ldc_mem_info_t	minfo;
	ldc_mem_cookie_t	dcookie;
	uint32_t	ncookies;
	uint32_t	num_desc;
	uint32_t	desc_size;
	vnet_rx_dringdata_desc_t *txdp;
	on_trap_data_t	otd;
	vio_dring_reg_msg_t	*msg = pkt;

	ncookies = msg->ncookies;
	num_desc = msg->num_descriptors;
	desc_size = msg->descriptor_size;

	/*
	 * Sanity check.
	 */
	if (num_desc < VGEN_NUM_DESCRIPTORS_MIN ||
	    desc_size < sizeof (vnet_rx_dringdata_desc_t) ||
	    ncookies > 1) {
		goto fail;
	}

	bcopy(&msg->cookie[0], &dcookie, sizeof (ldc_mem_cookie_t));

	/* Map the remote dring */
	rv = ldc_mem_dring_map(ldcp->ldc_handle, &dcookie, ncookies, num_desc,
	    desc_size, LDC_DIRECT_MAP, &(ldcp->tx_dring_handle));
	if (rv != 0) {
		goto fail;
	}

	/*
	 * Successfully mapped; now try to get info about the mapped dring.
	 */
	rv = ldc_mem_dring_info(ldcp->tx_dring_handle, &minfo);
	if (rv != 0) {
		goto fail;
	}

	/*
	 * Save ring address, number of descriptors.
	 */
	ldcp->mtxdp = (vnet_rx_dringdata_desc_t *)(minfo.vaddr);
	bcopy(&dcookie, &(ldcp->tx_dring_cookie), sizeof (dcookie));
	ldcp->tx_dring_ncookies = ncookies;
	ldcp->num_txds = num_desc;

	/* Initialize tx dring indexes and seqnum */
	ldcp->next_txi = ldcp->cur_txi = 0;
	ldcp->next_txseq = VNET_ISS - 1;
	ldcp->resched_peer = B_TRUE;
	ldcp->dring_mtype = minfo.mtype;
	ldcp->dringdata_msgid = 0;

	/* Save peer's dring_info values */
	bcopy(&dcookie, &(ldcp->peer_hparams.dring_cookie),
	    sizeof (ldc_mem_cookie_t));
	ldcp->peer_hparams.num_desc = num_desc;
	ldcp->peer_hparams.desc_size = desc_size;
	ldcp->peer_hparams.dring_ncookies = ncookies;

	/* Set dring_ident for the peer */
	ldcp->peer_hparams.dring_ident = (uint64_t)ldcp->mtxdp;

	/* Return the dring_ident in ack msg */
	msg->dring_ident = (uint64_t)ldcp->mtxdp;

	/*
	 * Mark the descriptor state as 'done'. This is implementation
	 * specific and not required by the protocol. In our implementation,
	 * we only need the descriptor to be in 'done' state to be used by the
	 * transmit function and the peer is not aware of it. As the protocol
	 * requires that during initial registration the exporting end point
	 * mark the dstate as 'free', we change it to 'done' here. After this,
	 * the dstate in our implementation will keep moving between 'ready',
	 * set by our transmit function, and 'done', set by the peer (per
	 * protocol) after receiving data.
	 * Setup on_trap() protection before accessing dring shared memory
	 * area.
	 */
	rv = LDC_ON_TRAP(&otd);
	if (rv != 0) {
		/*
		 * Data access fault occurred down the code path below while
		 * accessing the descriptors. Return failure.
		 */
		goto fail;
	}

	for (i = 0; i < num_desc; i++) {
		txdp = &ldcp->mtxdp[i];
		txdp->dstate = VIO_DESC_DONE;
	}

	(void) LDC_NO_TRAP();
	return (VGEN_SUCCESS);

fail:
	if (ldcp->tx_dring_handle != 0) {
		(void) ldc_mem_dring_unmap(ldcp->tx_dring_handle);
		ldcp->tx_dring_handle = 0;
	}
	return (VGEN_FAILURE);
}

/*
 * Unmap the transmit descriptor ring.
 */
void
vgen_unmap_tx_dring(vgen_ldc_t *ldcp)
{
	/* Unmap mapped tx data area */
	if (ldcp->tx_datap != NULL) {
		(void) ldc_mem_unmap(ldcp->tx_data_handle);
		ldcp->tx_datap = NULL;
	}

	/* Free tx data area handle */
	if (ldcp->tx_data_handle != 0) {
		(void) ldc_mem_free_handle(ldcp->tx_data_handle);
		ldcp->tx_data_handle = 0;
	}

	/* Free tx data area cookies */
	if (ldcp->tx_data_cookie != NULL) {
		kmem_free(ldcp->tx_data_cookie, ldcp->tx_data_ncookies *
		    sizeof (ldc_mem_cookie_t));
		ldcp->tx_data_cookie = NULL;
		ldcp->tx_data_ncookies = 0;
	}

	/* Unmap peer's dring */
	if (ldcp->tx_dring_handle != 0) {
		(void) ldc_mem_dring_unmap(ldcp->tx_dring_handle);
		ldcp->tx_dring_handle = 0;
	}

	/* clobber tx ring members */
	bzero(&ldcp->tx_dring_cookie, sizeof (ldcp->tx_dring_cookie));
	ldcp->mtxdp = NULL;
	ldcp->next_txi = ldcp->cur_txi = 0;
	ldcp->num_txds = 0;
	ldcp->next_txseq = VNET_ISS - 1;
	ldcp->resched_peer = B_TRUE;
}

/*
 * Map the shared memory data buffer area exported by the peer.
 */
int
vgen_map_data(vgen_ldc_t *ldcp, void *pkt)
{
	int	rv;
	vio_dring_reg_ext_msg_t	*emsg;
	vio_dring_reg_msg_t	*msg = (vio_dring_reg_msg_t *)pkt;
	uint8_t	*buf = (uint8_t *)msg->cookie;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	/* skip over dring cookies */
	ASSERT(msg->ncookies == 1);
	buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));

	emsg = (vio_dring_reg_ext_msg_t *)buf;
	if (emsg->data_ncookies > VNET_DATA_AREA_COOKIES) {
		return (VGEN_FAILURE);
	}

	/* save # of data area cookies */
	ldcp->tx_data_ncookies = emsg->data_ncookies;

	/* save data area size */
	ldcp->tx_data_sz = emsg->data_area_size;

	/* allocate ldc mem handle for data area */
	rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &ldcp->tx_data_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_alloc_handle() failed: %d\n", rv);
		return (VGEN_FAILURE);
	}

	/* map the data area */
	rv = ldc_mem_map(ldcp->tx_data_handle, emsg->data_cookie,
	    emsg->data_ncookies, LDC_DIRECT_MAP, LDC_MEM_W,
	    (caddr_t *)&ldcp->tx_datap, NULL);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_map() failed: %d\n", rv);
		(void) ldc_mem_free_handle(ldcp->tx_data_handle);
		ldcp->tx_data_handle = 0;
		return (VGEN_FAILURE);
	}

	/* allocate memory for data area cookies */
	ldcp->tx_data_cookie = kmem_zalloc(emsg->data_ncookies *
	    sizeof (ldc_mem_cookie_t), KM_SLEEP);

	/* save data area cookies */
	bcopy(emsg->data_cookie, ldcp->tx_data_cookie,
	    emsg->data_ncookies * sizeof (ldc_mem_cookie_t));

	return (VGEN_SUCCESS);
}

/*
 * This function transmits normal data frames (non-priority) over the channel.
 * It queues the frame into the transmit descriptor ring and sends a
 * VIO_DRING_DATA message if needed, to wake up the peer to (re)start
 * processing.
 */
int
vgen_dringsend_shm(void *arg, mblk_t *mp)
{
	uint32_t	next_txi;
	uint32_t	txi;
	vnet_rx_dringdata_desc_t *txdp;
	vnet_rx_dringdata_desc_t *ntxdp;
	struct ether_header	*ehp;
	size_t	mblksz;
	caddr_t	dst;
	mblk_t	*bp;
	size_t	size;
	uint32_t	buf_offset;
	on_trap_data_t	otd;
	int	rv = 0;
	boolean_t	is_bcast = B_FALSE;
	boolean_t	is_mcast = B_FALSE;
	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t	*statsp = &ldcp->stats;
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	boolean_t	resched_peer = B_FALSE;
	boolean_t	tx_update = B_FALSE;

	/* Drop the packet if ldc is not up or handshake is not done */
	if (ldcp->ldc_status != LDC_UP) {
		DBG2(vgenp, ldcp, "status(%d), dropping packet\n",
		    ldcp->ldc_status);
		goto dringsend_shm_exit;
	}

	if (ldcp->hphase != VH_DONE) {
		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
		    ldcp->hphase);
		goto dringsend_shm_exit;
	}

	size = msgsize(mp);
	if (size > (size_t)lp->mtu) {
		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
		goto dringsend_shm_exit;
	}
	if (size < ETHERMIN)
		size = ETHERMIN;

	ehp = (struct ether_header *)mp->b_rptr;
	is_bcast = IS_BROADCAST(ehp);
	is_mcast = IS_MULTICAST(ehp);

	/*
	 * Setup on_trap() protection before accessing shared memory areas
	 * (descriptor and data buffer). Note that we enable this protection a
	 * little early and turn it off slightly later, rather than keeping it
	 * enabled strictly at the points in the code below where the
	 * descriptor and data buffer are accessed. This is done for
	 * performance reasons:
	 * (a) to avoid calling the trap protection code while holding a mutex.
	 * (b) to avoid multiple on/off steps for descriptor and data accesses.
	 */
	rv = LDC_ON_TRAP(&otd);
	if (rv != 0) {
		/*
		 * Data access fault occurred down the code path below while
		 * accessing either the descriptor or the data buffer. Release
		 * any locks that we might have acquired in the code below and
		 * return failure.
		 */
		DERR(vgenp, ldcp, "data access fault occurred\n");
		statsp->oerrors++;
		if (mutex_owned(&ldcp->txlock)) {
			mutex_exit(&ldcp->txlock);
		}
		if (mutex_owned(&ldcp->wrlock)) {
			mutex_exit(&ldcp->wrlock);
		}
		goto dringsend_shm_exit;
	}

	/*
	 * Allocate a descriptor
	 */
	mutex_enter(&ldcp->txlock);
	txi = next_txi = ldcp->next_txi;
	INCR_TXI(next_txi, ldcp);
	ntxdp = &(ldcp->mtxdp[next_txi]);
	if (ntxdp->dstate != VIO_DESC_DONE) {	/* out of descriptors */
		if (ldcp->tx_blocked == B_FALSE) {
			ldcp->tx_blocked_lbolt = ddi_get_lbolt();
			ldcp->tx_blocked = B_TRUE;
		}
		statsp->tx_no_desc++;
		mutex_exit(&ldcp->txlock);
		(void) LDC_NO_TRAP();
		return (VGEN_TX_NORESOURCES);
	}

	if (ldcp->tx_blocked == B_TRUE) {
		ldcp->tx_blocked = B_FALSE;
		tx_update = B_TRUE;
	}

	/* Update descriptor ring index */
	ldcp->next_txi = next_txi;
	mutex_exit(&ldcp->txlock);

	if (tx_update == B_TRUE) {
		vio_net_tx_update_t vtx_update =
		    ldcp->portp->vcb.vio_net_tx_update;

		vtx_update(ldcp->portp->vhp);
	}

	/* Access the descriptor */
	txdp = &(ldcp->mtxdp[txi]);

	/*
	 * Ensure load ordering of dstate (above) and data_buf_offset.
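	 * (The peer is expected to publish data_buf_offset before it marks
	 * the descriptor DONE, with a producer barrier in between; see the
	 * analogous sequence in vgen_receive_packet() below. The consumer
	 * barrier here is the matching half of that pairing.)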
	 */
	MEMBAR_CONSUMER();

	/* Get the offset of the buffer to be used */
	buf_offset = txdp->data_buf_offset;

	/* Access the buffer using the offset */
	dst = (caddr_t)ldcp->tx_datap + buf_offset;

	/* Copy data into mapped transmit buffer */
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
	}

	/* Set the size of data in the descriptor */
	txdp->nbytes = size;

	/*
	 * Ensure store ordering of nbytes and dstate (below), so that the
	 * peer sees the right nbytes value after it checks that the dstate
	 * is READY.
	 */
	MEMBAR_PRODUCER();

	mutex_enter(&ldcp->wrlock);

	/* Mark the descriptor ready */
	txdp->dstate = VIO_DESC_READY;

	/* Check if peer needs wake up (handled below) */
	if (ldcp->resched_peer == B_TRUE) {
		ldcp->resched_peer = B_FALSE;
		resched_peer = B_TRUE;
	}

	/* Update tx stats */
	statsp->opackets++;
	statsp->obytes += size;
	if (is_bcast)
		statsp->brdcstxmt++;
	else if (is_mcast)
		statsp->multixmt++;

	mutex_exit(&ldcp->wrlock);

	/*
	 * We are done accessing shared memory; clear trap protection.
	 */
	(void) LDC_NO_TRAP();

	/*
	 * Need to wake up the peer?
	 */
	if (resched_peer == B_TRUE) {
		rv = vgen_send_dringdata_shm(ldcp, (uint32_t)txi, -1);
		if (rv != 0) {
			/* error: drop the packet */
			DWARN(vgenp, ldcp, "failed sending dringdata msg "
			    "rv(%d) len(%d)\n", rv, size);
			mutex_enter(&ldcp->wrlock);
			statsp->oerrors++;
			ldcp->resched_peer = B_TRUE;
			mutex_exit(&ldcp->wrlock);
		}
	}

dringsend_shm_exit:
	if (rv == ECONNRESET || rv == EACCES) {
		(void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
	}
	freemsg(mp);
	return (VGEN_TX_SUCCESS);
}

/*
 * Process dring data messages (info/ack/nack)
 */
int
vgen_handle_dringdata_shm(void *arg1, void *arg2)
{
	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg1;
	vio_msg_tag_t	*tagp = (vio_msg_tag_t *)arg2;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	int	rv = 0;

	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:
		/*
		 * To reduce the locking contention, release the
		 * cblock here and re-acquire it once we are done
		 * receiving packets.
		 */
		mutex_exit(&ldcp->cblock);
		mutex_enter(&ldcp->rxlock);
		rv = vgen_handle_dringdata_info_shm(ldcp, tagp);
		mutex_exit(&ldcp->rxlock);
		mutex_enter(&ldcp->cblock);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "handle_data_info failed(%d)\n", rv);
		}
		break;

	case VIO_SUBTYPE_ACK:
		rv = vgen_handle_dringdata_ack_shm(ldcp, tagp);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "handle_data_ack failed(%d)\n", rv);
		}
		break;

	case VIO_SUBTYPE_NACK:
		rv = vgen_handle_dringdata_nack_shm(ldcp, tagp);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "handle_data_nack failed(%d)\n", rv);
		}
		break;
	}

	return (rv);
}

static int
vgen_handle_dringdata_info_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	uint32_t	start;
	int32_t	end;
	int	rv = 0;
	vio_dring_msg_t	*dringmsg = (vio_dring_msg_t *)tagp;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t	*statsp = &ldcp->stats;

	start = dringmsg->start_idx;
	end = dringmsg->end_idx;

	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
	    start, end);

	if (!(CHECK_RXI(start, ldcp)) ||
	    ((end != -1) && !(CHECK_RXI(end, ldcp)))) {
		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
		    start, end);
		/* drop the message if invalid index */
		return (0);
	}

	/* validate dring_ident */
	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
		    dringmsg->dring_ident);
		/* invalid dring_ident, drop the msg */
		return (0);
	}

	statsp->dring_data_msgs_rcvd++;

	/*
	 * If we are in polling mode, return from here without processing the
	 * dring. We will process the dring in the context of the polling
	 * thread.
	 */
	if (ldcp->polling_on == B_TRUE) {
		return (0);
	}

	/*
	 * Process the dring and receive packets in intr context.
	 */
	rv = vgen_intr_rcv_shm(ldcp);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "vgen_intr_rcv_shm() failed\n");
	}
	return (rv);
}

/*
 * Process the rx descriptor ring in the context of the interrupt thread
 * (vgen_ldc_cb() callback) and send the received packets up the stack.
 */
static int
vgen_intr_rcv_shm(vgen_ldc_t *ldcp)
{
	int	rv;
	uint32_t	end_ix;
	vio_dring_msg_t	msg;
	uint_t	mblk_sz;
	int	count = 0;
	int	total_count = 0;
	mblk_t	*bp = NULL;
	mblk_t	*bpt = NULL;
	mblk_t	*mp = NULL;
	vio_net_rx_cb_t	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;

	ASSERT(MUTEX_HELD(&ldcp->rxlock));

	do {
		rv = vgen_receive_packet(ldcp, &mp, &mblk_sz);
		if (rv != 0) {
			if (rv == EINVAL) {
				/* Invalid descriptor error; get next */
				continue;
			}
			DTRACE_PROBE1(vgen_intr_nopkts, vgen_ldc_t *, ldcp);
			break;
		}

		/* Build a chain of received packets */
		if (bp == NULL) {
			/* first pkt */
			bp = mp;
			bpt = bp;
			bpt->b_next = NULL;
		} else {
			mp->b_next = NULL;
			bpt->b_next = mp;
			bpt = mp;
		}

		total_count++;
		count++;

		/*
		 * We are receiving the packets in interrupt context. If we
		 * have gathered vgen_chain_len (tunable) # of packets in the
		 * chain, send them up. (See vgen_poll_rcv_shm() for receiving
		 * in polling thread context).
		 */
		if (count == vgen_chain_len) {
			DTRACE_PROBE2(vgen_intr_pkts, vgen_ldc_t *, ldcp,
			    int, count);
			mutex_exit(&ldcp->rxlock);
			vrx_cb(ldcp->portp->vhp, bp);
			mutex_enter(&ldcp->rxlock);
			bp = bpt = NULL;
			count = 0;
		}

		/*
		 * Stop further processing if we processed the entire dring
		 * once; otherwise continue.
		 */
	} while (total_count < ldcp->num_rxds);

	if (bp != NULL) {
		DTRACE_PROBE2(vgen_intr_pkts, vgen_ldc_t *, ldcp, int, count);
		mutex_exit(&ldcp->rxlock);
		vrx_cb(ldcp->portp->vhp, bp);
		mutex_enter(&ldcp->rxlock);
	}

	if (ldcp->polling_on == B_FALSE) {
		/*
		 * We send a stopped message to the peer (sender) while we are
		 * in intr mode only; allowing the peer to send further data
		 * intrs (dring data msgs) to us.
		 */
		end_ix = ldcp->next_rxi;
		DECR_RXI(end_ix, ldcp);
		msg.dring_ident = ldcp->peer_hparams.dring_ident;
		rv = vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
		    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
		return (rv);
	}

	return (0);
}

/*
 * Process the rx descriptor ring in the context of the mac polling thread.
 * Receive packets up to the limit specified by bytes_to_pickup or until
 * there are no more packets, whichever occurs first. Return the chain of
 * received packets.
 */
mblk_t *
vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup)
{
	uint_t	mblk_sz = 0;
	uint_t	sz = 0;
	mblk_t	*bp = NULL;
	mblk_t	*bpt = NULL;
	mblk_t	*mp = NULL;
	int	count = 0;
	int	rv;

	mutex_enter(&ldcp->rxlock);

	if (ldcp->hphase != VH_DONE) {
		/* Channel is being reset and handshake not complete */
		mutex_exit(&ldcp->rxlock);
		return (NULL);
	}

	do {
		rv = vgen_receive_packet(ldcp, &mp, &mblk_sz);
		if (rv != 0) {
			if (rv == EINVAL) {
				/* Invalid descriptor error; get next */
				continue;
			}
			DTRACE_PROBE1(vgen_poll_nopkts, vgen_ldc_t *, ldcp);
			break;
		}

		/* Build a chain of received packets */
		if (bp == NULL) {
			/* first pkt */
			bp = mp;
			bpt = bp;
			bpt->b_next = NULL;
		} else {
			mp->b_next = NULL;
			bpt->b_next = mp;
			bpt = mp;
		}

		/* Compute total size accumulated */
		sz += mblk_sz;
		count++;

		/* Reached the bytes limit; we are done. */
		if (sz >= bytes_to_pickup) {
			break;
		}

	_NOTE(CONSTCOND)
	} while (1);

	/*
	 * We prepend any high priority packets to the chain of packets; note
	 * that if we are already at the bytes_to_pickup limit, we might
	 * slightly exceed that in such cases. That should be ok, as these
	 * pkts are expected to be small in size and arrive at an interval in
	 * the order of a few seconds.
	 */
	if (ldcp->rx_pktdata == vgen_handle_pkt_data &&
	    ldcp->rx_pri_head != NULL) {
		ldcp->rx_pri_tail->b_next = bp;
		bp = ldcp->rx_pri_head;
		ldcp->rx_pri_head = ldcp->rx_pri_tail = NULL;
	}

	mutex_exit(&ldcp->rxlock);

	DTRACE_PROBE2(vgen_poll_pkts, vgen_ldc_t *, ldcp, int, count);
	DTRACE_PROBE2(vgen_poll_bytes, vgen_ldc_t *, ldcp, uint_t, sz);
	return (bp);
}

/*
 * Process the next index in the rx dring and receive the associated packet.
 *
 * Returns:
 *	bp:	Success: The received packet.
 *		Failure: NULL
 *	size:	Success: Size of received packet.
 *		Failure: 0
 *	retval:
 *		Success: 0
 *		Failure: EAGAIN: Descriptor not ready
 *			 EIO: Descriptor contents invalid.
 */
static int
vgen_receive_packet(vgen_ldc_t *ldcp, mblk_t **bp, uint_t *size)
{
	uint32_t	rxi;
	vio_mblk_t	*vmp;
	vio_mblk_t	*new_vmp;
	struct ether_header	*ehp;
	vnet_rx_dringdata_desc_t *rxdp;
	int	err = 0;
	uint32_t	nbytes = 0;
	mblk_t	*mp = NULL;
	mblk_t	*dmp = NULL;
	vgen_stats_t	*statsp = &ldcp->stats;
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	rxi = ldcp->next_rxi;
	rxdp = &(ldcp->rxdp[rxi]);
	vmp = ldcp->rxdp_to_vmp[rxi];

	if (rxdp->dstate != VIO_DESC_READY) {
		/*
		 * Descriptor is not ready.
		 */
		DTRACE_PROBE1(vgen_noready_rxds, vgen_ldc_t *, ldcp);
		return (EAGAIN);
	}

	/*
	 * Ensure load ordering of dstate and nbytes.
	 */
	MEMBAR_CONSUMER();

	nbytes = rxdp->nbytes;

	if ((nbytes < ETHERMIN) ||
	    (nbytes > lp->mtu) ||
	    (rxdp->data_buf_offset !=
	    (VIO_MBLK_DATA_OFF(vmp) + VNET_IPALIGN))) {
		/*
		 * Descriptor contents invalid.
		 */
		statsp->ierrors++;
		rxdp->dstate = VIO_DESC_DONE;
		err = EIO;
		goto done;
	}

	/*
	 * Now allocate a new buffer for this descriptor before sending up the
	 * buffer being processed. If that fails, stop processing, as we are
	 * out of receive buffers.
	 */
	new_vmp = vio_allocb(ldcp->rx_vmp);

	/*
	 * Process the current buffer being received.
	 */
	mp = vmp->mp;

	if (new_vmp == NULL) {
		/*
		 * We failed to get a new mapped buffer that is needed to
		 * refill the descriptor. In that case, leave the current
		 * buffer bound to the descriptor; allocate an mblk dynamically
		 * and copy the contents of the buffer to the mblk. Then send
		 * up this mblk. This way the sender has the same buffer as
		 * before that can be used to send new data.
		 */
		statsp->norcvbuf++;
		dmp = allocb(nbytes + VNET_IPALIGN, BPRI_MED);
		if (dmp == NULL) {
			statsp->ierrors++;
			return (ENOMEM);
		}
		bcopy(mp->b_rptr + VNET_IPALIGN,
		    dmp->b_rptr + VNET_IPALIGN, nbytes);
		mp = dmp;
	} else {
		/* Mark the status of the current rbuf */
		vmp->state = VIO_MBLK_HAS_DATA;

		/* Set the offset of the new buffer in the descriptor */
		rxdp->data_buf_offset =
		    VIO_MBLK_DATA_OFF(new_vmp) + VNET_IPALIGN;
		ldcp->rxdp_to_vmp[rxi] = new_vmp;
	}
	mp->b_rptr += VNET_IPALIGN;
	mp->b_wptr = mp->b_rptr + nbytes;

	/*
	 * Ensure store ordering of data_buf_offset and dstate, so that the
	 * peer sees the right data_buf_offset after it checks that the dstate
	 * is DONE.
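	 * (This is the producer half of the pairing described in
	 * vgen_dringsend_shm() above: the peer's transmit path is expected
	 * to issue the matching consumer barrier after reading dstate and
	 * before reading data_buf_offset.)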
	 */
	MEMBAR_PRODUCER();

	/* Now mark the descriptor 'done' */
	rxdp->dstate = VIO_DESC_DONE;

	/* Update stats */
	statsp->ipackets++;
	statsp->rbytes += rxdp->nbytes;
	ehp = (struct ether_header *)mp->b_rptr;
	if (IS_BROADCAST(ehp))
		statsp->brdcstrcv++;
	else if (IS_MULTICAST(ehp))
		statsp->multircv++;
done:
	/* Update the next index to be processed */
	INCR_RXI(rxi, ldcp);

	/* Save the new recv index */
	ldcp->next_rxi = rxi;

	/* Return the packet received */
	*size = nbytes;
	*bp = mp;
	return (err);
}

static int
vgen_handle_dringdata_ack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	uint32_t	start;
	int32_t	end;
	uint32_t	txi;
	vgen_stats_t	*statsp;
	vnet_rx_dringdata_desc_t *txdp;
	on_trap_data_t	otd;
	int	rv = 0;
	boolean_t	ready_txd = B_FALSE;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t	*dringmsg = (vio_dring_msg_t *)tagp;

	start = dringmsg->start_idx;
	end = dringmsg->end_idx;
	statsp = &ldcp->stats;

	/*
	 * Received an ack for our transmits up to a certain dring index. This
	 * enables us to reclaim descriptors. We also send a new dring data
	 * msg to the peer to restart processing if there are pending transmit
	 * pkts.
	 */
	DBG2(vgenp, ldcp, "ACK: start(%d), end(%d)\n", start, end);

	/*
	 * In RxDringData mode (v1.6), a start index of -1 can be used by the
	 * peer to indicate that it is unspecified. However, the end index
	 * must be set correctly, indicating the last descriptor index
	 * processed.
	 */
	if (((start != VNET_START_IDX_UNSPEC) && !(CHECK_TXI(start, ldcp))) ||
	    !(CHECK_TXI(end, ldcp))) {
		/* drop the message if invalid index */
		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
		    start, end);
		return (rv);
	}

	/* Validate dring_ident */
	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
		/* invalid dring_ident, drop the msg */
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
		    dringmsg->dring_ident);
		return (rv);
	}
	statsp->dring_data_acks_rcvd++;

	/*
	 * Clear transmit flow control condition
	 * as some descriptors should be free now.
	 */
	mutex_enter(&ldcp->txlock);
	if (ldcp->tx_blocked == B_TRUE) {
		vio_net_tx_update_t vtx_update =
		    ldcp->portp->vcb.vio_net_tx_update;

		ldcp->tx_blocked = B_FALSE;
		vtx_update(ldcp->portp->vhp);
	}
	mutex_exit(&ldcp->txlock);

	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
		/*
		 * Receiver continued processing
		 * dring after sending us the ack.
		 */
		return (rv);
	}

	/*
	 * Receiver stopped processing descriptors.
	 */
	statsp->dring_stopped_acks_rcvd++;

	/*
	 * Setup on_trap() protection before accessing dring shared memory
	 * area.
	 */
	rv = LDC_ON_TRAP(&otd);
	if (rv != 0) {
		/*
		 * Data access fault occurred down the code path below while
		 * accessing the descriptors. Release any locks that we might
		 * have acquired in the code below and return failure.
		 */
		if (mutex_owned(&ldcp->wrlock)) {
			mutex_exit(&ldcp->wrlock);
		}
		return (ECONNRESET);
	}

	/*
	 * Determine if there are any pending tx descriptors ready to be
	 * processed by the receiver (peer) and if so, send a message to the
	 * peer to restart receiving.
	 */
	mutex_enter(&ldcp->wrlock);

	ready_txd = B_FALSE;
	txi = end;
	INCR_TXI(txi, ldcp);
	txdp = &ldcp->mtxdp[txi];
	if (txdp->dstate == VIO_DESC_READY) {
		ready_txd = B_TRUE;
	}

	/*
	 * We are done accessing shared memory; clear trap protection.
	 */
	(void) LDC_NO_TRAP();

	if (ready_txd == B_FALSE) {
		/*
		 * No ready tx descriptors. Set the flag to send a message to
		 * the peer when tx descriptors are ready in the transmit
		 * routine.
		 */
		ldcp->resched_peer = B_TRUE;
		mutex_exit(&ldcp->wrlock);
		return (rv);
	}

	/*
	 * We have some tx descriptors ready to be processed by the receiver.
	 * Send a dring data message to the peer to restart processing.
	 */
	ldcp->resched_peer = B_FALSE;
	mutex_exit(&ldcp->wrlock);
	rv = vgen_send_dringdata_shm(ldcp, txi, -1);
	if (rv != VGEN_SUCCESS) {
		mutex_enter(&ldcp->wrlock);
		ldcp->resched_peer = B_TRUE;
		mutex_exit(&ldcp->wrlock);
	}

	return (rv);
}

static int
vgen_handle_dringdata_nack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	uint32_t	start;
	int32_t	end;
	uint32_t	txi;
	vnet_rx_dringdata_desc_t *txdp;
	on_trap_data_t	otd;
	int	rv = 0;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t	*dringmsg = (vio_dring_msg_t *)tagp;

	DBG1(vgenp, ldcp, "enter\n");
	start = dringmsg->start_idx;
	end = dringmsg->end_idx;

	/*
	 * Peer sent a NACK msg (to indicate bad descriptors?). The start and
	 * end correspond to the range of descriptors which are being nack'd.
	 */
	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);

	/*
	 * In RxDringData mode (v1.6), a start index of -1 can be used by
	 * the peer to indicate that it is unspecified. However, the end index
	 * must be set correctly, indicating the last descriptor index
	 * processed.
	 */
	if (((start != VNET_START_IDX_UNSPEC) && !(CHECK_TXI(start, ldcp))) ||
	    !(CHECK_TXI(end, ldcp))) {
		/* drop the message if invalid index */
		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
		    start, end);
		return (rv);
	}

	/* Validate dring_ident */
	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
		/* invalid dring_ident, drop the msg */
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
		    dringmsg->dring_ident);
		return (rv);
	}

	/*
	 * Setup on_trap() protection before accessing dring shared memory
	 * area.
	 */
	rv = LDC_ON_TRAP(&otd);
	if (rv != 0) {
		/*
		 * Data access fault occurred down the code path below while
		 * accessing the descriptors. Release any locks that we might
		 * have acquired in the code below and return failure.
		 */
		mutex_exit(&ldcp->txlock);
		return (ECONNRESET);
	}

	/* We just mark the descrs as free so they can be reused */
	mutex_enter(&ldcp->txlock);
	for (txi = start; txi <= end; ) {
		txdp = &(ldcp->mtxdp[txi]);
		if (txdp->dstate == VIO_DESC_READY)
			txdp->dstate = VIO_DESC_DONE;
		INCR_TXI(txi, ldcp);
	}

	/*
	 * We are done accessing shared memory; clear trap protection.
	 */
	(void) LDC_NO_TRAP();

	mutex_exit(&ldcp->txlock);

	return (rv);
}

/*
 * Send descriptor ring data message to the peer over LDC.
 */
static int
vgen_send_dringdata_shm(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
{
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
	vio_msg_tag_t	*tagp = &msgp->tag;
	vgen_stats_t	*statsp = &ldcp->stats;
	int	rv;

#ifdef DEBUG
	if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
		return (VGEN_SUCCESS);
	}
#endif
	bzero(msgp, sizeof (*msgp));

	tagp->vio_msgtype = VIO_TYPE_DATA;
	tagp->vio_subtype = VIO_SUBTYPE_INFO;
	tagp->vio_subtype_env = VIO_DRING_DATA;
	tagp->vio_sid = ldcp->local_sid;

	msgp->dring_ident = ldcp->local_hparams.dring_ident;
	msgp->start_idx = start;
	msgp->end_idx = end;
	msgp->seq_num = atomic_inc_32_nv(&ldcp->dringdata_msgid);

	rv = vgen_sendmsg_shm(ldcp, (caddr_t)tagp, sizeof (dringmsg));
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg_shm() failed\n");
		return (rv);
	}

	statsp->dring_data_msgs_sent++;

	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");

	return (VGEN_SUCCESS);
}

/*
 * Send dring data ack message.
 */
int
vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
    int32_t end, uint8_t pstate)
{
	int	rv = 0;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t	*msgp = (vio_dring_msg_t *)tagp;
	vgen_stats_t	*statsp = &ldcp->stats;

	tagp->vio_msgtype = VIO_TYPE_DATA;
	tagp->vio_subtype = VIO_SUBTYPE_ACK;
	tagp->vio_subtype_env = VIO_DRING_DATA;
	tagp->vio_sid = ldcp->local_sid;
	msgp->start_idx = start;
	msgp->end_idx = end;
	msgp->dring_process_state = pstate;
	msgp->seq_num = atomic_inc_32_nv(&ldcp->dringdata_msgid);

	rv = vgen_sendmsg_shm(ldcp, (caddr_t)tagp, sizeof (*msgp));
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg_shm() failed\n");
	}

	statsp->dring_data_acks_sent++;
	if (pstate == VIO_DP_STOPPED) {
		statsp->dring_stopped_acks_sent++;
	}

	return (rv);
}

/*
 * Send dring data msgs (info/ack/nack) over LDC.
 */
static int
vgen_sendmsg_shm(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen)
{
	int	rv;
	size_t	len;
	uint32_t	retries = 0;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	len = msglen;
	if ((len == 0) || (msg == NULL))
		return (VGEN_FAILURE);

	do {
		len = msglen;
		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
		if (retries++ >= vgen_ldcwr_retries)
			break;
	} while (rv == EWOULDBLOCK);

	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%d)\n",
		    rv, msglen);
		return (rv);
	}

	if (len != msglen) {
		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
		    rv, msglen);
		return (VGEN_FAILURE);
	}

	return (VGEN_SUCCESS);
}