/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/machsystm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/strsun.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/ethernet.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/vnet.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_common.h>
#include <sys/vnet_common.h>
#include <sys/vnet_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_gen.h>

/*
 * This file contains the implementation of the RxDringData transfer mode of
 * the VIO Protocol in vnet. The functions in this file are invoked from
 * vnet_gen.c after RxDringData mode is negotiated with the peer during the
 * attribute phase of the handshake. This file contains functions that set up
 * the transmit and receive descriptor rings and associated resources in
 * RxDringData mode. It also contains the transmit and receive data processing
 * functions that are invoked in RxDringData mode. The data processing routines
 * in this file have the suffix '_shm' to indicate the shared memory mechanism
 * used in RxDringData mode.
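 *
 * In this mode, the receiving peer exports both its receive descriptor ring
 * and a shared buffer area (vgen_create_rx_dring()); the transmitting peer
 * maps them (vgen_map_tx_dring()/vgen_map_data()) and copies frames directly
 * into the receiver's buffers.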
 */

/* Functions exported to vnet_gen.c */
int vgen_create_rx_dring(vgen_ldc_t *ldcp);
void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
int vgen_dringsend_shm(void *arg, mblk_t *mp);
int vgen_handle_dringdata_shm(void *arg1, void *arg2);
mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    uint32_t start, int32_t end, uint8_t pstate);

/* Internal functions */
static int vgen_handle_dringdata_info_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tp);
static int vgen_handle_dringdata_ack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dringdata_nack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tp);
static int vgen_intr_rcv_shm(vgen_ldc_t *ldcp);
static int vgen_receive_packet(vgen_ldc_t *ldcp, mblk_t **bp, uint_t *size);
static int vgen_send_dringdata_shm(vgen_ldc_t *ldcp, uint32_t start,
    int32_t end);
static int vgen_sendmsg_shm(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen);

/* Functions imported from vnet_gen.c */
extern int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
extern void vgen_destroy_rxpools(void *arg);

/* Tunables */
extern uint32_t vnet_num_descriptors;
extern uint32_t vgen_chain_len;
extern uint32_t vgen_ldcwr_retries;
extern uint32_t vgen_recv_delay;
extern uint32_t vgen_recv_retries;
extern uint32_t vgen_nrbufs_factor;

#ifdef DEBUG

#define DEBUG_PRINTF vgen_debug_printf

extern int vnet_dbglevel;
extern int vgen_inject_err_flag;

extern void vgen_debug_printf(const char *fname, vgen_t *vgenp,
    vgen_ldc_t *ldcp, const char *fmt, ...);
extern boolean_t vgen_inject_error(vgen_ldc_t *ldcp, int error);

#endif

/*
 * Allocate receive resources for the channel. The resources consist of a
 * receive descriptor ring and an associated receive buffer area.
 */
int
vgen_create_rx_dring(vgen_ldc_t *ldcp)
{
        int i, j;
        int rv;
        uint32_t ncookies;
        ldc_mem_info_t minfo;
        vnet_rx_dringdata_desc_t *rxdp;
        size_t data_sz;
        vio_mblk_t *vmp;
        vio_mblk_t **rxdp_to_vmp;
        uint32_t rxdsize;
        caddr_t datap = NULL;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);

        rxdsize = sizeof (vnet_rx_dringdata_desc_t);
        ldcp->num_rxds = vnet_num_descriptors;
        ldcp->num_rbufs = VGEN_RXDRING_NRBUFS;

        /* Create the receive descriptor ring */
        rv = ldc_mem_dring_create(ldcp->num_rxds, rxdsize,
            &ldcp->rx_dring_handle);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
                goto fail;
        }

        /* Get the addr of descriptor ring */
        rv = ldc_mem_dring_info(ldcp->rx_dring_handle, &minfo);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
                goto fail;
        }
        ldcp->rxdp = (vnet_rx_dringdata_desc_t *)(minfo.vaddr);
        bzero(ldcp->rxdp, sizeof (*rxdp) * (ldcp->num_rxds));

        /*
         * Allocate a table that maps a descriptor to its associated buffer;
         * used while receiving to validate that the peer has not changed the
         * buffer offset provided in the descriptor.
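         *
         * The table is indexed by the descriptor index and is private to this
         * end of the channel; it is freed in vgen_destroy_rx_dring().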
         */
        rxdp_to_vmp = kmem_zalloc(ldcp->num_rxds * sizeof (uintptr_t),
            KM_SLEEP);
        ldcp->rxdp_to_vmp = rxdp_to_vmp;

        /*
         * Allocate a single large buffer that serves as the rx buffer area.
         * We allocate an ldc memory handle and export the buffer area as
         * shared memory. We send the ldc memcookie for this buffer space to
         * the peer, as part of the dring registration phase during the
         * handshake. We manage this buffer area as individual buffers of
         * max_frame_size and provide specific buffer offsets in each
         * descriptor to the peer. Note that the factor used to compute the
         * # of buffers (above) must be > 1 to ensure that there are more
         * buffers than the # of descriptors. This is needed because, while
         * the shared memory buffers are sent up our stack during receive, the
         * sender needs additional buffers that can be used for further
         * transmits. This also means there is no one to one correspondence
         * between the descriptor index and buffer offset. The sender has to
         * read the buffer offset in the descriptor and use the specified
         * offset to copy the tx data into the shared buffer. We (receiver)
         * manage the individual buffers and their state (see VIO_MBLK_STATEs
         * in vio_util.h).
         */
        data_sz = RXDRING_DBLK_SZ(vgenp->max_frame_size);

        ldcp->rx_data_sz = data_sz * ldcp->num_rbufs;
        ldcp->rx_dblk_sz = data_sz;
        datap = kmem_zalloc(ldcp->rx_data_sz, KM_SLEEP);
        ldcp->rx_datap = datap;

        /* Allocate an ldc memhandle for the entire rx data area */
        rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &ldcp->rx_data_handle);
        if (rv) {
                ldcp->rx_data_handle = 0;
                goto fail;
        }

        /* Allocate memory for the data cookies */
        ldcp->rx_data_cookie = kmem_zalloc(VNET_DATA_AREA_COOKIES *
            sizeof (ldc_mem_cookie_t), KM_SLEEP);

        /*
         * Bind the ldc memhandle to the corresponding rx data area.
         */
        ncookies = 0;
        rv = ldc_mem_bind_handle(ldcp->rx_data_handle, (caddr_t)datap,
            ldcp->rx_data_sz, LDC_DIRECT_MAP, LDC_MEM_W,
            ldcp->rx_data_cookie, &ncookies);
        if (rv != 0) {
                goto fail;
        }
        if ((ncookies == 0) || (ncookies > VNET_DATA_AREA_COOKIES)) {
                goto fail;
        }
        ldcp->rx_data_ncookies = ncookies;

        for (j = 1; j < ncookies; j++) {
                rv = ldc_mem_nextcookie(ldcp->rx_data_handle,
                    &(ldcp->rx_data_cookie[j]));
                if (rv != 0) {
                        DERR(vgenp, ldcp, "ldc_mem_nextcookie "
                            "failed rv (%d)", rv);
                        goto fail;
                }
        }

        /*
         * Successful in binding the handle to the rx data area. Now set up
         * mblks around each data buffer and set up the descriptors to point
         * to these rx data buffers. We associate each descriptor with a
         * buffer by specifying the buffer offset in the descriptor. When the
         * peer needs to transmit data, this offset is read by the peer to
         * determine the buffer in the mapped buffer area where the data to be
         * transmitted should be copied, for a specific descriptor.
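         *
         * vio_create_mblks() below sets up the pool of vio_mblk_t buffers
         * (ldcp->rx_vmp) over this area, with num_rbufs buffers of data_sz
         * bytes each.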
         */
        rv = vio_create_mblks(ldcp->num_rbufs, data_sz, (uint8_t *)datap,
            &ldcp->rx_vmp);
        if (rv != 0) {
                goto fail;
        }

        for (i = 0; i < ldcp->num_rxds; i++) {
                rxdp = &(ldcp->rxdp[i]);
                /* allocate an mblk around this data buffer */
                vmp = vio_allocb(ldcp->rx_vmp);
                ASSERT(vmp != NULL);
                rxdp->data_buf_offset = VIO_MBLK_DATA_OFF(vmp) + VNET_IPALIGN;
                rxdp->dstate = VIO_DESC_FREE;
                rxdp_to_vmp[i] = vmp;
        }

        /*
         * The descriptors and the associated buffers are all ready;
         * now bind descriptor ring to the channel.
         */
        rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->rx_dring_handle,
            LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
            &ldcp->rx_dring_cookie, &ncookies);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
                    "rv(%x)\n", rv);
                goto fail;
        }
        ASSERT(ncookies == 1);
        ldcp->rx_dring_ncookies = ncookies;

        /* initialize rx seqnum and index */
        ldcp->next_rxseq = VNET_ISS;
        ldcp->next_rxi = 0;

        return (VGEN_SUCCESS);

fail:
        vgen_destroy_rx_dring(ldcp);
        return (VGEN_FAILURE);
}

/*
 * Free receive resources for the channel.
 */
void
vgen_destroy_rx_dring(vgen_ldc_t *ldcp)
{
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);

        /* We first unbind the descriptor ring */
        if (ldcp->rx_dring_ncookies != 0) {
                (void) ldc_mem_dring_unbind(ldcp->rx_dring_handle);
                ldcp->rx_dring_ncookies = 0;
        }

        /* Destroy the mblks that are wrapped around the rx data buffers */
        if (ldcp->rx_vmp != NULL) {
                vio_clobber_pool(ldcp->rx_vmp);
                if (vio_destroy_mblks(ldcp->rx_vmp) != 0) {
                        /*
                         * If we can't destroy the rx pool for this channel,
                         * dispatch a task to retry and clean up. Note that we
                         * don't need to wait for the task to complete. If the
                         * vnet device itself gets detached, it will wait for
                         * the task to complete implicitly in
                         * ddi_taskq_destroy().
                         */
                        (void) ddi_taskq_dispatch(vgenp->rxp_taskq,
                            vgen_destroy_rxpools, ldcp->rx_vmp, DDI_SLEEP);
                }
                ldcp->rx_vmp = NULL;
        }

        /* Free rx data area cookies */
        if (ldcp->rx_data_cookie != NULL) {
                kmem_free(ldcp->rx_data_cookie, VNET_DATA_AREA_COOKIES *
                    sizeof (ldc_mem_cookie_t));
                ldcp->rx_data_cookie = NULL;
        }

        /* Unbind rx data area memhandle */
        if (ldcp->rx_data_ncookies != 0) {
                (void) ldc_mem_unbind_handle(ldcp->rx_data_handle);
                ldcp->rx_data_ncookies = 0;
        }

        /* Free rx data area memhandle */
        if (ldcp->rx_data_handle != 0) {
                (void) ldc_mem_free_handle(ldcp->rx_data_handle);
                ldcp->rx_data_handle = 0;
        }

        /* Now free the rx data area itself */
        if (ldcp->rx_datap != NULL) {
                /* prealloc'd rx data buffer */
                kmem_free(ldcp->rx_datap, ldcp->rx_data_sz);
                ldcp->rx_datap = NULL;
                ldcp->rx_data_sz = 0;
        }

        /* Finally, free the receive descriptor ring */
        if (ldcp->rx_dring_handle != 0) {
                (void) ldc_mem_dring_destroy(ldcp->rx_dring_handle);
                ldcp->rx_dring_handle = 0;
                ldcp->rxdp = NULL;
        }

        if (ldcp->rxdp_to_vmp != NULL) {
                kmem_free(ldcp->rxdp_to_vmp,
                    ldcp->num_rxds * sizeof (uintptr_t));
                ldcp->rxdp_to_vmp = NULL;
        }

        /* Reset rx index and seqnum */
        ldcp->next_rxi = 0;
        ldcp->next_rxseq = VNET_ISS;
}

/*
 * Map the receive descriptor ring exported
 * by the peer, as our transmit descriptor ring.
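 *
 * The registration message from the peer is sanity checked (minimum number
 * of descriptors, descriptor size and a single dring cookie) before the ring
 * is mapped with ldc_mem_dring_map(); on any failure the partially mapped
 * ring is unmapped and VGEN_FAILURE is returned.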
 */
int
vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt)
{
        int i;
        int rv;
        ldc_mem_info_t minfo;
        ldc_mem_cookie_t dcookie;
        uint32_t ncookies;
        uint32_t num_desc;
        uint32_t desc_size;
        vnet_rx_dringdata_desc_t *txdp;
        on_trap_data_t otd;
        vio_dring_reg_msg_t *msg = pkt;

        ncookies = msg->ncookies;
        num_desc = msg->num_descriptors;
        desc_size = msg->descriptor_size;

        /*
         * Sanity check.
         */
        if (num_desc < VGEN_NUM_DESCRIPTORS_MIN ||
            desc_size < sizeof (vnet_rx_dringdata_desc_t) ||
            ncookies > 1) {
                goto fail;
        }

        bcopy(&msg->cookie[0], &dcookie, sizeof (ldc_mem_cookie_t));

        /* Map the remote dring */
        rv = ldc_mem_dring_map(ldcp->ldc_handle, &dcookie, ncookies, num_desc,
            desc_size, LDC_DIRECT_MAP, &(ldcp->tx_dring_handle));
        if (rv != 0) {
                goto fail;
        }

        /*
         * Successfully mapped; now try to get info about the mapped dring
         */
        rv = ldc_mem_dring_info(ldcp->tx_dring_handle, &minfo);
        if (rv != 0) {
                goto fail;
        }

        /*
         * Save ring address, number of descriptors.
         */
        ldcp->mtxdp = (vnet_rx_dringdata_desc_t *)(minfo.vaddr);
        bcopy(&dcookie, &(ldcp->tx_dring_cookie), sizeof (dcookie));
        ldcp->tx_dring_ncookies = ncookies;
        ldcp->num_txds = num_desc;

        /* Initialize tx dring indexes and seqnum */
        ldcp->next_txi = ldcp->cur_txi = ldcp->resched_peer_txi = 0;
        ldcp->next_txseq = VNET_ISS - 1;
        ldcp->resched_peer = B_TRUE;
        ldcp->dring_mtype = minfo.mtype;
        ldcp->dringdata_msgid = 0;

        /* Save peer's dring_info values */
        bcopy(&dcookie, &(ldcp->peer_hparams.dring_cookie),
            sizeof (ldc_mem_cookie_t));
        ldcp->peer_hparams.num_desc = num_desc;
        ldcp->peer_hparams.desc_size = desc_size;
        ldcp->peer_hparams.dring_ncookies = ncookies;

        /* Set dring_ident for the peer */
        ldcp->peer_hparams.dring_ident = (uint64_t)ldcp->mtxdp;

        /* Return the dring_ident in ack msg */
        msg->dring_ident = (uint64_t)ldcp->mtxdp;

        /*
         * Mark the descriptor state as 'done'. This is implementation
         * specific and not required by the protocol. In our implementation,
         * we only need the descriptor to be in 'done' state to be used by the
         * transmit function and the peer is not aware of it. As the protocol
         * requires that during initial registration the exporting end point
         * mark the dstate as 'free', we change it to 'done' here. After this,
         * the dstate in our implementation will keep moving between 'ready',
         * set by our transmit function, and 'done', set by the peer (per
         * protocol) after receiving data.
         *
         * Set up on_trap() protection before accessing the dring shared
         * memory area.
         */
        rv = LDC_ON_TRAP(&otd);
        if (rv != 0) {
                /*
                 * Data access fault occurred down the code path below while
                 * accessing the descriptors. Return failure.
                 */
                goto fail;
        }

        for (i = 0; i < num_desc; i++) {
                txdp = &ldcp->mtxdp[i];
                txdp->dstate = VIO_DESC_DONE;
        }

        (void) LDC_NO_TRAP();
        return (VGEN_SUCCESS);

fail:
        if (ldcp->tx_dring_handle != 0) {
                (void) ldc_mem_dring_unmap(ldcp->tx_dring_handle);
                ldcp->tx_dring_handle = 0;
        }
        return (VGEN_FAILURE);
}

/*
 * Unmap the transmit descriptor ring.
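 *
 * This also unmaps the peer's exported data buffer area, frees the saved
 * data area cookies, and resets the tx dring state that was set up in
 * vgen_map_tx_dring() and vgen_map_data().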
 */
void
vgen_unmap_tx_dring(vgen_ldc_t *ldcp)
{
        /* Unmap mapped tx data area */
        if (ldcp->tx_datap != NULL) {
                (void) ldc_mem_unmap(ldcp->tx_data_handle);
                ldcp->tx_datap = NULL;
        }

        /* Free tx data area handle */
        if (ldcp->tx_data_handle != 0) {
                (void) ldc_mem_free_handle(ldcp->tx_data_handle);
                ldcp->tx_data_handle = 0;
        }

        /* Free tx data area cookies */
        if (ldcp->tx_data_cookie != NULL) {
                kmem_free(ldcp->tx_data_cookie, ldcp->tx_data_ncookies *
                    sizeof (ldc_mem_cookie_t));
                ldcp->tx_data_cookie = NULL;
                ldcp->tx_data_ncookies = 0;
        }

        /* Unmap peer's dring */
        if (ldcp->tx_dring_handle != 0) {
                (void) ldc_mem_dring_unmap(ldcp->tx_dring_handle);
                ldcp->tx_dring_handle = 0;
        }

        /* clobber tx ring members */
        bzero(&ldcp->tx_dring_cookie, sizeof (ldcp->tx_dring_cookie));
        ldcp->mtxdp = NULL;
        ldcp->next_txi = ldcp->cur_txi = ldcp->resched_peer_txi = 0;
        ldcp->num_txds = 0;
        ldcp->next_txseq = VNET_ISS - 1;
        ldcp->resched_peer = B_TRUE;
}

/*
 * Map the shared memory data buffer area exported by the peer.
 */
int
vgen_map_data(vgen_ldc_t *ldcp, void *pkt)
{
        int rv;
        vio_dring_reg_ext_msg_t *emsg;
        vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)pkt;
        uint8_t *buf = (uint8_t *)msg->cookie;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        ldc_mem_info_t minfo;

        /* skip over dring cookies */
        ASSERT(msg->ncookies == 1);
        buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));

        emsg = (vio_dring_reg_ext_msg_t *)buf;
        if (emsg->data_ncookies > VNET_DATA_AREA_COOKIES) {
                return (VGEN_FAILURE);
        }

        /* save # of data area cookies */
        ldcp->tx_data_ncookies = emsg->data_ncookies;

        /* save data area size */
        ldcp->tx_data_sz = emsg->data_area_size;

        /* allocate ldc mem handle for data area */
        rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &ldcp->tx_data_handle);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_alloc_handle() failed: %d\n", rv);
                return (VGEN_FAILURE);
        }

        /* map the data area */
        rv = ldc_mem_map(ldcp->tx_data_handle, emsg->data_cookie,
            emsg->data_ncookies, LDC_DIRECT_MAP, LDC_MEM_W,
            (caddr_t *)&ldcp->tx_datap, NULL);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_map() failed: %d\n", rv);
                return (VGEN_FAILURE);
        }

        /* get the map info */
        rv = ldc_mem_info(ldcp->tx_data_handle, &minfo);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_info() failed: %d\n", rv);
                return (VGEN_FAILURE);
        }

        if (minfo.mtype != LDC_DIRECT_MAP) {
                DWARN(vgenp, ldcp, "mtype(%d) is not direct map\n",
                    minfo.mtype);
                return (VGEN_FAILURE);
        }

        /* allocate memory for data area cookies */
        ldcp->tx_data_cookie = kmem_zalloc(emsg->data_ncookies *
            sizeof (ldc_mem_cookie_t), KM_SLEEP);

        /* save data area cookies */
        bcopy(emsg->data_cookie, ldcp->tx_data_cookie,
            emsg->data_ncookies * sizeof (ldc_mem_cookie_t));

        return (VGEN_SUCCESS);
}

/*
 * This function transmits normal data frames (non-priority) over the channel.
 * It queues the frame into the transmit descriptor ring and sends a
 * VIO_DRING_DATA message if needed, to wake up the peer to (re)start
 * processing.
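 *
 * If no transmit descriptor is available, tx_blocked is set for flow control
 * and VGEN_TX_NORESOURCES is returned; otherwise the frame is copied into the
 * peer's buffer (or dropped on error) and VGEN_TX_SUCCESS is returned.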
 */
int
vgen_dringsend_shm(void *arg, mblk_t *mp)
{
        uint32_t next_txi;
        uint32_t txi;
        vnet_rx_dringdata_desc_t *txdp;
        struct ether_header *ehp;
        size_t mblksz;
        caddr_t dst;
        mblk_t *bp;
        size_t size;
        uint32_t buf_offset;
        on_trap_data_t otd;
        int rv = 0;
        boolean_t is_bcast = B_FALSE;
        boolean_t is_mcast = B_FALSE;
        vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vgen_stats_t *statsp = &ldcp->stats;
        vgen_hparams_t *lp = &ldcp->local_hparams;
        boolean_t resched_peer = B_FALSE;
        boolean_t tx_update = B_FALSE;

        /* Drop the packet if ldc is not up or handshake is not done */
        if (ldcp->ldc_status != LDC_UP) {
                DBG2(vgenp, ldcp, "status(%d), dropping packet\n",
                    ldcp->ldc_status);
                goto dringsend_shm_exit;
        }

        if (ldcp->hphase != VH_DONE) {
                DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
                    ldcp->hphase);
                goto dringsend_shm_exit;
        }

        size = msgsize(mp);
        if (size > (size_t)lp->mtu) {
                DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
                goto dringsend_shm_exit;
        }
        if (size < ETHERMIN)
                size = ETHERMIN;

        ehp = (struct ether_header *)mp->b_rptr;
        is_bcast = IS_BROADCAST(ehp);
        is_mcast = IS_MULTICAST(ehp);

        /*
         * Set up on_trap() protection before accessing shared memory areas
         * (descriptor and data buffer). Note that we enable this protection a
         * little early and turn it off slightly later, rather than keeping it
         * enabled strictly at the points in the code below where the
         * descriptor and data buffer are accessed. This is done for
         * performance reasons:
         * (a) to avoid calling the trap protection code while holding a mutex.
         * (b) to avoid multiple on/off steps for descriptor and data accesses.
         */
        rv = LDC_ON_TRAP(&otd);
        if (rv != 0) {
                /*
                 * Data access fault occurred down the code path below while
                 * accessing either the descriptor or the data buffer. Release
                 * any locks that we might have acquired in the code below and
                 * return failure.
                 */
                DERR(vgenp, ldcp, "data access fault occurred\n");
                statsp->oerrors++;
                if (mutex_owned(&ldcp->txlock)) {
                        mutex_exit(&ldcp->txlock);
                }
                if (mutex_owned(&ldcp->wrlock)) {
                        mutex_exit(&ldcp->wrlock);
                }
                goto dringsend_shm_exit;
        }

        /*
         * Allocate a descriptor
         */
        mutex_enter(&ldcp->txlock);
        txi = next_txi = ldcp->next_txi;
        INCR_TXI(next_txi, ldcp);
        txdp = &(ldcp->mtxdp[txi]);
        if (txdp->dstate != VIO_DESC_DONE) { /* out of descriptors */
                if (ldcp->tx_blocked == B_FALSE) {
                        ldcp->tx_blocked_lbolt = ddi_get_lbolt();
                        ldcp->tx_blocked = B_TRUE;
                }
                statsp->tx_no_desc++;
                mutex_exit(&ldcp->txlock);
                (void) LDC_NO_TRAP();
                return (VGEN_TX_NORESOURCES);
        } else {
                txdp->dstate = VIO_DESC_INITIALIZING;
        }

        if (ldcp->tx_blocked == B_TRUE) {
                ldcp->tx_blocked = B_FALSE;
                tx_update = B_TRUE;
        }

        /* Update descriptor ring index */
        ldcp->next_txi = next_txi;
        mutex_exit(&ldcp->txlock);

        if (tx_update == B_TRUE) {
                vio_net_tx_update_t vtx_update =
                    ldcp->portp->vcb.vio_net_tx_update;

                vtx_update(ldcp->portp->vhp);
        }

        /* Ensure load ordering of dstate (above) and data_buf_offset. */
        MEMBAR_CONSUMER();

        /* Get the offset of the buffer to be used */
        buf_offset = txdp->data_buf_offset;

        /* Access the buffer using the offset */
        dst = (caddr_t)ldcp->tx_datap + buf_offset;

        /* Copy data into mapped transmit buffer */
        for (bp = mp; bp != NULL; bp = bp->b_cont) {
                mblksz = MBLKL(bp);
                bcopy(bp->b_rptr, dst, mblksz);
                dst += mblksz;
        }

        /* Set the size of data in the descriptor */
        txdp->nbytes = size;

        /*
         * Ensure store ordering of nbytes and dstate (below), so that the
         * peer sees the right nbytes value after it checks that the dstate is
         * READY.
         */
        MEMBAR_PRODUCER();

        mutex_enter(&ldcp->wrlock);

        ASSERT(txdp->dstate == VIO_DESC_INITIALIZING);

        /* Mark the descriptor ready */
        txdp->dstate = VIO_DESC_READY;

        /* Check if peer needs wake up (handled below) */
        if (ldcp->resched_peer == B_TRUE && ldcp->resched_peer_txi == txi) {
                resched_peer = B_TRUE;
                ldcp->resched_peer = B_FALSE;
        }

        /* Update tx stats */
        statsp->opackets++;
        statsp->obytes += size;
        if (is_bcast)
                statsp->brdcstxmt++;
        else if (is_mcast)
                statsp->multixmt++;

        mutex_exit(&ldcp->wrlock);

        /*
         * We are done accessing shared memory; clear trap protection.
         */
        (void) LDC_NO_TRAP();

        /*
         * Need to wake up the peer?
         */
        if (resched_peer == B_TRUE) {
                rv = vgen_send_dringdata_shm(ldcp, (uint32_t)txi, -1);
                if (rv != 0) {
                        /* error: drop the packet */
                        DWARN(vgenp, ldcp, "failed sending dringdata msg "
                            "rv(%d) len(%d)\n", rv, size);
                        mutex_enter(&ldcp->wrlock);
                        statsp->oerrors++;
                        ldcp->resched_peer = B_TRUE;
                        mutex_exit(&ldcp->wrlock);
                }
        }

dringsend_shm_exit:
        if (rv == ECONNRESET || rv == EACCES) {
                (void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
        }
        freemsg(mp);
        return (VGEN_TX_SUCCESS);
}

/*
 * Process dring data messages (info/ack/nack)
 */
int
vgen_handle_dringdata_shm(void *arg1, void *arg2)
{
        vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
        vio_msg_tag_t *tagp = (vio_msg_tag_t *)arg2;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        int rv = 0;

        switch (tagp->vio_subtype) {

        case VIO_SUBTYPE_INFO:
                /*
                 * To reduce the locking contention, release the
                 * cblock here and re-acquire it once we are done
                 * receiving packets.
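                 *
                 * Receive processing itself is serialized by rxlock, which is
                 * held across vgen_handle_dringdata_info_shm() below.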
                 */
                mutex_exit(&ldcp->cblock);
                mutex_enter(&ldcp->rxlock);
                rv = vgen_handle_dringdata_info_shm(ldcp, tagp);
                mutex_exit(&ldcp->rxlock);
                mutex_enter(&ldcp->cblock);
                if (rv != 0) {
                        DWARN(vgenp, ldcp, "handle_data_info failed(%d)\n", rv);
                }
                break;

        case VIO_SUBTYPE_ACK:
                rv = vgen_handle_dringdata_ack_shm(ldcp, tagp);
                if (rv != 0) {
                        DWARN(vgenp, ldcp, "handle_data_ack failed(%d)\n", rv);
                }
                break;

        case VIO_SUBTYPE_NACK:
                rv = vgen_handle_dringdata_nack_shm(ldcp, tagp);
                if (rv != 0) {
                        DWARN(vgenp, ldcp, "handle_data_nack failed(%d)\n", rv);
                }
                break;
        }

        return (rv);
}

static int
vgen_handle_dringdata_info_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        uint32_t start;
        int32_t end;
        int rv = 0;
        vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vgen_stats_t *statsp = &ldcp->stats;

        start = dringmsg->start_idx;
        end = dringmsg->end_idx;

        DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
            start, end);

        if (!(CHECK_RXI(start, ldcp)) ||
            ((end != -1) && !(CHECK_RXI(end, ldcp)))) {
                DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
                    start, end);
                /* drop the message if invalid index */
                return (0);
        }

        /* validate dring_ident */
        if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
                    dringmsg->dring_ident);
                /* invalid dring_ident, drop the msg */
                return (0);
        }

        statsp->dring_data_msgs_rcvd++;

        /*
         * If we are in polling mode, return from here without processing the
         * dring. We will process the dring in the context of the polling
         * thread.
         */
        if (ldcp->polling_on == B_TRUE) {
                return (0);
        }

        /*
         * Process the dring and receive packets in intr context.
         */
        rv = vgen_intr_rcv_shm(ldcp);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "vgen_intr_rcv_shm() failed\n");
        }
        return (rv);
}

/*
 * Process the rx descriptor ring in the context of the interrupt thread
 * (vgen_ldc_cb() callback) and send the received packets up the stack.
 */
static int
vgen_intr_rcv_shm(vgen_ldc_t *ldcp)
{
        int rv;
        uint32_t end_ix;
        vio_dring_msg_t msg;
        uint_t mblk_sz;
        int count = 0;
        int total_count = 0;
        mblk_t *bp = NULL;
        mblk_t *bpt = NULL;
        mblk_t *mp = NULL;
        vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;

        ASSERT(MUTEX_HELD(&ldcp->rxlock));

        do {
                rv = vgen_receive_packet(ldcp, &mp, &mblk_sz);
                if (rv != 0) {
                        if (rv == EIO) {
                                /* Invalid descriptor error; get next */
                                continue;
                        }
                        DTRACE_PROBE1(vgen_intr_nopkts, vgen_ldc_t *, ldcp);
                        break;
                }

                /* Build a chain of received packets */
                if (bp == NULL) {
                        /* first pkt */
                        bp = mp;
                        bpt = bp;
                        bpt->b_next = NULL;
                } else {
                        mp->b_next = NULL;
                        bpt->b_next = mp;
                        bpt = mp;
                }

                total_count++;
                count++;

                /*
                 * We are receiving the packets in interrupt context. If we
                 * have gathered vgen_chain_len (tunable) # of packets in the
                 * chain, send them up. (See vgen_poll_rcv_shm() for receiving
                 * in polling thread context.)
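                 *
                 * vgen_chain_len is one of the extern tunables declared at
                 * the top of this file; it bounds the number of mblks chained
                 * before the chain is passed to the vio_net_rx_cb callback.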
                 */
                if (count == vgen_chain_len) {
                        DTRACE_PROBE2(vgen_intr_pkts, vgen_ldc_t *, ldcp,
                            int, count);
                        mutex_exit(&ldcp->rxlock);
                        vrx_cb(ldcp->portp->vhp, bp);
                        mutex_enter(&ldcp->rxlock);
                        bp = bpt = NULL;
                        count = 0;
                }

                /*
                 * Stop further processing if we processed the entire dring
                 * once; otherwise continue.
                 */
        } while (total_count < ldcp->num_rxds);

        if (bp != NULL) {
                DTRACE_PROBE2(vgen_intr_pkts, vgen_ldc_t *, ldcp, int, count);
                mutex_exit(&ldcp->rxlock);
                vrx_cb(ldcp->portp->vhp, bp);
                mutex_enter(&ldcp->rxlock);
        }

        if (ldcp->polling_on == B_FALSE) {
                /*
                 * We send a stopped message to the peer (sender) only while
                 * we are in intr mode; this allows the peer to send further
                 * data intrs (dring data msgs) to us.
                 */
                end_ix = ldcp->next_rxi;
                DECR_RXI(end_ix, ldcp);
                msg.dring_ident = ldcp->peer_hparams.dring_ident;
                rv = vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
                    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
                return (rv);
        }

        return (0);
}

/*
 * Process the rx descriptor ring in the context of the mac polling thread.
 * Receive packets up to the limit specified by bytes_to_pickup or until there
 * are no more packets, whichever occurs first. Return the chain of received
 * packets.
 */
mblk_t *
vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup)
{
        uint_t mblk_sz = 0;
        uint_t sz = 0;
        mblk_t *bp = NULL;
        mblk_t *bpt = NULL;
        mblk_t *mp = NULL;
        int count = 0;
        int rv;

        mutex_enter(&ldcp->rxlock);

        if (ldcp->hphase != VH_DONE) {
                /* Channel is being reset and handshake not complete */
                mutex_exit(&ldcp->rxlock);
                return (NULL);
        }

        do {
                rv = vgen_receive_packet(ldcp, &mp, &mblk_sz);
                if (rv != 0) {
                        if (rv == EIO) {
                                /* Invalid descriptor error; get next */
                                continue;
                        }
                        DTRACE_PROBE1(vgen_poll_nopkts, vgen_ldc_t *, ldcp);
                        break;
                }

                /* Build a chain of received packets */
                if (bp == NULL) {
                        /* first pkt */
                        bp = mp;
                        bpt = bp;
                        bpt->b_next = NULL;
                } else {
                        mp->b_next = NULL;
                        bpt->b_next = mp;
                        bpt = mp;
                }

                /* Compute total size accumulated */
                sz += mblk_sz;
                count++;

                /* Reached the bytes limit; we are done. */
                if (sz >= bytes_to_pickup) {
                        break;
                }

        _NOTE(CONSTCOND)
        } while (1);

        /*
         * We prepend any high priority packets to the chain of packets; note
         * that if we are already at the bytes_to_pickup limit, we might
         * slightly exceed that in such cases. That should be ok, as these
         * pkts are expected to be small in size and arrive at an interval in
         * the order of a few seconds.
         */
        if (ldcp->rx_pktdata == vgen_handle_pkt_data &&
            ldcp->rx_pri_head != NULL) {
                ldcp->rx_pri_tail->b_next = bp;
                bp = ldcp->rx_pri_head;
                ldcp->rx_pri_head = ldcp->rx_pri_tail = NULL;
        }

        mutex_exit(&ldcp->rxlock);

        DTRACE_PROBE2(vgen_poll_pkts, vgen_ldc_t *, ldcp, int, count);
        DTRACE_PROBE2(vgen_poll_bytes, vgen_ldc_t *, ldcp, uint_t, sz);
        return (bp);
}

/*
 * Process the next index in the rx dring and receive the associated packet.
 *
 * Returns:
 *      bp: Success: The received packet.
 *          Failure: NULL
 *      size: Success: Size of received packet.
 *            Failure: 0
 * retval:
 *      Success: 0
 *      Failure: EAGAIN: Descriptor not ready.
 *               EIO: Descriptor contents invalid.
 *               ENOMEM: Failed to allocate an mblk to copy the data into.
 */
static int
vgen_receive_packet(vgen_ldc_t *ldcp, mblk_t **bp, uint_t *size)
{
        uint32_t rxi;
        vio_mblk_t *vmp;
        vio_mblk_t *new_vmp;
        struct ether_header *ehp;
        vnet_rx_dringdata_desc_t *rxdp;
        int err = 0;
        uint32_t nbytes = 0;
        mblk_t *mp = NULL;
        mblk_t *dmp = NULL;
        vgen_stats_t *statsp = &ldcp->stats;
        vgen_hparams_t *lp = &ldcp->local_hparams;

        rxi = ldcp->next_rxi;
        rxdp = &(ldcp->rxdp[rxi]);
        vmp = ldcp->rxdp_to_vmp[rxi];

        if (rxdp->dstate != VIO_DESC_READY) {
                /*
                 * Descriptor is not ready.
                 */
                DTRACE_PROBE1(vgen_noready_rxds, vgen_ldc_t *, ldcp);
                return (EAGAIN);
        }

        /*
         * Ensure load ordering of dstate and nbytes.
         */
        MEMBAR_CONSUMER();

        nbytes = rxdp->nbytes;

        if ((nbytes < ETHERMIN) ||
            (nbytes > lp->mtu) ||
            (rxdp->data_buf_offset !=
            (VIO_MBLK_DATA_OFF(vmp) + VNET_IPALIGN))) {
                /*
                 * Descriptor contents invalid.
                 */
                statsp->ierrors++;
                rxdp->dstate = VIO_DESC_DONE;
                err = EIO;
                goto done;
        }

        /*
         * Now allocate a new buffer for this descriptor before sending up the
         * buffer being processed. If that fails, we fall back to copying the
         * data into a dynamically allocated mblk (see below), so that the
         * current buffer can remain bound to the descriptor.
         */
        new_vmp = vio_allocb(ldcp->rx_vmp);

        /*
         * Process the current buffer being received.
         */
        mp = vmp->mp;

        if (new_vmp == NULL) {
                /*
                 * We failed to get a new mapped buffer that is needed to
                 * refill the descriptor. In that case, leave the current
                 * buffer bound to the descriptor; allocate an mblk dynamically
                 * and copy the contents of the buffer to the mblk. Then send
                 * up this mblk. This way the sender has the same buffer as
                 * before that can be used to send new data.
                 */
                statsp->norcvbuf++;
                dmp = allocb(nbytes + VNET_IPALIGN, BPRI_MED);
                if (dmp == NULL) {
                        statsp->ierrors++;
                        return (ENOMEM);
                }
                bcopy(mp->b_rptr + VNET_IPALIGN,
                    dmp->b_rptr + VNET_IPALIGN, nbytes);
                mp = dmp;
        } else {
                /* Mark the status of the current rbuf */
                vmp->state = VIO_MBLK_HAS_DATA;

                /* Set the offset of the new buffer in the descriptor */
                rxdp->data_buf_offset =
                    VIO_MBLK_DATA_OFF(new_vmp) + VNET_IPALIGN;
                ldcp->rxdp_to_vmp[rxi] = new_vmp;
        }
        mp->b_rptr += VNET_IPALIGN;
        mp->b_wptr = mp->b_rptr + nbytes;

        /*
         * Ensure store ordering of data_buf_offset and dstate, so that the
         * peer sees the right data_buf_offset after it checks that the dstate
         * is DONE.
         */
        MEMBAR_PRODUCER();

        /* Now mark the descriptor 'done' */
        rxdp->dstate = VIO_DESC_DONE;

        /* Update stats */
        statsp->ipackets++;
        statsp->rbytes += rxdp->nbytes;
        ehp = (struct ether_header *)mp->b_rptr;
        if (IS_BROADCAST(ehp))
                statsp->brdcstrcv++;
        else if (IS_MULTICAST(ehp))
                statsp->multircv++;

done:
        /* Update the next index to be processed */
        INCR_RXI(rxi, ldcp);

        /* Save the new recv index */
        ldcp->next_rxi = rxi;

        /* Return the packet received */
        *size = nbytes;
        *bp = mp;
        return (err);
}

static int
vgen_handle_dringdata_ack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        uint32_t start;
        int32_t end;
        uint32_t txi;
        vgen_stats_t *statsp;
        vnet_rx_dringdata_desc_t *txdp;
        on_trap_data_t otd;
        int rv = 0;
        boolean_t ready_txd = B_FALSE;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;

        start = dringmsg->start_idx;
        end = dringmsg->end_idx;
        statsp = &ldcp->stats;

        /*
         * Received an ack for our transmits up to a certain dring index. This
         * enables us to reclaim descriptors. We also send a new dring data
         * msg to the peer to restart processing if there are pending transmit
         * pkts.
         */
        DBG2(vgenp, ldcp, "ACK: start(%d), end(%d)\n", start, end);

        /*
         * In RxDringData mode (v1.6), a start index of -1 can be used by the
         * peer to indicate that it is unspecified. However, the end index
         * must be set correctly, indicating the last descriptor index
         * processed.
         */
        if (((start != VNET_START_IDX_UNSPEC) && !(CHECK_TXI(start, ldcp))) ||
            !(CHECK_TXI(end, ldcp))) {
                /* drop the message if invalid index */
                DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
                    start, end);
                return (rv);
        }

        /* Validate dring_ident */
        if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
                /* invalid dring_ident, drop the msg */
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
                    dringmsg->dring_ident);
                return (rv);
        }
        statsp->dring_data_acks_rcvd++;

        /*
         * Clear transmit flow control condition
         * as some descriptors should be free now.
         */
        mutex_enter(&ldcp->txlock);
        if (ldcp->tx_blocked == B_TRUE) {
                vio_net_tx_update_t vtx_update =
                    ldcp->portp->vcb.vio_net_tx_update;

                ldcp->tx_blocked = B_FALSE;
                vtx_update(ldcp->portp->vhp);
        }
        mutex_exit(&ldcp->txlock);

        if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
                /*
                 * Receiver continued processing
                 * dring after sending us the ack.
                 */
                return (rv);
        }

        /*
         * Receiver stopped processing descriptors.
         */
        statsp->dring_stopped_acks_rcvd++;

        /*
         * Set up on_trap() protection before accessing the dring shared
         * memory area.
         */
        rv = LDC_ON_TRAP(&otd);
        if (rv != 0) {
                /*
                 * Data access fault occurred down the code path below while
                 * accessing the descriptors. Release any locks that we might
                 * have acquired in the code below and return failure.
                 */
                if (mutex_owned(&ldcp->wrlock)) {
                        mutex_exit(&ldcp->wrlock);
                }
                return (ECONNRESET);
        }

        /*
         * Determine if there are any pending tx descriptors ready to be
         * processed by the receiver (peer) and if so, send a message to the
         * peer to restart receiving.
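         *
         * Only the descriptor that follows the last acked index (end) needs
         * to be checked; if it is not READY, resched_peer is set so that the
         * restart message is sent from the transmit routine instead.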
         */
        mutex_enter(&ldcp->wrlock);

        ready_txd = B_FALSE;
        txi = end;
        INCR_TXI(txi, ldcp);
        txdp = &ldcp->mtxdp[txi];
        if (txdp->dstate == VIO_DESC_READY) {
                ready_txd = B_TRUE;
        }

        /*
         * We are done accessing shared memory; clear trap protection.
         */
        (void) LDC_NO_TRAP();

        if (ready_txd == B_FALSE) {
                /*
                 * No ready tx descriptors. Set the flag to send a message to
                 * the peer when tx descriptors are ready in transmit routine.
                 */
                ldcp->resched_peer = B_TRUE;
                ldcp->resched_peer_txi = txi;
                mutex_exit(&ldcp->wrlock);
                return (rv);
        }

        /*
         * We have some tx descriptors ready to be processed by the receiver.
         * Send a dring data message to the peer to restart processing.
         */
        ldcp->resched_peer = B_FALSE;
        mutex_exit(&ldcp->wrlock);
        rv = vgen_send_dringdata_shm(ldcp, txi, -1);
        if (rv != VGEN_SUCCESS) {
                mutex_enter(&ldcp->wrlock);
                ldcp->resched_peer = B_TRUE;
                mutex_exit(&ldcp->wrlock);
        }

        return (rv);
}

static int
vgen_handle_dringdata_nack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        uint32_t start;
        int32_t end;
        uint32_t txi;
        vnet_rx_dringdata_desc_t *txdp;
        on_trap_data_t otd;
        int rv = 0;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;

        DBG1(vgenp, ldcp, "enter\n");

        start = dringmsg->start_idx;
        end = dringmsg->end_idx;

        /*
         * Peer sent a NACK msg (to indicate bad descriptors?). The start and
         * end correspond to the range of descriptors which are being nack'd.
         */
        DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);

        /*
         * In RxDringData mode (v1.6), a start index of -1 can be used by
         * the peer to indicate that it is unspecified. However, the end index
         * must be set correctly, indicating the last descriptor index
         * processed.
         */
        if (((start != VNET_START_IDX_UNSPEC) && !(CHECK_TXI(start, ldcp))) ||
            !(CHECK_TXI(end, ldcp))) {
                /* drop the message if invalid index */
                DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
                    start, end);
                return (rv);
        }

        /* Validate dring_ident */
        if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
                /* invalid dring_ident, drop the msg */
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
                    dringmsg->dring_ident);
                return (rv);
        }

        /*
         * Set up on_trap() protection before accessing the dring shared
         * memory area.
         */
        rv = LDC_ON_TRAP(&otd);
        if (rv != 0) {
                /*
                 * Data access fault occurred down the code path below while
                 * accessing the descriptors. Release any locks that we might
                 * have acquired in the code below and return failure.
                 */
                mutex_exit(&ldcp->txlock);
                return (ECONNRESET);
        }

        /* Mark the descriptors as 'done' so they can be reused for transmits */
        mutex_enter(&ldcp->txlock);
        for (txi = start; txi <= end; ) {
                txdp = &(ldcp->mtxdp[txi]);
                if (txdp->dstate == VIO_DESC_READY)
                        txdp->dstate = VIO_DESC_DONE;
                INCR_TXI(txi, ldcp);
        }

        /*
         * We are done accessing shared memory; clear trap protection.
         */
        (void) LDC_NO_TRAP();

        mutex_exit(&ldcp->txlock);

        return (rv);
}

/*
 * Send descriptor ring data message to the peer over LDC.
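 *
 * Callers in this file pass the index of the first READY descriptor as
 * 'start' and -1 as 'end'. The sequence number of the message is taken from
 * dringdata_msgid, which is incremented atomically for every dring data
 * message sent on the channel.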
 */
static int
vgen_send_dringdata_shm(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
{
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t dringmsg, *msgp = &dringmsg;
        vio_msg_tag_t *tagp = &msgp->tag;
        vgen_stats_t *statsp = &ldcp->stats;
        int rv;

#ifdef DEBUG
        if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
                return (VGEN_SUCCESS);
        }
#endif
        bzero(msgp, sizeof (*msgp));

        tagp->vio_msgtype = VIO_TYPE_DATA;
        tagp->vio_subtype = VIO_SUBTYPE_INFO;
        tagp->vio_subtype_env = VIO_DRING_DATA;
        tagp->vio_sid = ldcp->local_sid;

        msgp->dring_ident = ldcp->local_hparams.dring_ident;
        msgp->start_idx = start;
        msgp->end_idx = end;
        msgp->seq_num = atomic_inc_32_nv(&ldcp->dringdata_msgid);

        rv = vgen_sendmsg_shm(ldcp, (caddr_t)tagp, sizeof (dringmsg));
        if (rv != VGEN_SUCCESS) {
                DWARN(vgenp, ldcp, "vgen_sendmsg_shm() failed\n");
                return (rv);
        }

        statsp->dring_data_msgs_sent++;

        DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");

        return (VGEN_SUCCESS);
}

/*
 * Send dring data ack message.
 */
int
vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
    int32_t end, uint8_t pstate)
{
        int rv = 0;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
        vgen_stats_t *statsp = &ldcp->stats;

        tagp->vio_msgtype = VIO_TYPE_DATA;
        tagp->vio_subtype = VIO_SUBTYPE_ACK;
        tagp->vio_subtype_env = VIO_DRING_DATA;
        tagp->vio_sid = ldcp->local_sid;
        msgp->start_idx = start;
        msgp->end_idx = end;
        msgp->dring_process_state = pstate;
        msgp->seq_num = atomic_inc_32_nv(&ldcp->dringdata_msgid);

        rv = vgen_sendmsg_shm(ldcp, (caddr_t)tagp, sizeof (*msgp));
        if (rv != VGEN_SUCCESS) {
                DWARN(vgenp, ldcp, "vgen_sendmsg_shm() failed\n");
        }

        statsp->dring_data_acks_sent++;
        if (pstate == VIO_DP_STOPPED) {
                statsp->dring_stopped_acks_sent++;
        }

        return (rv);
}

/*
 * Send dring data msgs (info/ack/nack) over LDC.
 */
static int
vgen_sendmsg_shm(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen)
{
        int rv;
        size_t len;
        uint32_t retries = 0;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);

        len = msglen;
        if ((len == 0) || (msg == NULL))
                return (VGEN_FAILURE);

        do {
                len = msglen;
                rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
                if (retries++ >= vgen_ldcwr_retries)
                        break;
        } while (rv == EWOULDBLOCK);

        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%d)\n",
                    rv, msglen);
                return (rv);
        }

        if (len != msglen) {
                DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
                    rv, msglen);
                return (VGEN_FAILURE);
        }

        return (VGEN_SUCCESS);
}