/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/machsystm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/strsun.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/ethernet.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/vnet.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_common.h>
#include <sys/vnet_common.h>
#include <sys/vnet_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_gen.h>

/*
 * This file contains the implementation of the RxDringData transfer mode of
 * the VIO Protocol in vnet. The functions in this file are invoked from
 * vnet_gen.c after RxDringData mode is negotiated with the peer during the
 * attribute phase of the handshake. This file contains functions that set up
 * the transmit and receive descriptor rings, and associated resources in
 * RxDringData mode. It also contains the transmit and receive data processing
 * functions that are invoked in RxDringData mode. The data processing
 * routines in this file have the suffix '_shm' to indicate the shared memory
 * mechanism used in RxDringData mode.
 */

/* Functions exported to vnet_gen.c */
int vgen_create_rx_dring(vgen_ldc_t *ldcp);
void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
int vgen_dringsend_shm(void *arg, mblk_t *mp);
int vgen_handle_dringdata_shm(void *arg1, void *arg2);
mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    uint32_t start, int32_t end, uint8_t pstate);

/* Internal functions */
static int vgen_handle_dringdata_info_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tp);
static int vgen_handle_dringdata_ack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dringdata_nack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tp);
static int vgen_intr_rcv_shm(vgen_ldc_t *ldcp);
static int vgen_receive_packet(vgen_ldc_t *ldcp, mblk_t **bp, uint_t *size);
static int vgen_send_dringdata_shm(vgen_ldc_t *ldcp, uint32_t start,
    int32_t end);
static int vgen_sendmsg_shm(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen);

/* Functions imported from vnet_gen.c */
extern int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
extern void vgen_destroy_rxpools(void *arg);

/* Tunables */
extern uint32_t vnet_num_descriptors;
extern uint32_t vgen_chain_len;
extern uint32_t vgen_ldcwr_retries;
extern uint32_t vgen_recv_delay;
extern uint32_t vgen_recv_retries;
extern uint32_t vgen_nrbufs_factor;

#ifdef DEBUG

#define	DEBUG_PRINTF	vgen_debug_printf

extern int vnet_dbglevel;
extern int vgen_inject_err_flag;

extern void vgen_debug_printf(const char *fname, vgen_t *vgenp,
    vgen_ldc_t *ldcp, const char *fmt, ...);
extern boolean_t vgen_inject_error(vgen_ldc_t *ldcp, int error);

#endif

/*
 * Allocate receive resources for the channel. The resources consist of a
 * receive descriptor ring and an associated receive buffer area.
 */
int
vgen_create_rx_dring(vgen_ldc_t *ldcp)
{
	int i;
	int rv;
	uint32_t ncookies;
	ldc_mem_info_t minfo;
	vnet_rx_dringdata_desc_t *rxdp;
	size_t data_sz;
	vio_mblk_t *vmp;
	vio_mblk_t **rxdp_to_vmp;
	uint32_t rxdsize;
	caddr_t datap = NULL;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

	rxdsize = sizeof (vnet_rx_dringdata_desc_t);
	ldcp->num_rxds = vnet_num_descriptors;
	ldcp->num_rbufs = vnet_num_descriptors * vgen_nrbufs_factor;

	/* Create the receive descriptor ring */
	rv = ldc_mem_dring_create(ldcp->num_rxds, rxdsize,
	    &ldcp->rx_dring_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
		goto fail;
	}

	/* Get the addr of descriptor ring */
	rv = ldc_mem_dring_info(ldcp->rx_dring_handle, &minfo);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
		goto fail;
	}
	ldcp->rxdp = (vnet_rx_dringdata_desc_t *)(minfo.vaddr);
	bzero(ldcp->rxdp, sizeof (*rxdp) * (ldcp->num_rxds));

	/*
	 * Allocate a table that maps a descriptor to its associated buffer;
	 * used while receiving to validate that the peer has not changed the
	 * buffer offset provided in the descriptor.
	 */
	rxdp_to_vmp = kmem_zalloc(ldcp->num_rxds * sizeof (uintptr_t),
	    KM_SLEEP);
	ldcp->rxdp_to_vmp = rxdp_to_vmp;

	/*
	 * Allocate a single large buffer that serves as the rx buffer area.
	 * We allocate an ldc memory handle and export the buffer area as
	 * shared memory. We send the ldc memcookie for this buffer space to
	 * the peer, as part of the dring registration phase during handshake.
	 * We manage this buffer area as individual buffers of max_frame_size
	 * and provide specific buffer offsets in each descriptor to the peer.
	 * Note that the factor used to compute the # of buffers (above) must
	 * be > 1 to ensure that there are more buffers than the # of
	 * descriptors. This is needed because, while the shared memory
	 * buffers are sent up our stack during receive, the sender needs
	 * additional buffers that can be used for further transmits. This
	 * also means there is no one-to-one correspondence between the
	 * descriptor index and buffer offset. The sender has to read the
	 * buffer offset in the descriptor and use the specified offset to
	 * copy the tx data into the shared buffer. We (receiver) manage the
	 * individual buffers and their state (see VIO_MBLK_STATEs in
	 * vio_util.h).
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	data_sz = VNET_ROUNDUP_2K(data_sz);

	ldcp->rx_data_sz = data_sz * ldcp->num_rbufs;
	ldcp->rx_dblk_sz = data_sz;
	datap = kmem_zalloc(ldcp->rx_data_sz, KM_SLEEP);
	ldcp->rx_datap = datap;

	/* Allocate an ldc memhandle for the entire rx data area */
	rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &ldcp->rx_data_handle);
	if (rv) {
		ldcp->rx_data_handle = 0;
		goto fail;
	}

	/* Allocate memory for the data cookies */
	ldcp->rx_data_cookie = kmem_zalloc(VNET_DATA_AREA_COOKIES *
	    sizeof (ldc_mem_cookie_t), KM_SLEEP);

	/*
	 * Bind the ldc memhandle to the corresponding rx data area.
	 */
	ncookies = 0;
	rv = ldc_mem_bind_handle(ldcp->rx_data_handle, (caddr_t)datap,
	    ldcp->rx_data_sz, LDC_DIRECT_MAP, LDC_MEM_W,
	    ldcp->rx_data_cookie, &ncookies);
	if (rv != 0) {
		goto fail;
	}
	if ((ncookies == 0) || (ncookies > VNET_DATA_AREA_COOKIES)) {
		goto fail;
	}
	ldcp->rx_data_ncookies = ncookies;

	/*
	 * Successful in binding the handle to the rx data area. Now set up
	 * mblks around each data buffer and set up the descriptors to point
	 * to these rx data buffers. We associate each descriptor with a
	 * buffer by specifying the buffer offset in the descriptor. When the
	 * peer needs to transmit data, this offset is read by the peer to
	 * determine the buffer in the mapped buffer area where the data to be
	 * transmitted should be copied, for a specific descriptor.
	 */
	rv = vio_create_mblks(ldcp->num_rbufs, data_sz, (uint8_t *)datap,
	    &ldcp->rx_vmp);
	if (rv != 0) {
		goto fail;
	}

	for (i = 0; i < ldcp->num_rxds; i++) {
		rxdp = &(ldcp->rxdp[i]);
		/* allocate an mblk around this data buffer */
		vmp = vio_allocb(ldcp->rx_vmp);
		ASSERT(vmp != NULL);
		rxdp->data_buf_offset = VIO_MBLK_DATA_OFF(vmp) + VNET_IPALIGN;
		rxdp->dstate = VIO_DESC_FREE;
		rxdp_to_vmp[i] = vmp;
	}

	/*
	 * The descriptors and the associated buffers are all ready;
	 * now bind the descriptor ring to the channel.
	 */
	rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->rx_dring_handle,
	    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
	    &ldcp->rx_dring_cookie, &ncookies);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
		    "rv(%x)\n", rv);
		goto fail;
	}
	ASSERT(ncookies == 1);
	ldcp->rx_dring_ncookies = ncookies;

	/* initialize rx seqnum and index */
	ldcp->next_rxseq = VNET_ISS;
	ldcp->next_rxi = 0;

	return (VGEN_SUCCESS);

fail:
	vgen_destroy_rx_dring(ldcp);
	return (VGEN_FAILURE);
}

/*
 * Free receive resources for the channel.
 */
void
vgen_destroy_rx_dring(vgen_ldc_t *ldcp)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

	/* We first unbind the descriptor ring */
	if (ldcp->rx_dring_ncookies != 0) {
		(void) ldc_mem_dring_unbind(ldcp->rx_dring_handle);
		ldcp->rx_dring_ncookies = 0;
	}

	/* Destroy the mblks that are wrapped around the rx data buffers */
	if (ldcp->rx_vmp != NULL) {
		vio_clobber_pool(ldcp->rx_vmp);
		if (vio_destroy_mblks(ldcp->rx_vmp) != 0) {
			/*
			 * If we can't destroy the rx pool for this channel,
			 * dispatch a task to retry and clean up. Note that we
			 * don't need to wait for the task to complete. If the
			 * vnet device itself gets detached, it will wait for
			 * the task to complete implicitly in
			 * ddi_taskq_destroy().
			 */
			(void) ddi_taskq_dispatch(vgenp->rxp_taskq,
			    vgen_destroy_rxpools, ldcp->rx_vmp, DDI_SLEEP);
		}
		ldcp->rx_vmp = NULL;
	}

	/* Free rx data area cookies */
	if (ldcp->rx_data_cookie != NULL) {
		kmem_free(ldcp->rx_data_cookie, VNET_DATA_AREA_COOKIES *
		    sizeof (ldc_mem_cookie_t));
		ldcp->rx_data_cookie = NULL;
	}

	/* Unbind rx data area memhandle */
	if (ldcp->rx_data_ncookies != 0) {
		(void) ldc_mem_unbind_handle(ldcp->rx_data_handle);
		ldcp->rx_data_ncookies = 0;
	}

	/* Free rx data area memhandle */
	if (ldcp->rx_data_handle != 0) {
		(void) ldc_mem_free_handle(ldcp->rx_data_handle);
		ldcp->rx_data_handle = 0;
	}

	/* Now free the rx data area itself */
	if (ldcp->rx_datap != NULL) {
		/* prealloc'd rx data buffer */
		kmem_free(ldcp->rx_datap, ldcp->rx_data_sz);
		ldcp->rx_datap = NULL;
		ldcp->rx_data_sz = 0;
	}

	/* Finally, free the receive descriptor ring */
	if (ldcp->rx_dring_handle != 0) {
		(void) ldc_mem_dring_destroy(ldcp->rx_dring_handle);
		ldcp->rx_dring_handle = 0;
		ldcp->rxdp = NULL;
	}

	if (ldcp->rxdp_to_vmp != NULL) {
		kmem_free(ldcp->rxdp_to_vmp,
		    ldcp->num_rxds * sizeof (uintptr_t));
		ldcp->rxdp_to_vmp = NULL;
	}

	/* Reset rx index and seqnum */
	ldcp->next_rxi = 0;
	ldcp->next_rxseq = VNET_ISS;
}

/*
 * Map the receive descriptor ring exported
 * by the peer, as our transmit descriptor ring.
 */
int
vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt)
{
	int i;
	int rv;
	ldc_mem_info_t minfo;
	ldc_mem_cookie_t dcookie;
	uint32_t ncookies;
	uint32_t num_desc;
	uint32_t desc_size;
	vnet_rx_dringdata_desc_t *txdp;
	on_trap_data_t otd;
	vio_dring_reg_msg_t *msg = pkt;

	ncookies = msg->ncookies;
	num_desc = msg->num_descriptors;
	desc_size = msg->descriptor_size;

	/*
	 * Sanity check.
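	 * The peer must export at least VGEN_NUM_DESCRIPTORS_MIN descriptors
	 * of the expected descriptor size, and the dring must be described
	 * by a single memory cookie.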
	 */
	if (num_desc < VGEN_NUM_DESCRIPTORS_MIN ||
	    desc_size < sizeof (vnet_rx_dringdata_desc_t) ||
	    ncookies > 1) {
		goto fail;
	}

	bcopy(&msg->cookie[0], &dcookie, sizeof (ldc_mem_cookie_t));

	/* Map the remote dring */
	rv = ldc_mem_dring_map(ldcp->ldc_handle, &dcookie, ncookies, num_desc,
	    desc_size, LDC_DIRECT_MAP, &(ldcp->tx_dring_handle));
	if (rv != 0) {
		goto fail;
	}

	/*
	 * Successfully mapped; now try to get info about the mapped dring.
	 */
	rv = ldc_mem_dring_info(ldcp->tx_dring_handle, &minfo);
	if (rv != 0) {
		goto fail;
	}

	/*
	 * Save ring address, number of descriptors.
	 */
	ldcp->mtxdp = (vnet_rx_dringdata_desc_t *)(minfo.vaddr);
	bcopy(&dcookie, &(ldcp->tx_dring_cookie), sizeof (dcookie));
	ldcp->tx_dring_ncookies = ncookies;
	ldcp->num_txds = num_desc;

	/* Initialize tx dring indexes and seqnum */
	ldcp->next_txi = ldcp->cur_txi = 0;
	ldcp->next_txseq = VNET_ISS - 1;
	ldcp->resched_peer = B_TRUE;
	ldcp->dring_mtype = minfo.mtype;
	ldcp->dringdata_msgid = 0;

	/* Save peer's dring_info values */
	bcopy(&dcookie, &(ldcp->peer_hparams.dring_cookie),
	    sizeof (ldc_mem_cookie_t));
	ldcp->peer_hparams.num_desc = num_desc;
	ldcp->peer_hparams.desc_size = desc_size;
	ldcp->peer_hparams.dring_ncookies = ncookies;

	/* Set dring_ident for the peer */
	ldcp->peer_hparams.dring_ident = (uint64_t)ldcp->mtxdp;

	/* Return the dring_ident in ack msg */
	msg->dring_ident = (uint64_t)ldcp->mtxdp;

	/*
	 * Mark the descriptor state as 'done'. This is implementation
	 * specific and not required by the protocol. In our implementation,
	 * we only need the descriptor to be in 'done' state to be used by the
	 * transmit function and the peer is not aware of it. As the protocol
	 * requires that during initial registration the exporting end point
	 * mark the dstate as 'free', we change it to 'done' here. After this,
	 * the dstate in our implementation will keep moving between 'ready',
	 * set by our transmit function, and 'done', set by the peer (per
	 * protocol) after receiving data.
	 * Set up on_trap() protection before accessing the dring shared
	 * memory area.
	 */
	rv = LDC_ON_TRAP(&otd);
	if (rv != 0) {
		/*
		 * Data access fault occurred down the code path below while
		 * accessing the descriptors. Return failure.
		 */
		goto fail;
	}

	for (i = 0; i < num_desc; i++) {
		txdp = &ldcp->mtxdp[i];
		txdp->dstate = VIO_DESC_DONE;
	}

	(void) LDC_NO_TRAP();
	return (VGEN_SUCCESS);

fail:
	if (ldcp->tx_dring_handle != 0) {
		(void) ldc_mem_dring_unmap(ldcp->tx_dring_handle);
		ldcp->tx_dring_handle = 0;
	}
	return (VGEN_FAILURE);
}

/*
 * Unmap the transmit descriptor ring.
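 * This also unmaps the peer's exported data buffer area, releases the
 * associated memory handle and cookies, and resets the tx ring state.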
 */
void
vgen_unmap_tx_dring(vgen_ldc_t *ldcp)
{
	/* Unmap mapped tx data area */
	if (ldcp->tx_datap != NULL) {
		(void) ldc_mem_unmap(ldcp->tx_data_handle);
		ldcp->tx_datap = NULL;
	}

	/* Free tx data area handle */
	if (ldcp->tx_data_handle != 0) {
		(void) ldc_mem_free_handle(ldcp->tx_data_handle);
		ldcp->tx_data_handle = 0;
	}

	/* Free tx data area cookies */
	if (ldcp->tx_data_cookie != NULL) {
		kmem_free(ldcp->tx_data_cookie, ldcp->tx_data_ncookies *
		    sizeof (ldc_mem_cookie_t));
		ldcp->tx_data_cookie = NULL;
		ldcp->tx_data_ncookies = 0;
	}

	/* Unmap peer's dring */
	if (ldcp->tx_dring_handle != 0) {
		(void) ldc_mem_dring_unmap(ldcp->tx_dring_handle);
		ldcp->tx_dring_handle = 0;
	}

	/* clobber tx ring members */
	bzero(&ldcp->tx_dring_cookie, sizeof (ldcp->tx_dring_cookie));
	ldcp->mtxdp = NULL;
	ldcp->next_txi = ldcp->cur_txi = 0;
	ldcp->num_txds = 0;
	ldcp->next_txseq = VNET_ISS - 1;
	ldcp->resched_peer = B_TRUE;
}

/*
 * Map the shared memory data buffer area exported by the peer.
 */
int
vgen_map_data(vgen_ldc_t *ldcp, void *pkt)
{
	int rv;
	vio_dring_reg_ext_msg_t *emsg;
	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)pkt;
	uint8_t *buf = (uint8_t *)msg->cookie;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

	/* skip over dring cookies */
	ASSERT(msg->ncookies == 1);
	buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));

	emsg = (vio_dring_reg_ext_msg_t *)buf;
	if (emsg->data_ncookies > VNET_DATA_AREA_COOKIES) {
		return (VGEN_FAILURE);
	}

	/* save # of data area cookies */
	ldcp->tx_data_ncookies = emsg->data_ncookies;

	/* save data area size */
	ldcp->tx_data_sz = emsg->data_area_size;

	/* allocate ldc mem handle for data area */
	rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &ldcp->tx_data_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_alloc_handle() failed: %d\n", rv);
		return (VGEN_FAILURE);
	}

	/* map the data area */
	rv = ldc_mem_map(ldcp->tx_data_handle, emsg->data_cookie,
	    emsg->data_ncookies, LDC_DIRECT_MAP, LDC_MEM_W,
	    (caddr_t *)&ldcp->tx_datap, NULL);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_map() failed: %d\n", rv);
		(void) ldc_mem_free_handle(ldcp->tx_data_handle);
		ldcp->tx_data_handle = 0;
		return (VGEN_FAILURE);
	}

	/* allocate memory for data area cookies */
	ldcp->tx_data_cookie = kmem_zalloc(emsg->data_ncookies *
	    sizeof (ldc_mem_cookie_t), KM_SLEEP);

	/* save data area cookies */
	bcopy(emsg->data_cookie, ldcp->tx_data_cookie,
	    emsg->data_ncookies * sizeof (ldc_mem_cookie_t));

	return (VGEN_SUCCESS);
}

/*
 * This function transmits normal data frames (non-priority) over the channel.
 * It queues the frame into the transmit descriptor ring and sends a
 * VIO_DRING_DATA message if needed, to wake up the peer to (re)start
 * processing.
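 * The wakeup message is sent only when 'resched_peer' is set, i.e., when the
 * peer has stopped processing the ring and needs to be notified that new
 * descriptors are ready.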
 */
int
vgen_dringsend_shm(void *arg, mblk_t *mp)
{
	uint32_t next_txi;
	uint32_t txi;
	vnet_rx_dringdata_desc_t *txdp;
	vnet_rx_dringdata_desc_t *ntxdp;
	struct ether_header *ehp;
	size_t mblksz;
	caddr_t dst;
	mblk_t *bp;
	size_t size;
	uint32_t buf_offset;
	on_trap_data_t otd;
	int rv = 0;
	boolean_t is_bcast = B_FALSE;
	boolean_t is_mcast = B_FALSE;
	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t *statsp = &ldcp->stats;
	vgen_hparams_t *lp = &ldcp->local_hparams;
	boolean_t resched_peer = B_FALSE;
	boolean_t tx_update = B_FALSE;

	/* Drop the packet if ldc is not up or handshake is not done */
	if (ldcp->ldc_status != LDC_UP) {
		DBG2(vgenp, ldcp, "status(%d), dropping packet\n",
		    ldcp->ldc_status);
		goto dringsend_shm_exit;
	}

	if (ldcp->hphase != VH_DONE) {
		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
		    ldcp->hphase);
		goto dringsend_shm_exit;
	}

	size = msgsize(mp);
	if (size > (size_t)lp->mtu) {
		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
		goto dringsend_shm_exit;
	}
	if (size < ETHERMIN)
		size = ETHERMIN;

	ehp = (struct ether_header *)mp->b_rptr;
	is_bcast = IS_BROADCAST(ehp);
	is_mcast = IS_MULTICAST(ehp);

	/*
	 * Set up on_trap() protection before accessing shared memory areas
	 * (descriptor and data buffer). Note that we enable this protection a
	 * little early and turn it off slightly later, rather than keeping it
	 * enabled strictly at the points in the code below where the
	 * descriptor and data buffer are accessed. This is done for
	 * performance reasons:
	 * (a) to avoid calling the trap protection code while holding mutex.
	 * (b) to avoid multiple on/off steps for descriptor and data accesses.
	 */
	rv = LDC_ON_TRAP(&otd);
	if (rv != 0) {
		/*
		 * Data access fault occurred down the code path below while
		 * accessing either the descriptor or the data buffer. Release
		 * any locks that we might have acquired in the code below and
		 * return failure.
		 */
		DERR(vgenp, ldcp, "data access fault occurred\n");
		statsp->oerrors++;
		if (mutex_owned(&ldcp->txlock)) {
			mutex_exit(&ldcp->txlock);
		}
		if (mutex_owned(&ldcp->wrlock)) {
			mutex_exit(&ldcp->wrlock);
		}
		goto dringsend_shm_exit;
	}

	/*
	 * Allocate a descriptor
	 */
	mutex_enter(&ldcp->txlock);
	txi = next_txi = ldcp->next_txi;
	INCR_TXI(next_txi, ldcp);
	ntxdp = &(ldcp->mtxdp[next_txi]);
	if (ntxdp->dstate != VIO_DESC_DONE) {	/* out of descriptors */
		if (ldcp->tx_blocked == B_FALSE) {
			ldcp->tx_blocked_lbolt = ddi_get_lbolt();
			ldcp->tx_blocked = B_TRUE;
		}
		statsp->tx_no_desc++;
		mutex_exit(&ldcp->txlock);
		(void) LDC_NO_TRAP();
		return (VGEN_TX_NORESOURCES);
	}

	if (ldcp->tx_blocked == B_TRUE) {
		ldcp->tx_blocked = B_FALSE;
		tx_update = B_TRUE;
	}

	/* Update descriptor ring index */
	ldcp->next_txi = next_txi;
	mutex_exit(&ldcp->txlock);

	if (tx_update == B_TRUE) {
		vio_net_tx_update_t vtx_update =
		    ldcp->portp->vcb.vio_net_tx_update;

		vtx_update(ldcp->portp->vhp);
	}

	/* Access the descriptor */
	txdp = &(ldcp->mtxdp[txi]);

	/*
	 * Ensure load ordering of dstate (above) and data_buf_offset.
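	 * We must observe dstate == VIO_DESC_DONE (checked under txlock
	 * above) before reading the buffer offset that the peer wrote into
	 * this descriptor.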
	 */
	MEMBAR_CONSUMER();

	/* Get the offset of the buffer to be used */
	buf_offset = txdp->data_buf_offset;

	/* Access the buffer using the offset */
	dst = (caddr_t)ldcp->tx_datap + buf_offset;

	/* Copy data into mapped transmit buffer */
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
	}

	/* Set the size of data in the descriptor */
	txdp->nbytes = size;

	/*
	 * Ensure store ordering of nbytes and dstate (below), so that the
	 * peer sees the right nbytes value after it checks that the dstate
	 * is READY.
	 */
	MEMBAR_PRODUCER();

	mutex_enter(&ldcp->wrlock);

	/* Mark the descriptor ready */
	txdp->dstate = VIO_DESC_READY;

	/* Check if peer needs wake up (handled below) */
	if (ldcp->resched_peer == B_TRUE) {
		ldcp->resched_peer = B_FALSE;
		resched_peer = B_TRUE;
	}

	/* Update tx stats */
	statsp->opackets++;
	statsp->obytes += size;
	if (is_bcast)
		statsp->brdcstxmt++;
	else if (is_mcast)
		statsp->multixmt++;

	mutex_exit(&ldcp->wrlock);

	/*
	 * We are done accessing shared memory; clear trap protection.
	 */
	(void) LDC_NO_TRAP();

	/*
	 * Need to wake up the peer?
	 */
	if (resched_peer == B_TRUE) {
		rv = vgen_send_dringdata_shm(ldcp, (uint32_t)txi, -1);
		if (rv != 0) {
			/* error: drop the packet */
			DWARN(vgenp, ldcp, "failed sending dringdata msg "
			    "rv(%d) len(%d)\n", rv, size);
			mutex_enter(&ldcp->wrlock);
			statsp->oerrors++;
			ldcp->resched_peer = B_TRUE;
			mutex_exit(&ldcp->wrlock);
		}
	}

dringsend_shm_exit:
	if (rv == ECONNRESET || rv == EACCES) {
		(void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
	}
	freemsg(mp);
	return (VGEN_TX_SUCCESS);
}

/*
 * Process dring data messages (info/ack/nack)
 */
int
vgen_handle_dringdata_shm(void *arg1, void *arg2)
{
	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
	vio_msg_tag_t *tagp = (vio_msg_tag_t *)arg2;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	int rv = 0;

	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:
		/*
		 * To reduce the locking contention, release the
		 * cblock here and re-acquire it once we are done
		 * receiving packets.
		 */
		mutex_exit(&ldcp->cblock);
		mutex_enter(&ldcp->rxlock);
		rv = vgen_handle_dringdata_info_shm(ldcp, tagp);
		mutex_exit(&ldcp->rxlock);
		mutex_enter(&ldcp->cblock);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "handle_data_info failed(%d)\n", rv);
		}
		break;

	case VIO_SUBTYPE_ACK:
		rv = vgen_handle_dringdata_ack_shm(ldcp, tagp);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "handle_data_ack failed(%d)\n", rv);
		}
		break;

	case VIO_SUBTYPE_NACK:
		rv = vgen_handle_dringdata_nack_shm(ldcp, tagp);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "handle_data_nack failed(%d)\n", rv);
		}
		break;
	}

	return (rv);
}

static int
vgen_handle_dringdata_info_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	uint32_t start;
	int32_t end;
	int rv = 0;
	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t *statsp = &ldcp->stats;

	start = dringmsg->start_idx;
	end = dringmsg->end_idx;

	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
	    start, end);

	if (!(CHECK_RXI(start, ldcp)) ||
	    ((end != -1) && !(CHECK_RXI(end, ldcp)))) {
		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
		    start, end);
		/* drop the message if invalid index */
		return (0);
	}

	/* validate dring_ident */
	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
		    dringmsg->dring_ident);
		/* invalid dring_ident, drop the msg */
		return (0);
	}

	statsp->dring_data_msgs_rcvd++;

	/*
	 * If we are in polling mode, return from here without processing the
	 * dring. We will process the dring in the context of the polling
	 * thread.
	 */
	if (ldcp->polling_on == B_TRUE) {
		return (0);
	}

	/*
	 * Process the dring and receive packets in intr context.
	 */
	rv = vgen_intr_rcv_shm(ldcp);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "vgen_intr_rcv_shm() failed\n");
	}
	return (rv);
}

/*
 * Process the rx descriptor ring in the context of the interrupt thread
 * (vgen_ldc_cb() callback) and send the received packets up the stack.
 */
static int
vgen_intr_rcv_shm(vgen_ldc_t *ldcp)
{
	int rv;
	uint32_t end_ix;
	vio_dring_msg_t msg;
	uint_t mblk_sz;
	int count = 0;
	int total_count = 0;
	mblk_t *bp = NULL;
	mblk_t *bpt = NULL;
	mblk_t *mp = NULL;
	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;

	ASSERT(MUTEX_HELD(&ldcp->rxlock));

	do {
		rv = vgen_receive_packet(ldcp, &mp, &mblk_sz);
		if (rv != 0) {
			if (rv == EIO) {
				/* Invalid descriptor error; get next */
				continue;
			}
			DTRACE_PROBE1(vgen_intr_nopkts, vgen_ldc_t *, ldcp);
			break;
		}

		/* Build a chain of received packets */
		if (bp == NULL) {
			/* first pkt */
			bp = mp;
			bpt = bp;
			bpt->b_next = NULL;
		} else {
			mp->b_next = NULL;
			bpt->b_next = mp;
			bpt = mp;
		}

		total_count++;
		count++;

		/*
		 * We are receiving the packets in interrupt context. If we
		 * have gathered vgen_chain_len (tunable) # of packets in the
		 * chain, send them up. (See vgen_poll_rcv_shm() for receiving
		 * in polling thread context).
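		 * The rxlock is dropped across the callback below, so that
		 * the chain is sent up the stack without holding the lock.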
		 */
		if (count == vgen_chain_len) {
			DTRACE_PROBE2(vgen_intr_pkts, vgen_ldc_t *, ldcp,
			    int, count);
			mutex_exit(&ldcp->rxlock);
			vrx_cb(ldcp->portp->vhp, bp);
			mutex_enter(&ldcp->rxlock);
			bp = bpt = NULL;
			count = 0;
		}

		/*
		 * Stop further processing if we processed the entire dring
		 * once; otherwise continue.
		 */
	} while (total_count < ldcp->num_rxds);

	if (bp != NULL) {
		DTRACE_PROBE2(vgen_intr_pkts, vgen_ldc_t *, ldcp, int, count);
		mutex_exit(&ldcp->rxlock);
		vrx_cb(ldcp->portp->vhp, bp);
		mutex_enter(&ldcp->rxlock);
	}

	if (ldcp->polling_on == B_FALSE) {
		/*
		 * We send a stopped message to the peer (sender) only while
		 * we are in intr mode; this allows the peer to send further
		 * data intrs (dring data msgs) to us.
		 */
		end_ix = ldcp->next_rxi;
		DECR_RXI(end_ix, ldcp);
		msg.dring_ident = ldcp->peer_hparams.dring_ident;
		rv = vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
		    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
		return (rv);
	}

	return (0);
}

/*
 * Process the rx descriptor ring in the context of the mac polling thread.
 * Receive packets up to the limit specified by bytes_to_pickup or until there
 * are no more packets, whichever occurs first. Return the chain of received
 * packets.
 */
mblk_t *
vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup)
{
	uint_t mblk_sz = 0;
	uint_t sz = 0;
	mblk_t *bp = NULL;
	mblk_t *bpt = NULL;
	mblk_t *mp = NULL;
	int count = 0;
	int rv;

	mutex_enter(&ldcp->rxlock);

	if (ldcp->hphase != VH_DONE) {
		/* Channel is being reset and handshake not complete */
		mutex_exit(&ldcp->rxlock);
		return (NULL);
	}

	do {
		rv = vgen_receive_packet(ldcp, &mp, &mblk_sz);
		if (rv != 0) {
			if (rv == EIO) {
				/* Invalid descriptor error; get next */
				continue;
			}
			DTRACE_PROBE1(vgen_poll_nopkts, vgen_ldc_t *, ldcp);
			break;
		}

		/* Build a chain of received packets */
		if (bp == NULL) {
			/* first pkt */
			bp = mp;
			bpt = bp;
			bpt->b_next = NULL;
		} else {
			mp->b_next = NULL;
			bpt->b_next = mp;
			bpt = mp;
		}

		/* Compute total size accumulated */
		sz += mblk_sz;
		count++;

		/* Reached the bytes limit; we are done. */
		if (sz >= bytes_to_pickup) {
			break;
		}

		_NOTE(CONSTCOND)
	} while (1);

	/*
	 * We prepend any high priority packets to the chain of packets; note
	 * that if we are already at the bytes_to_pickup limit, we might
	 * slightly exceed it in such cases. That should be ok, as these pkts
	 * are expected to be small in size and to arrive at intervals on the
	 * order of a few seconds.
	 */
	if (ldcp->rx_pktdata == vgen_handle_pkt_data &&
	    ldcp->rx_pri_head != NULL) {
		ldcp->rx_pri_tail->b_next = bp;
		bp = ldcp->rx_pri_head;
		ldcp->rx_pri_head = ldcp->rx_pri_tail = NULL;
	}

	mutex_exit(&ldcp->rxlock);

	DTRACE_PROBE2(vgen_poll_pkts, vgen_ldc_t *, ldcp, int, count);
	DTRACE_PROBE2(vgen_poll_bytes, vgen_ldc_t *, ldcp, uint_t, sz);
	return (bp);
}

/*
 * Process the next index in the rx dring and receive the associated packet.
 *
 * Returns:
 *	bp:	Success: The received packet.
 *		Failure: NULL
 *	size:	Success: Size of received packet.
 *		Failure: 0
 *	retval:
 *		Success: 0
 *		Failure: EAGAIN: Descriptor not ready.
 *			 EIO: Descriptor contents invalid.
 *			 ENOMEM: Failed to allocate a replacement mblk.
 */
static int
vgen_receive_packet(vgen_ldc_t *ldcp, mblk_t **bp, uint_t *size)
{
	uint32_t rxi;
	vio_mblk_t *vmp;
	vio_mblk_t *new_vmp;
	struct ether_header *ehp;
	vnet_rx_dringdata_desc_t *rxdp;
	int err = 0;
	uint32_t nbytes = 0;
	mblk_t *mp = NULL;
	mblk_t *dmp = NULL;
	vgen_stats_t *statsp = &ldcp->stats;
	vgen_hparams_t *lp = &ldcp->local_hparams;

	rxi = ldcp->next_rxi;
	rxdp = &(ldcp->rxdp[rxi]);
	vmp = ldcp->rxdp_to_vmp[rxi];

	if (rxdp->dstate != VIO_DESC_READY) {
		/*
		 * Descriptor is not ready.
		 */
		DTRACE_PROBE1(vgen_noready_rxds, vgen_ldc_t *, ldcp);
		return (EAGAIN);
	}

	/*
	 * Ensure load ordering of dstate and nbytes.
	 */
	MEMBAR_CONSUMER();

	nbytes = rxdp->nbytes;

	if ((nbytes < ETHERMIN) ||
	    (nbytes > lp->mtu) ||
	    (rxdp->data_buf_offset !=
	    (VIO_MBLK_DATA_OFF(vmp) + VNET_IPALIGN))) {
		/*
		 * Descriptor contents invalid.
		 */
		statsp->ierrors++;
		rxdp->dstate = VIO_DESC_DONE;
		err = EIO;
		goto done;
	}

	/*
	 * Now allocate a new buffer for this descriptor before sending up the
	 * buffer being processed. If that fails, stop processing, as we are
	 * out of receive buffers.
	 */
	new_vmp = vio_allocb(ldcp->rx_vmp);

	/*
	 * Process the current buffer being received.
	 */
	mp = vmp->mp;

	if (new_vmp == NULL) {
		/*
		 * We failed to get a new mapped buffer that is needed to
		 * refill the descriptor. In that case, leave the current
		 * buffer bound to the descriptor; allocate an mblk
		 * dynamically and copy the contents of the buffer to the
		 * mblk. Then send up this mblk. This way the sender has the
		 * same buffer as before that can be used to send new data.
		 */
		statsp->norcvbuf++;
		dmp = allocb(nbytes + VNET_IPALIGN, BPRI_MED);
		if (dmp == NULL) {
			statsp->ierrors++;
			return (ENOMEM);
		}
		bcopy(mp->b_rptr + VNET_IPALIGN,
		    dmp->b_rptr + VNET_IPALIGN, nbytes);
		mp = dmp;
	} else {
		/* Mark the status of the current rbuf */
		vmp->state = VIO_MBLK_HAS_DATA;

		/* Set the offset of the new buffer in the descriptor */
		rxdp->data_buf_offset =
		    VIO_MBLK_DATA_OFF(new_vmp) + VNET_IPALIGN;
		ldcp->rxdp_to_vmp[rxi] = new_vmp;
	}
	mp->b_rptr += VNET_IPALIGN;
	mp->b_wptr = mp->b_rptr + nbytes;

	/*
	 * Ensure store ordering of data_buf_offset and dstate, so that the
	 * peer sees the right data_buf_offset after it checks that the dstate
	 * is DONE.
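	 * This pairs with the consumer barrier used on the transmit side
	 * (see vgen_dringsend_shm()), which reads data_buf_offset only after
	 * checking that the dstate is DONE.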
	 */
	MEMBAR_PRODUCER();

	/* Now mark the descriptor 'done' */
	rxdp->dstate = VIO_DESC_DONE;

	/* Update stats */
	statsp->ipackets++;
	statsp->rbytes += nbytes;
	ehp = (struct ether_header *)mp->b_rptr;
	if (IS_BROADCAST(ehp))
		statsp->brdcstrcv++;
	else if (IS_MULTICAST(ehp))
		statsp->multircv++;

done:
	/* Update the next index to be processed */
	INCR_RXI(rxi, ldcp);

	/* Save the new recv index */
	ldcp->next_rxi = rxi;

	/* Return the packet received */
	*size = nbytes;
	*bp = mp;
	return (err);
}

static int
vgen_handle_dringdata_ack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	uint32_t start;
	int32_t end;
	uint32_t txi;
	vgen_stats_t *statsp;
	vnet_rx_dringdata_desc_t *txdp;
	on_trap_data_t otd;
	int rv = 0;
	boolean_t ready_txd = B_FALSE;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;

	start = dringmsg->start_idx;
	end = dringmsg->end_idx;
	statsp = &ldcp->stats;

	/*
	 * Received an ack for our transmits up to a certain dring index. This
	 * enables us to reclaim descriptors. We also send a new dring data
	 * msg to the peer to restart processing if there are pending transmit
	 * pkts.
	 */
	DBG2(vgenp, ldcp, "ACK: start(%d), end(%d)\n", start, end);

	/*
	 * In RxDringData mode (v1.6), a start index of -1 can be used by the
	 * peer to indicate that it is unspecified. However, the end index
	 * must be set correctly, indicating the last descriptor index
	 * processed.
	 */
	if (((start != VNET_START_IDX_UNSPEC) && !(CHECK_TXI(start, ldcp))) ||
	    !(CHECK_TXI(end, ldcp))) {
		/* drop the message if invalid index */
		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
		    start, end);
		return (rv);
	}

	/* Validate dring_ident */
	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
		/* invalid dring_ident, drop the msg */
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
		    dringmsg->dring_ident);
		return (rv);
	}
	statsp->dring_data_acks_rcvd++;

	/*
	 * Clear transmit flow control condition
	 * as some descriptors should be free now.
	 */
	mutex_enter(&ldcp->txlock);
	if (ldcp->tx_blocked == B_TRUE) {
		vio_net_tx_update_t vtx_update =
		    ldcp->portp->vcb.vio_net_tx_update;

		ldcp->tx_blocked = B_FALSE;
		vtx_update(ldcp->portp->vhp);
	}
	mutex_exit(&ldcp->txlock);

	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
		/*
		 * Receiver continued processing
		 * dring after sending us the ack.
		 */
		return (rv);
	}

	/*
	 * Receiver stopped processing descriptors.
	 */
	statsp->dring_stopped_acks_rcvd++;

	/*
	 * Set up on_trap() protection before accessing the dring shared
	 * memory area.
	 */
	rv = LDC_ON_TRAP(&otd);
	if (rv != 0) {
		/*
		 * Data access fault occurred down the code path below while
		 * accessing the descriptors. Release any locks that we might
		 * have acquired in the code below and return failure.
		 */
		if (mutex_owned(&ldcp->wrlock)) {
			mutex_exit(&ldcp->wrlock);
		}
		return (ECONNRESET);
	}

	/*
	 * Determine if there are any pending tx descriptors ready to be
	 * processed by the receiver (peer) and, if so, send a message to the
	 * peer to restart receiving.
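	 * Only the descriptor following the acked end index needs to be
	 * checked; if it is READY, the peer stopped while work was still
	 * pending and must be restarted.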
	 */
	mutex_enter(&ldcp->wrlock);

	ready_txd = B_FALSE;
	txi = end;
	INCR_TXI(txi, ldcp);
	txdp = &ldcp->mtxdp[txi];
	if (txdp->dstate == VIO_DESC_READY) {
		ready_txd = B_TRUE;
	}

	/*
	 * We are done accessing shared memory; clear trap protection.
	 */
	(void) LDC_NO_TRAP();

	if (ready_txd == B_FALSE) {
		/*
		 * No ready tx descriptors. Set the flag to send a message to
		 * the peer when tx descriptors are ready in transmit routine.
		 */
		ldcp->resched_peer = B_TRUE;
		mutex_exit(&ldcp->wrlock);
		return (rv);
	}

	/*
	 * We have some tx descriptors ready to be processed by the receiver.
	 * Send a dring data message to the peer to restart processing.
	 */
	ldcp->resched_peer = B_FALSE;
	mutex_exit(&ldcp->wrlock);
	rv = vgen_send_dringdata_shm(ldcp, txi, -1);
	if (rv != VGEN_SUCCESS) {
		mutex_enter(&ldcp->wrlock);
		ldcp->resched_peer = B_TRUE;
		mutex_exit(&ldcp->wrlock);
	}

	return (rv);
}

static int
vgen_handle_dringdata_nack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	uint32_t start;
	int32_t end;
	uint32_t txi;
	vnet_rx_dringdata_desc_t *txdp;
	on_trap_data_t otd;
	int rv = 0;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;

	DBG1(vgenp, ldcp, "enter\n");
	start = dringmsg->start_idx;
	end = dringmsg->end_idx;

	/*
	 * Peer sent a NACK msg (to indicate bad descriptors?). The start and
	 * end correspond to the range of descriptors which are being nack'd.
	 */
	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);

	/*
	 * In RxDringData mode (v1.6), a start index of -1 can be used by the
	 * peer to indicate that it is unspecified. However, the end index
	 * must be set correctly, indicating the last descriptor index
	 * processed.
	 */
	if (((start != VNET_START_IDX_UNSPEC) && !(CHECK_TXI(start, ldcp))) ||
	    !(CHECK_TXI(end, ldcp))) {
		/* drop the message if invalid index */
		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
		    start, end);
		return (rv);
	}

	/* Validate dring_ident */
	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
		/* invalid dring_ident, drop the msg */
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
		    dringmsg->dring_ident);
		return (rv);
	}

	/*
	 * Set up on_trap() protection before accessing the dring shared
	 * memory area.
	 */
	rv = LDC_ON_TRAP(&otd);
	if (rv != 0) {
		/*
		 * Data access fault occurred down the code path below while
		 * accessing the descriptors. Release any locks that we might
		 * have acquired in the code below and return failure.
		 */
		mutex_exit(&ldcp->txlock);
		return (ECONNRESET);
	}

	/* We just mark the descrs as 'done' so they can be reused */
	mutex_enter(&ldcp->txlock);
	for (txi = start; txi <= end; ) {
		txdp = &(ldcp->mtxdp[txi]);
		if (txdp->dstate == VIO_DESC_READY)
			txdp->dstate = VIO_DESC_DONE;
		INCR_TXI(txi, ldcp);
	}

	/*
	 * We are done accessing shared memory; clear trap protection.
	 */
	(void) LDC_NO_TRAP();

	mutex_exit(&ldcp->txlock);

	return (rv);
}

/*
 * Send descriptor ring data message to the peer over LDC.
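 * The message carries the start index of the newly READY descriptors; the
 * callers in this file pass an end index of -1 (unspecified).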
 */
static int
vgen_send_dringdata_shm(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t dringmsg, *msgp = &dringmsg;
	vio_msg_tag_t *tagp = &msgp->tag;
	vgen_stats_t *statsp = &ldcp->stats;
	int rv;

#ifdef DEBUG
	if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
		return (VGEN_SUCCESS);
	}
#endif
	bzero(msgp, sizeof (*msgp));

	tagp->vio_msgtype = VIO_TYPE_DATA;
	tagp->vio_subtype = VIO_SUBTYPE_INFO;
	tagp->vio_subtype_env = VIO_DRING_DATA;
	tagp->vio_sid = ldcp->local_sid;

	msgp->dring_ident = ldcp->local_hparams.dring_ident;
	msgp->start_idx = start;
	msgp->end_idx = end;
	msgp->seq_num = atomic_inc_32_nv(&ldcp->dringdata_msgid);

	rv = vgen_sendmsg_shm(ldcp, (caddr_t)tagp, sizeof (dringmsg));
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg_shm() failed\n");
		return (rv);
	}

	statsp->dring_data_msgs_sent++;

	DBG2(vgenp, ldcp, "DRING_DATA_SENT\n");

	return (VGEN_SUCCESS);
}

/*
 * Send dring data ack message.
 */
int
vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
    int32_t end, uint8_t pstate)
{
	int rv = 0;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
	vgen_stats_t *statsp = &ldcp->stats;

	tagp->vio_msgtype = VIO_TYPE_DATA;
	tagp->vio_subtype = VIO_SUBTYPE_ACK;
	tagp->vio_subtype_env = VIO_DRING_DATA;
	tagp->vio_sid = ldcp->local_sid;
	msgp->start_idx = start;
	msgp->end_idx = end;
	msgp->dring_process_state = pstate;
	msgp->seq_num = atomic_inc_32_nv(&ldcp->dringdata_msgid);

	rv = vgen_sendmsg_shm(ldcp, (caddr_t)tagp, sizeof (*msgp));
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg_shm() failed\n");
	}

	statsp->dring_data_acks_sent++;
	if (pstate == VIO_DP_STOPPED) {
		statsp->dring_stopped_acks_sent++;
	}

	return (rv);
}

/*
 * Send dring data msgs (info/ack/nack) over LDC.
 */
static int
vgen_sendmsg_shm(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen)
{
	int rv;
	size_t len;
	uint32_t retries = 0;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

	len = msglen;
	if ((len == 0) || (msg == NULL))
		return (VGEN_FAILURE);

	do {
		len = msglen;
		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
		if (retries++ >= vgen_ldcwr_retries)
			break;
	} while (rv == EWOULDBLOCK);

	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%d)\n",
		    rv, msglen);
		return (rv);
	}

	if (len != msglen) {
		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%d)\n",
		    rv, msglen);
		return (VGEN_FAILURE);
	}

	return (VGEN_SUCCESS);
}