/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/machsystm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/strsun.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/ethernet.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/vnet.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_common.h>
#include <sys/vnet_common.h>
#include <sys/vnet_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_gen.h>

/*
 * This file contains the implementation of the TxDring data transfer mode
 * of the VIO Protocol in vnet. The functions in this file are invoked from
 * vnet_gen.c after TxDring mode is negotiated with the peer during the
 * attribute phase of the handshake. This file contains functions that set
 * up the transmit and receive descriptor rings and associated resources in
 * TxDring mode. It also contains the transmit and receive data processing
 * functions that are invoked in TxDring mode.
 */

/* Functions exported to vnet_gen.c */
int vgen_create_tx_dring(vgen_ldc_t *ldcp);
void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
int vgen_dringsend(void *arg, mblk_t *mp);
void vgen_ldc_msg_worker(void *arg);
void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
int vgen_handle_dringdata(void *arg1, void *arg2);
mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
    boolean_t caller_holds_lock);

/* Internal functions */
static int vgen_init_multipools(vgen_ldc_t *ldcp);
static int vgen_handle_dringdata_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_process_dringdata(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dringdata_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dringdata_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt);
static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
static int vgen_send_dringack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    uint32_t start, int32_t end, uint8_t pstate);
static void vgen_reclaim(vgen_ldc_t *ldcp);
static void vgen_reclaim_dring(vgen_ldc_t *ldcp);

/* Functions imported from vnet_gen.c */
extern int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
extern void vgen_destroy_rxpools(void *arg);

/* Tunables */
extern int vgen_rxpool_cleanup_delay;
extern boolean_t vnet_jumbo_rxpools;
extern uint32_t vnet_num_descriptors;
extern uint32_t vgen_chain_len;
extern uint32_t vgen_ldcwr_retries;
extern uint32_t vgen_recv_delay;
extern uint32_t vgen_recv_retries;
extern uint32_t vgen_rbufsz1;
extern uint32_t vgen_rbufsz2;
extern uint32_t vgen_rbufsz3;
extern uint32_t vgen_rbufsz4;
extern uint32_t vgen_nrbufs1;
extern uint32_t vgen_nrbufs2;
extern uint32_t vgen_nrbufs3;
extern uint32_t vgen_nrbufs4;
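
/*
 * Note: the vgen_rbufszN/vgen_nrbufsN tunables above form pairs; each pair
 * gives the buffer size and buffer count of one receive mblk pool (see
 * vgen_init_multipools() below). A non-zero vgen_rbufsz1 is treated as a
 * hint that the pool sizes have been administratively specified.
 */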

#ifdef DEBUG

#define	DEBUG_PRINTF	vgen_debug_printf

extern int vnet_dbglevel;
extern int vgen_inject_err_flag;

extern void vgen_debug_printf(const char *fname, vgen_t *vgenp,
    vgen_ldc_t *ldcp, const char *fmt, ...);
extern boolean_t vgen_inject_error(vgen_ldc_t *ldcp, int error);

#endif

/*
 * Allocate transmit resources for the channel. The resources consist of a
 * transmit descriptor ring and an associated transmit buffer area.
 */
int
vgen_create_tx_dring(vgen_ldc_t *ldcp)
{
        int                     i;
        int                     rv;
        ldc_mem_info_t          minfo;
        uint32_t                txdsize;
        uint32_t                tbufsize;
        vgen_private_desc_t     *tbufp;
        vnet_public_desc_t      *txdp;
        vio_dring_entry_hdr_t   *hdrp;
        caddr_t                 datap = NULL;
        int                     ci;
        uint32_t                ncookies;
        size_t                  data_sz;
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);

        ldcp->num_txds = vnet_num_descriptors;
        txdsize = sizeof (vnet_public_desc_t);
        tbufsize = sizeof (vgen_private_desc_t);

        /* allocate the transmit buffer ring */
        tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
        if (tbufp == NULL) {
                return (VGEN_FAILURE);
        }
        ldcp->tbufp = tbufp;
        ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);

        /* create the transmit descriptor ring */
        rv = ldc_mem_dring_create(ldcp->num_txds, txdsize,
            &ldcp->tx_dring_handle);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
                goto fail;
        }

        /* get the addr of the descriptor ring */
        rv = ldc_mem_dring_info(ldcp->tx_dring_handle, &minfo);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
                goto fail;
        }
        ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);

        /*
         * In order to ensure that the number of ldc cookies per descriptor is
         * limited to be within the default MAX_COOKIES (2), we take the steps
         * outlined below:
         *
         * Align the entire data buffer area to 8K and carve out per descriptor
         * data buffers starting from this 8K aligned base address.
         *
         * We round up the mtu specified to be a multiple of 2K or 4K.
         * For sizes up to 12K we round up the size to the next 2K.
         * For sizes > 12K we round up to the next 4K (otherwise sizes such as
         * 14K could end up needing 3 cookies, with the buffer spread across
         * 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...).
         */
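        /*
         * Worked example (illustrative): with a standard 1500-byte MTU the
         * buffer size below rounds up to 2K, so every 2K buffer carved at a
         * 2K offset from the 8K-aligned base lies within a single 8K page
         * and needs just 1 cookie. With a hypothetical 14K jumbo MTU,
         * rounding up to 16K keeps each buffer within two 8K pages
         * (2 cookies); leaving it at 14K would let a buffer straddle three
         * pages (e.g. 2K + 8K + 4K), requiring 3 cookies.
         */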
        data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
        if (data_sz <= VNET_12K) {
                data_sz = VNET_ROUNDUP_2K(data_sz);
        } else {
                data_sz = VNET_ROUNDUP_4K(data_sz);
        }

        /* allocate an extra 8K bytes for alignment */
        ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
        datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
        ldcp->tx_datap = datap;

        /* align the starting address of the data area to 8K */
        datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);

        /*
         * For each private descriptor, allocate an ldc mem_handle, which is
         * required to map its data buffer during transmit, and set the flags
         * to free (available for use by the transmit routine).
         */
        for (i = 0; i < ldcp->num_txds; i++) {

                tbufp = &(ldcp->tbufp[i]);
                rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
                    &(tbufp->memhandle));
                if (rv) {
                        tbufp->memhandle = 0;
                        goto fail;
                }

                /*
                 * Bind the ldc memhandle to the corresponding transmit
                 * buffer.
                 */
                ci = ncookies = 0;
                rv = ldc_mem_bind_handle(tbufp->memhandle,
                    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
                    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
                if (rv != 0) {
                        goto fail;
                }

                /*
                 * Successfully bound the handle to the tx data buffer;
                 * set datap in the private descriptor to this buffer.
                 */
                tbufp->datap = datap;

                if ((ncookies == 0) ||
                    (ncookies > MAX_COOKIES)) {
                        goto fail;
                }

                for (ci = 1; ci < ncookies; ci++) {
                        rv = ldc_mem_nextcookie(tbufp->memhandle,
                            &(tbufp->memcookie[ci]));
                        if (rv != 0) {
                                goto fail;
                        }
                }

                tbufp->ncookies = ncookies;
                datap += data_sz;

                tbufp->flags = VGEN_PRIV_DESC_FREE;
                txdp = &(ldcp->txdp[i]);
                hdrp = &txdp->hdr;
                hdrp->dstate = VIO_DESC_FREE;
                hdrp->ack = B_FALSE;
                tbufp->descp = txdp;

        }

        /*
         * The descriptors and the associated buffers are all ready;
         * now bind the descriptor ring to the channel.
         */
        rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dring_handle,
            LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
            &ldcp->tx_dring_cookie, &ncookies);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
                    "rv(%x)\n", rv);
                goto fail;
        }
        ASSERT(ncookies == 1);
        ldcp->tx_dring_ncookies = ncookies;

        /* reset tbuf walking pointers */
        ldcp->next_tbufp = ldcp->tbufp;
        ldcp->cur_tbufp = ldcp->tbufp;

        /* initialize tx seqnum and index */
        ldcp->next_txseq = VNET_ISS;
        ldcp->next_txi = 0;

        ldcp->resched_peer = B_TRUE;
        ldcp->resched_peer_txi = 0;

        return (VGEN_SUCCESS);

fail:
        vgen_destroy_tx_dring(ldcp);
        return (VGEN_FAILURE);
}

/*
 * Free transmit resources for the channel.
 */
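/*
 * Note: the teardown below releases resources in the reverse order of
 * vgen_create_tx_dring(): the ring is unbound from the channel first, then
 * each per-buffer memhandle is unbound and freed, and finally the data
 * buffer area, the descriptor ring and the private tbuf ring are freed.
 * Every step is guarded by a state check, so this is safe to call from the
 * create path's failure handling with partially initialized state.
 */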
void
vgen_destroy_tx_dring(vgen_ldc_t *ldcp)
{
        int                     i;
        int                     tbufsize = sizeof (vgen_private_desc_t);
        vgen_private_desc_t     *tbufp = ldcp->tbufp;

        /* We first unbind the descriptor ring */
        if (ldcp->tx_dring_ncookies != 0) {
                (void) ldc_mem_dring_unbind(ldcp->tx_dring_handle);
                ldcp->tx_dring_ncookies = 0;
        }

        /* Unbind transmit buffers */
        if (ldcp->tbufp != NULL) {
                /* for each tbuf (priv_desc), free the ldc mem_handle */
                for (i = 0; i < ldcp->num_txds; i++) {

                        tbufp = &(ldcp->tbufp[i]);

                        if (tbufp->datap) { /* if bound to an ldc memhandle */
                                (void) ldc_mem_unbind_handle(tbufp->memhandle);
                                tbufp->datap = NULL;
                        }
                        if (tbufp->memhandle) {
                                (void) ldc_mem_free_handle(tbufp->memhandle);
                                tbufp->memhandle = 0;
                        }
                }
        }

        /* Free the tx data buffer area */
        if (ldcp->tx_datap != NULL) {
                kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
                ldcp->tx_datap = NULL;
                ldcp->tx_data_sz = 0;
        }

        /* Free the transmit descriptor ring */
        if (ldcp->tx_dring_handle != 0) {
                (void) ldc_mem_dring_destroy(ldcp->tx_dring_handle);
                ldcp->tx_dring_handle = 0;
                ldcp->txdp = NULL;
        }

        /* Free the transmit buffer ring */
        if (ldcp->tbufp != NULL) {
                kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
                ldcp->tbufp = ldcp->tbufendp = NULL;
        }
}

/*
 * Map the transmit descriptor ring exported
 * by the peer, as our receive descriptor ring.
 */
int
vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt)
{
        int                     rv;
        ldc_mem_info_t          minfo;
        ldc_mem_cookie_t        dcookie;
        uint32_t                ncookies;
        uint32_t                num_desc;
        uint32_t                desc_size;
        vio_dring_reg_msg_t     *msg = pkt;
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);

        ncookies = msg->ncookies;
        num_desc = msg->num_descriptors;
        desc_size = msg->descriptor_size;
        bcopy(&msg->cookie[0], &dcookie, sizeof (ldc_mem_cookie_t));

        /*
         * Sanity check.
         */
        if (num_desc < VGEN_NUM_DESCRIPTORS_MIN ||
            desc_size < sizeof (vnet_public_desc_t)) {
                goto fail;
        }

        /* Map the remote dring */
        rv = ldc_mem_dring_map(ldcp->ldc_handle, &dcookie, ncookies, num_desc,
            desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dring_handle));
        if (rv != 0) {
                goto fail;
        }

        /*
         * Successfully mapped; now get info about the mapped dring.
         */
        rv = ldc_mem_dring_info(ldcp->rx_dring_handle, &minfo);
        if (rv != 0) {
                goto fail;
        }

        /*
         * Save the ring address and the number of descriptors.
         */
        ldcp->mrxdp = (vnet_public_desc_t *)(minfo.vaddr);
        bcopy(&dcookie, &(ldcp->rx_dring_cookie), sizeof (dcookie));
        ldcp->rx_dring_ncookies = ncookies;
        ldcp->num_rxds = num_desc;

        /* Initialize rx dring indexes and seqnum */
        ldcp->next_rxi = 0;
        ldcp->next_rxseq = VNET_ISS;
        ldcp->dring_mtype = minfo.mtype;

        /* Save the peer's dring_info values */
        bcopy(&dcookie, &(ldcp->peer_hparams.dring_cookie),
            sizeof (ldc_mem_cookie_t));
        ldcp->peer_hparams.num_desc = num_desc;
        ldcp->peer_hparams.desc_size = desc_size;
        ldcp->peer_hparams.dring_ncookies = ncookies;

        /* Set dring_ident for the peer */
        ldcp->peer_hparams.dring_ident = (uint64_t)ldcp->txdp;

        /* Return the dring_ident in the ack msg */
        msg->dring_ident = (uint64_t)ldcp->txdp;

        /* alloc rx mblk pools */
        rv = vgen_init_multipools(ldcp);
        if (rv != 0) {
                /*
                 * We do not return failure if receive mblk pools can't
                 * be allocated; instead allocb(9F) will be used to
                 * dynamically allocate buffers during receive.
                 */
                DWARN(vgenp, ldcp,
                    "vnet%d: failed to allocate rx mblk "
                    "pools for channel(0x%lx)\n",
                    vgenp->instance, ldcp->ldc_id);
        }

        return (VGEN_SUCCESS);

fail:
        if (ldcp->rx_dring_handle != 0) {
                (void) ldc_mem_dring_unmap(ldcp->rx_dring_handle);
                ldcp->rx_dring_handle = 0;
        }
        return (VGEN_FAILURE);
}

/*
 * Unmap the receive descriptor ring.
 */
void
vgen_unmap_rx_dring(vgen_ldc_t *ldcp)
{
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
        vio_mblk_pool_t         *vmp = NULL;

        /* Destroy receive mblk pools */
        vio_destroy_multipools(&ldcp->vmp, &vmp);
        if (vmp != NULL) {
                /*
                 * If we can't destroy the rx pool for this channel,
                 * dispatch a task to retry and clean up. Note that we
                 * don't need to wait for the task to complete. If the
                 * vnet device itself gets detached, it will wait for
                 * the task to complete implicitly in
                 * ddi_taskq_destroy().
                 */
                (void) ddi_taskq_dispatch(vgenp->rxp_taskq,
                    vgen_destroy_rxpools, vmp, DDI_SLEEP);
        }

        /* Unmap the peer's dring */
        if (ldcp->rx_dring_handle != 0) {
                (void) ldc_mem_dring_unmap(ldcp->rx_dring_handle);
                ldcp->rx_dring_handle = 0;
        }

        /* clobber rx ring members */
        bzero(&ldcp->rx_dring_cookie, sizeof (ldcp->rx_dring_cookie));
        ldcp->mrxdp = NULL;
        ldcp->next_rxi = 0;
        ldcp->num_rxds = 0;
        ldcp->next_rxseq = VNET_ISS;
}

/* Allocate receive resources */
static int
vgen_init_multipools(vgen_ldc_t *ldcp)
{
        size_t          data_sz;
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
        int             status;
        uint32_t        sz1 = 0;
        uint32_t        sz2 = 0;
        uint32_t        sz3 = 0;
        uint32_t        sz4 = 0;

        /*
         * We round up the mtu specified to be a multiple of 2K.
         * We then create rx pools based on the rounded up size.
         */
        data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
        data_sz = VNET_ROUNDUP_2K(data_sz);

        /*
         * If pool sizes are specified, use them. Note that the presence of
         * the first tunable is used as a hint.
         */
        if (vgen_rbufsz1 != 0) {

                sz1 = vgen_rbufsz1;
                sz2 = vgen_rbufsz2;
                sz3 = vgen_rbufsz3;
                sz4 = vgen_rbufsz4;

                if (sz4 == 0) { /* need 3 pools */

                        ldcp->max_rxpool_size = sz3;
                        status = vio_init_multipools(&ldcp->vmp,
                            VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
                            vgen_nrbufs2, vgen_nrbufs3);

                } else {

                        ldcp->max_rxpool_size = sz4;
                        status = vio_init_multipools(&ldcp->vmp,
                            VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
                            vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
                            vgen_nrbufs4);
                }
                return (status);
        }

        /*
         * Pool sizes are not specified. We select the pool sizes based on the
         * mtu if vnet_jumbo_rxpools is enabled.
         */
        if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
                /*
                 * Receive buffer pool allocation based on mtu is disabled.
                 * Use the default mechanism of standard size pool allocation.
                 */
                sz1 = VGEN_DBLK_SZ_128;
                sz2 = VGEN_DBLK_SZ_256;
                sz3 = VGEN_DBLK_SZ_2048;
                ldcp->max_rxpool_size = sz3;

                status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
                    sz1, sz2, sz3,
                    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);

                return (status);
        }
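
        /*
         * Illustrative sizing (derived from the cases below): with a
         * hypothetical 16K jumbo MTU, data_sz rounds to 16K and the four
         * pools become 256 bytes, 2K, 8K (data_sz / 2) and 16K (data_sz),
         * so small control traffic and full-sized jumbo frames each land
         * in a reasonably sized pool.
         */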
        switch (data_sz) {

        case VNET_4K:

                sz1 = VGEN_DBLK_SZ_128;
                sz2 = VGEN_DBLK_SZ_256;
                sz3 = VGEN_DBLK_SZ_2048;
                sz4 = sz3 << 1;                 /* 4K */
                ldcp->max_rxpool_size = sz4;

                status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
                    sz1, sz2, sz3, sz4,
                    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
                break;

        default:        /* data_sz: 4K+ to 16K */

                sz1 = VGEN_DBLK_SZ_256;
                sz2 = VGEN_DBLK_SZ_2048;
                sz3 = data_sz >> 1;             /* Jumbo-size/2 */
                sz4 = data_sz;                  /* Jumbo-size */
                ldcp->max_rxpool_size = sz4;

                status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
                    sz1, sz2, sz3, sz4,
                    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
                break;

        }

        return (status);
}

/*
 * This function transmits normal data frames (non-priority) over the channel.
 * It queues the frame into the transmit descriptor ring and sends a
 * VIO_DRING_DATA message, if needed, to wake up the peer to (re)start
 * processing.
 */
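/*
 * Locking note (as used in this file): ldcp->txlock serializes descriptor
 * allocation, ldcp->tclock protects reclaim state (cur_tbufp, tx_blocked
 * and the reclaim timestamps), and ldcp->wrlock serializes sends on the
 * channel. Where the locks nest, txlock and wrlock are always taken before
 * tclock.
 */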
int
vgen_dringsend(void *arg, mblk_t *mp)
{
        vgen_ldc_t              *ldcp = (vgen_ldc_t *)arg;
        vgen_private_desc_t     *tbufp;
        vgen_private_desc_t     *rtbufp;
        vnet_public_desc_t      *rtxdp;
        vgen_private_desc_t     *ntbufp;
        vnet_public_desc_t      *txdp;
        vio_dring_entry_hdr_t   *hdrp;
        vgen_stats_t            *statsp;
        struct ether_header     *ehp;
        boolean_t               is_bcast = B_FALSE;
        boolean_t               is_mcast = B_FALSE;
        size_t                  mblksz;
        caddr_t                 dst;
        mblk_t                  *bp;
        size_t                  size;
        int                     rv = 0;
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
        vgen_hparams_t          *lp = &ldcp->local_hparams;

        statsp = &ldcp->stats;
        size = msgsize(mp);

        DBG1(vgenp, ldcp, "enter\n");

        /* drop the packet if ldc is not up */
        if (ldcp->ldc_status != LDC_UP) {
                DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
                    ldcp->ldc_status);
                goto dringsend_exit;
        }

        /* drop the packet if the handshake is not done */
        if (ldcp->hphase != VH_DONE) {
                DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
                    ldcp->hphase);
                goto dringsend_exit;
        }

        if (size > (size_t)lp->mtu) {
                DWARN(vgenp, ldcp, "invalid size(%lu)\n", size);
                goto dringsend_exit;
        }
        if (size < ETHERMIN)
                size = ETHERMIN;

        ehp = (struct ether_header *)mp->b_rptr;
        is_bcast = IS_BROADCAST(ehp);
        is_mcast = IS_MULTICAST(ehp);

        mutex_enter(&ldcp->txlock);
        /*
         * allocate a descriptor
         */
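        /*
         * The ring is treated as full when advancing next_tbufp would make
         * it catch up with cur_tbufp (the reclaim pointer); one slot is
         * deliberately left unused so that a full ring can be told apart
         * from an empty one (next_tbufp == cur_tbufp means empty).
         */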
        tbufp = ldcp->next_tbufp;
        ntbufp = NEXTTBUF(ldcp, tbufp);
        if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */

                mutex_enter(&ldcp->tclock);
                /* Try reclaiming now */
                vgen_reclaim_dring(ldcp);
                ldcp->reclaim_lbolt = ddi_get_lbolt();

                if (ntbufp == ldcp->cur_tbufp) {
                        /* Now we are really out of tbuf/txds */
                        ldcp->tx_blocked_lbolt = ddi_get_lbolt();
                        ldcp->tx_blocked = B_TRUE;
                        mutex_exit(&ldcp->tclock);

                        statsp->tx_no_desc++;
                        mutex_exit(&ldcp->txlock);

                        return (VGEN_TX_NORESOURCES);
                }
                mutex_exit(&ldcp->tclock);
        }
        /* update the next available tbuf in the ring and update tx index */
        ldcp->next_tbufp = ntbufp;
        INCR_TXI(ldcp->next_txi, ldcp);

        /* Mark the buffer busy before releasing the lock */
        tbufp->flags = VGEN_PRIV_DESC_BUSY;
        mutex_exit(&ldcp->txlock);

        /* copy data into the pre-allocated transmit buffer */
        dst = tbufp->datap + VNET_IPALIGN;
        for (bp = mp; bp != NULL; bp = bp->b_cont) {
                mblksz = MBLKL(bp);
                bcopy(bp->b_rptr, dst, mblksz);
                dst += mblksz;
        }

        tbufp->datalen = size;

        /* initialize the corresponding public descriptor (txd) */
        txdp = tbufp->descp;
        hdrp = &txdp->hdr;
        txdp->nbytes = size;
        txdp->ncookies = tbufp->ncookies;
        bcopy((tbufp->memcookie), (txdp->memcookie),
            tbufp->ncookies * sizeof (ldc_mem_cookie_t));

        mutex_enter(&ldcp->wrlock);
        /*
         * If the flags are not set to BUSY, it implies that the descriptor
         * was clobbered while we were copying the data. In such a case,
         * discard the packet and return.
         */
        if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
                statsp->oerrors++;
                mutex_exit(&ldcp->wrlock);
                goto dringsend_exit;
        }
        hdrp->dstate = VIO_DESC_READY;

        /* update stats */
        statsp->opackets++;
        statsp->obytes += size;
        if (is_bcast)
                statsp->brdcstxmt++;
        else if (is_mcast)
                statsp->multixmt++;

        /* send a dring datamsg to the peer */
        if (ldcp->resched_peer) {

                rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
                rtxdp = rtbufp->descp;

                if (rtxdp->hdr.dstate == VIO_DESC_READY) {
                        rv = vgen_send_dringdata(ldcp,
                            (uint32_t)ldcp->resched_peer_txi, -1);
                        if (rv != 0) {
                                /* error: drop the packet */
                                DWARN(vgenp, ldcp,
                                    "failed sending dringdata msg "
                                    "rv(%d) len(%lu)\n", rv, size);
                                statsp->oerrors++;
                        } else {
                                ldcp->resched_peer = B_FALSE;
                        }

                }

        }

        mutex_exit(&ldcp->wrlock);

dringsend_exit:
        if (rv == ECONNRESET) {
                (void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
        }
        freemsg(mp);
        DBG1(vgenp, ldcp, "exit\n");
        return (VGEN_TX_SUCCESS);
}

mblk_t *
vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup)
{
        mblk_t          *bp = NULL;
        mblk_t          *bpt = NULL;
        mblk_t          *mp = NULL;
        size_t          mblk_sz = 0;
        size_t          sz = 0;
        uint_t          count = 0;

        mutex_enter(&ldcp->pollq_lock);

        bp = ldcp->pollq_headp;
        while (bp != NULL) {
                /* get the size of this packet */
                mblk_sz = msgdsize(bp);

                /* if adding this pkt exceeds the size limit, we are done. */
                if (sz + mblk_sz > bytes_to_pickup) {
                        break;
                }

                /* we have room for this packet */
                sz += mblk_sz;

                /* increment the # of packets being sent up */
                count++;

                /* track the last processed pkt */
                bpt = bp;

                /* get the next pkt */
                bp = bp->b_next;
        }

        if (count != 0) {
                /*
                 * picked up some packets; save the head of pkts to be
                 * sent up.
                 */
                mp = ldcp->pollq_headp;

                /* move pollq_headp to skip over the pkts being sent up */
                ldcp->pollq_headp = bp;

                /* picked up all pending pkts in the queue; reset tail also */
                if (ldcp->pollq_headp == NULL) {
                        ldcp->pollq_tailp = NULL;
                }

                /* terminate the tail of pkts to be sent up */
                bpt->b_next = NULL;
        }

        /*
         * We prepend any high priority packets to the chain of packets; note
         * that if we are already at the bytes_to_pickup limit, we might
         * slightly exceed that in such cases. That should be ok, as these
         * pkts are expected to be small in size and arrive at intervals on
         * the order of a few seconds.
         */
        if (ldcp->rx_pktdata == vgen_handle_pkt_data &&
            ldcp->rx_pri_head != NULL) {
                ldcp->rx_pri_tail->b_next = mp;
                mp = ldcp->rx_pri_head;
                ldcp->rx_pri_head = ldcp->rx_pri_tail = NULL;
        }

        mutex_exit(&ldcp->pollq_lock);

        return (mp);
}
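
/*
 * Note: vgen_poll_rcv() above drains the poll queue that vgen_rx() (below)
 * fills while the interface is in polling mode; when polling is turned off
 * again, vgen_rx() prepends any packets still queued there before passing
 * a chain up the stack, preserving packet order across the mode switch.
 */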

/*
 * Process dring data messages (info/ack/nack)
 */
int
vgen_handle_dringdata(void *arg1, void *arg2)
{
        vgen_ldc_t      *ldcp = (vgen_ldc_t *)arg1;
        vio_msg_tag_t   *tagp = (vio_msg_tag_t *)arg2;
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
        int             rv = 0;

        DBG1(vgenp, ldcp, "enter\n");
        switch (tagp->vio_subtype) {

        case VIO_SUBTYPE_INFO:
                /*
                 * To reduce locking contention, release the
                 * cblock here and re-acquire it once we are done
                 * receiving packets.
                 */
                mutex_exit(&ldcp->cblock);
                mutex_enter(&ldcp->rxlock);
                rv = vgen_handle_dringdata_info(ldcp, tagp);
                mutex_exit(&ldcp->rxlock);
                mutex_enter(&ldcp->cblock);
                break;

        case VIO_SUBTYPE_ACK:
                rv = vgen_handle_dringdata_ack(ldcp, tagp);
                break;

        case VIO_SUBTYPE_NACK:
                rv = vgen_handle_dringdata_nack(ldcp, tagp);
                break;
        }
        DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
        return (rv);
}

static int
vgen_handle_dringdata_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        uint32_t        start;
        int32_t         end;
        int             rv = 0;
        vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
        vgen_stats_t    *statsp = &ldcp->stats;
#ifdef VGEN_HANDLE_LOST_PKTS
        uint32_t        rxi;
        int             n;
#endif

        DBG1(vgenp, ldcp, "enter\n");

        start = dringmsg->start_idx;
        end = dringmsg->end_idx;
        /*
         * We received a data msg, which contains the start and end indices
         * of the descriptors within the rx ring holding data, the seq_num
         * of the data packet corresponding to the start index, and the
         * dring_ident. We can now read the contents of each of these
         * descriptors and gather data from them.
         */
        DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
            start, end);

        /* validate rx start and end indexes */
        if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
            !(CHECK_RXI(end, ldcp)))) {
                DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
                    start, end);
                /* drop the message if invalid index */
                return (rv);
        }

        /* validate dring_ident */
        if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
                    dringmsg->dring_ident);
                /* invalid dring_ident, drop the msg */
                return (rv);
        }
#ifdef DEBUG
        if (vgen_inject_error(ldcp, VGEN_ERR_RXLOST)) {
                /* drop this msg to simulate lost pkts for debugging */
                vgen_inject_err_flag &= ~(VGEN_ERR_RXLOST);
                return (rv);
        }
#endif

        statsp->dring_data_msgs_rcvd++;

#ifdef VGEN_HANDLE_LOST_PKTS

        /* receive start index doesn't match the expected index */
        if (ldcp->next_rxi != start) {
                DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
                    ldcp->next_rxi, start);

                /* calculate the number of pkts lost */
                if (start >= ldcp->next_rxi) {
                        n = start - ldcp->next_rxi;
                } else {
                        n = ldcp->num_rxds - (ldcp->next_rxi - start);
                }
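                /*
                 * Example (illustrative): with num_rxds = 512, if we expect
                 * next_rxi = 510 but the peer's start index is 2, the lost
                 * range wraps around the ring and n = 512 - (510 - 2) = 4
                 * (descriptors 510, 511, 0 and 1).
                 */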

                statsp->rx_lost_pkts += n;
                tagp->vio_subtype = VIO_SUBTYPE_NACK;
                tagp->vio_sid = ldcp->local_sid;
                /* indicate the range of lost descriptors */
                dringmsg->start_idx = ldcp->next_rxi;
                rxi = start;
                DECR_RXI(rxi, ldcp);
                dringmsg->end_idx = rxi;
                /* dring ident is left unchanged */
                rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
                    sizeof (*dringmsg), B_FALSE);
                if (rv != VGEN_SUCCESS) {
                        DWARN(vgenp, ldcp,
                            "vgen_sendmsg failed, stype:NACK\n");
                        return (rv);
                }
                /*
                 * Treat this range of descrs/pkts as dropped, set the new
                 * expected value of next_rxi, and continue (below) to
                 * process from the new start index.
                 */
                ldcp->next_rxi = start;
        }

#endif  /* VGEN_HANDLE_LOST_PKTS */

        /* Now receive messages */
        rv = vgen_process_dringdata(ldcp, tagp);

        DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
        return (rv);
}

static int
vgen_process_dringdata(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        boolean_t               set_ack_start = B_FALSE;
        uint32_t                start;
        uint32_t                ack_end;
        uint32_t                next_rxi;
        uint32_t                rxi;
        int                     count = 0;
        int                     rv = 0;
        uint32_t                retries = 0;
        vgen_stats_t            *statsp;
        vnet_public_desc_t      rxd;
        vio_dring_entry_hdr_t   *hdrp;
        mblk_t                  *bp = NULL;
        mblk_t                  *bpt = NULL;
        uint32_t                ack_start;
        boolean_t               rxd_err = B_FALSE;
        mblk_t                  *mp = NULL;
        vio_mblk_t              *vmp = NULL;
        size_t                  nbytes;
        boolean_t               ack_needed = B_FALSE;
        size_t                  nread;
        uint64_t                off = 0;
        struct ether_header     *ehp;
        vio_dring_msg_t         *dringmsg = (vio_dring_msg_t *)tagp;
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
        vgen_hparams_t          *lp = &ldcp->local_hparams;

        DBG1(vgenp, ldcp, "enter\n");

        statsp = &ldcp->stats;
        start = dringmsg->start_idx;

        /*
         * Start processing the descriptors at the specified start index,
         * and continue until we find a descriptor that is not ready to be
         * processed, or until we have processed the entire ring and
         * wrapped around to the start index.
         */

        /* need to set the start index of descriptors to be ack'd */
        set_ack_start = B_TRUE;

        /* index up to which we have ack'd */
        ack_end = start;
        DECR_RXI(ack_end, ldcp);

        next_rxi = rxi = start;
        do {
vgen_recv_retry:
                rv = vnet_dring_entry_copy(&(ldcp->mrxdp[rxi]), &rxd,
                    ldcp->dring_mtype, ldcp->rx_dring_handle, rxi, rxi);
                if (rv != 0) {
                        DWARN(vgenp, ldcp, "vnet_dring_entry_copy() failed"
                            " rv(%d)\n", rv);
                        statsp->ierrors++;
                        return (rv);
                }

                hdrp = &rxd.hdr;

                if (hdrp->dstate != VIO_DESC_READY) {
                        /*
                         * Before waiting and retrying here, send up
                         * the packets that have been received already.
                         */
                        if (bp != NULL) {
                                DTRACE_PROBE1(vgen_rcv_msgs, int, count);
                                vgen_rx(ldcp, bp, bpt);
                                count = 0;
                                bp = bpt = NULL;
                        }
                        /*
                         * The descriptor is not ready. Retry the descriptor
                         * acquire; stop processing after the max # of
                         * retries.
                         */
                        if (retries == vgen_recv_retries)
                                break;
                        retries++;
                        drv_usecwait(vgen_recv_delay);
                        goto vgen_recv_retry;
                }
                retries = 0;

                if (set_ack_start) {
                        /*
                         * initialize the start index of the range
                         * of descriptors to be ack'd.
                         */
                        ack_start = rxi;
                        set_ack_start = B_FALSE;
                }

                if ((rxd.nbytes < ETHERMIN) ||
                    (rxd.nbytes > lp->mtu) ||
                    (rxd.ncookies == 0) ||
                    (rxd.ncookies > MAX_COOKIES)) {
                        rxd_err = B_TRUE;
                } else {
                        /*
                         * Try to allocate an mblk from the free pool
                         * of recv mblks for the channel.
                         * If this fails, use allocb().
                         */
                        nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
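                        /*
                         * ldc_mem_copy() requires the copy length to be a
                         * multiple of 8 bytes, so the expression above
                         * rounds VNET_IPALIGN + rxd.nbytes up to the next
                         * 8-byte boundary.
                         */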
                        if (nbytes > ldcp->max_rxpool_size) {
                                mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
                                    BPRI_MED);
                                vmp = NULL;
                        } else {
                                vmp = vio_multipool_allocb(&ldcp->vmp, nbytes);
                                if (vmp == NULL) {
                                        statsp->rx_vio_allocb_fail++;
                                        /*
                                         * The data buffer returned by
                                         * allocb(9F) is 8-byte aligned. We
                                         * allocate an extra 8 bytes to
                                         * ensure the size is a multiple of
                                         * 8 bytes for ldc_mem_copy().
                                         */
                                        mp = allocb(VNET_IPALIGN +
                                            rxd.nbytes + 8, BPRI_MED);
                                } else {
                                        mp = vmp->mp;
                                }
                        }
                }
                if ((rxd_err) || (mp == NULL)) {
                        /*
                         * rxd_err or allocb() failure:
                         * drop this packet, get the next.
                         */
                        if (rxd_err) {
                                statsp->ierrors++;
                                rxd_err = B_FALSE;
                        } else {
                                statsp->rx_allocb_fail++;
                        }

                        ack_needed = hdrp->ack;

                        /* set descriptor done bit */
                        rv = vnet_dring_entry_set_dstate(&(ldcp->mrxdp[rxi]),
                            ldcp->dring_mtype, ldcp->rx_dring_handle, rxi, rxi,
                            VIO_DESC_DONE);
                        if (rv != 0) {
                                DWARN(vgenp, ldcp,
                                    "vnet_dring_entry_set_dstate err rv(%d)\n",
                                    rv);
                                return (rv);
                        }

                        if (ack_needed) {
                                ack_needed = B_FALSE;
                                /*
                                 * sender needs an ack for this packet;
                                 * ack pkts up to this index.
                                 */
                                ack_end = rxi;

                                rv = vgen_send_dringack(ldcp, tagp,
                                    ack_start, ack_end,
                                    VIO_DP_ACTIVE);
                                if (rv != VGEN_SUCCESS) {
                                        goto error_ret;
                                }

                                /* need to set a new ack start index */
                                set_ack_start = B_TRUE;
                        }
                        goto vgen_next_rxi;
                }

                nread = nbytes;
                rv = ldc_mem_copy(ldcp->ldc_handle,
                    (caddr_t)mp->b_rptr, off, &nread,
                    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);

                /* if ldc_mem_copy() failed */
                if (rv) {
                        DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
                        statsp->ierrors++;
                        freemsg(mp);
                        goto error_ret;
                }

                ack_needed = hdrp->ack;

                rv = vnet_dring_entry_set_dstate(&(ldcp->mrxdp[rxi]),
                    ldcp->dring_mtype, ldcp->rx_dring_handle, rxi, rxi,
                    VIO_DESC_DONE);
                if (rv != 0) {
                        DWARN(vgenp, ldcp,
                            "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
                        freemsg(mp);
                        goto error_ret;
                }

                mp->b_rptr += VNET_IPALIGN;

                if (ack_needed) {
                        ack_needed = B_FALSE;
                        /*
                         * sender needs an ack for this packet;
                         * ack pkts up to this index.
                         */
                        ack_end = rxi;

                        rv = vgen_send_dringack(ldcp, tagp,
                            ack_start, ack_end, VIO_DP_ACTIVE);
                        if (rv != VGEN_SUCCESS) {
                                freemsg(mp);
                                goto error_ret;
                        }

                        /* need to set a new ack start index */
                        set_ack_start = B_TRUE;
                }

                if (nread != nbytes) {
                        DWARN(vgenp, ldcp,
                            "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
                            nread, nbytes);
                        statsp->ierrors++;
                        freemsg(mp);
                        goto vgen_next_rxi;
                }

                /* point to the actual end of data */
                mp->b_wptr = mp->b_rptr + rxd.nbytes;

                if (vmp != NULL) {
                        vmp->state = VIO_MBLK_HAS_DATA;
                }

                /* update stats */
                statsp->ipackets++;
                statsp->rbytes += rxd.nbytes;
                ehp = (struct ether_header *)mp->b_rptr;
                if (IS_BROADCAST(ehp))
                        statsp->brdcstrcv++;
                else if (IS_MULTICAST(ehp))
                        statsp->multircv++;

                /* build a chain of received packets */
                if (bp == NULL) {
                        /* first pkt */
                        bp = mp;
                        bpt = bp;
                        bpt->b_next = NULL;
                } else {
                        mp->b_next = NULL;
                        bpt->b_next = mp;
                        bpt = mp;
                }

                if (count++ > vgen_chain_len) {
                        DTRACE_PROBE1(vgen_rcv_msgs, int, count);
                        vgen_rx(ldcp, bp, bpt);
                        count = 0;
                        bp = bpt = NULL;
                }

vgen_next_rxi:
                /* update the end index of the range of descrs to be ack'd */
                ack_end = rxi;

                /* update the next index to be processed */
                INCR_RXI(next_rxi, ldcp);
                if (next_rxi == start) {
                        /*
                         * processed the entire descriptor ring up to
                         * the index at which we started.
                         */
                        break;
                }

                rxi = next_rxi;

        _NOTE(CONSTCOND)
        } while (1);

        /*
         * Send an ack message to the peer indicating that we have stopped
         * processing descriptors.
         */
        if (set_ack_start) {
                /*
                 * We have ack'd up to some index and we have not
                 * processed any descriptors beyond that index.
                 * Use the last ack'd index as both the start and
                 * end of the range of descrs being ack'd.
                 * Note: This results in acking the last index twice
                 * and should be harmless.
                 */
                ack_start = ack_end;
        }

        rv = vgen_send_dringack(ldcp, tagp, ack_start, ack_end,
            VIO_DP_STOPPED);
        if (rv != VGEN_SUCCESS) {
                goto error_ret;
        }

        /* save the new recv index of the next dring msg */
        ldcp->next_rxi = next_rxi;

error_ret:
        /* send up packets received so far */
        if (bp != NULL) {
                DTRACE_PROBE1(vgen_rcv_msgs, int, count);
                vgen_rx(ldcp, bp, bpt);
                bp = bpt = NULL;
        }
        DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
        return (rv);
}
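
/*
 * Process a dring data ACK from the peer. The ack lets us reclaim DONE
 * descriptors; if it also carries VIO_DP_STOPPED, the peer has stopped
 * processing, and we must either send a new VIO_DRING_DATA message (when
 * READY descriptors remain) or set resched_peer so that the transmit path
 * restarts the peer when new descriptors become ready.
 */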
static int
vgen_handle_dringdata_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        int                     rv = 0;
        uint32_t                start;
        int32_t                 end;
        uint32_t                txi;
        boolean_t               ready_txd = B_FALSE;
        vgen_stats_t            *statsp;
        vgen_private_desc_t     *tbufp;
        vnet_public_desc_t      *txdp;
        vio_dring_entry_hdr_t   *hdrp;
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t         *dringmsg = (vio_dring_msg_t *)tagp;

        DBG1(vgenp, ldcp, "enter\n");
        start = dringmsg->start_idx;
        end = dringmsg->end_idx;
        statsp = &ldcp->stats;

        /*
         * We received an ack corresponding to a specific descriptor for
         * which we had set the ACK bit (during transmit). This enables us
         * to reclaim descriptors.
         */

        DBG2(vgenp, ldcp, "ACK: start(%d), end(%d)\n", start, end);

        /* validate start and end indexes in the tx ack msg */
        if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
                /* drop the message if invalid index */
                DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
                    start, end);
                return (rv);
        }
        /* validate dring_ident */
        if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
                /* invalid dring_ident, drop the msg */
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
                    dringmsg->dring_ident);
                return (rv);
        }
        statsp->dring_data_acks_rcvd++;

        /* reclaim descriptors that are done */
        vgen_reclaim(ldcp);

        if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
                /*
                 * The receiver continued processing descriptors after
                 * sending us the ack.
                 */
                return (rv);
        }

        statsp->dring_stopped_acks_rcvd++;

        /* the receiver stopped processing descriptors */
        mutex_enter(&ldcp->wrlock);
        mutex_enter(&ldcp->tclock);

        /*
         * Determine if there are any pending tx descriptors ready to be
         * processed by the receiver (peer) and if so, send a message to
         * the peer to restart receiving.
         */
        ready_txd = B_FALSE;

        /*
         * Using the end index of the descriptor range for which we received
         * the ack, check if the next descriptor is ready.
         */
        txi = end;
        INCR_TXI(txi, ldcp);
        tbufp = &ldcp->tbufp[txi];
        txdp = tbufp->descp;
        hdrp = &txdp->hdr;
        if (hdrp->dstate == VIO_DESC_READY) {
                ready_txd = B_TRUE;
        } else {
                /*
                 * The descriptor next to the end of the ack'd descriptor
                 * range is not ready. Starting from the current reclaim
                 * index, check if any descriptor is ready.
                 */

                txi = ldcp->cur_tbufp - ldcp->tbufp;
                tbufp = &ldcp->tbufp[txi];

                txdp = tbufp->descp;
                hdrp = &txdp->hdr;
                if (hdrp->dstate == VIO_DESC_READY) {
                        ready_txd = B_TRUE;
                }

        }

        if (ready_txd) {
                /*
                 * We have tx descriptor(s) ready to be processed by the
                 * receiver. Send a message to the peer with the start
                 * index of the ready descriptors.
                 */
                rv = vgen_send_dringdata(ldcp, txi, -1);
                if (rv != VGEN_SUCCESS) {
                        ldcp->resched_peer = B_TRUE;
                        ldcp->resched_peer_txi = txi;
                        mutex_exit(&ldcp->tclock);
                        mutex_exit(&ldcp->wrlock);
                        return (rv);
                }
        } else {
                /*
                 * No ready tx descriptors. Set the flag to send a message
                 * to the peer when tx descriptors are ready in the
                 * transmit routine.
                 */
                ldcp->resched_peer = B_TRUE;
                ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
        }

        mutex_exit(&ldcp->tclock);
        mutex_exit(&ldcp->wrlock);
        DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
        return (rv);
}

static int
vgen_handle_dringdata_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        int                     rv = 0;
        uint32_t                start;
        int32_t                 end;
        uint32_t                txi;
        vnet_public_desc_t      *txdp;
        vio_dring_entry_hdr_t   *hdrp;
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t         *dringmsg = (vio_dring_msg_t *)tagp;

        DBG1(vgenp, ldcp, "enter\n");
        start = dringmsg->start_idx;
        end = dringmsg->end_idx;

        /*
         * The peer sent a NACK msg to indicate lost packets.
         * The start and end correspond to the range of descriptors
         * for which the peer didn't receive a dring data msg and so
         * didn't receive the corresponding data.
         */
        DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);

        /* validate start and end indexes in the tx nack msg */
        if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
                /* drop the message if invalid index */
                DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
                    start, end);
                return (rv);
        }
        /* validate dring_ident */
        if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
                /* invalid dring_ident, drop the msg */
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
                    dringmsg->dring_ident);
                return (rv);
        }
        mutex_enter(&ldcp->txlock);
        mutex_enter(&ldcp->tclock);

        if (ldcp->next_tbufp == ldcp->cur_tbufp) {
                /* no busy descriptors, bogus nack? */
                mutex_exit(&ldcp->tclock);
                mutex_exit(&ldcp->txlock);
                return (rv);
        }

        /* we just mark the descrs as done so they can be reclaimed */
        for (txi = start; txi <= end; ) {
                txdp = &(ldcp->txdp[txi]);
                hdrp = &txdp->hdr;
                if (hdrp->dstate == VIO_DESC_READY)
                        hdrp->dstate = VIO_DESC_DONE;
                INCR_TXI(txi, ldcp);
        }
        mutex_exit(&ldcp->tclock);
        mutex_exit(&ldcp->txlock);
        DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
        return (rv);
}

/*
 * Send received packets up the stack.
 */
static void
vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt)
{
        vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);

        if (ldcp->msg_thread != NULL) {
                ASSERT(MUTEX_HELD(&ldcp->rxlock));
        } else {
                ASSERT(MUTEX_HELD(&ldcp->cblock));
        }

        mutex_enter(&ldcp->pollq_lock);

        if (ldcp->polling_on == B_TRUE) {
                /*
                 * If we are in polling mode, simply queue
                 * the packets onto the poll queue and return.
                 */
                if (ldcp->pollq_headp == NULL) {
                        ldcp->pollq_headp = bp;
                        ldcp->pollq_tailp = bpt;
                } else {
                        ldcp->pollq_tailp->b_next = bp;
                        ldcp->pollq_tailp = bpt;
                }

                mutex_exit(&ldcp->pollq_lock);
                return;
        }

        /*
         * Prepend any pending mblks in the poll queue, now that we
         * are in interrupt mode, before sending up the chain of pkts.
         */
        if (ldcp->pollq_headp != NULL) {
                DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n",
                    (uintptr_t)ldcp);
                ldcp->pollq_tailp->b_next = bp;
                bp = ldcp->pollq_headp;
                ldcp->pollq_headp = ldcp->pollq_tailp = NULL;
        }

        mutex_exit(&ldcp->pollq_lock);

        if (ldcp->msg_thread != NULL) {
                mutex_exit(&ldcp->rxlock);
        } else {
                mutex_exit(&ldcp->cblock);
        }

        /* Send up the packets */
        vrx_cb(ldcp->portp->vhp, bp);

        if (ldcp->msg_thread != NULL) {
                mutex_enter(&ldcp->rxlock);
        } else {
                mutex_enter(&ldcp->cblock);
        }
}

static void
vgen_reclaim(vgen_ldc_t *ldcp)
{
        mutex_enter(&ldcp->tclock);
        vgen_reclaim_dring(ldcp);
        ldcp->reclaim_lbolt = ddi_get_lbolt();
        mutex_exit(&ldcp->tclock);
}
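
/*
 * Design note: reclaim is lazy. Descriptors marked DONE by the peer are not
 * reclaimed per transmitted packet; they are swept up below when a dring
 * ack arrives (via vgen_reclaim()) or directly by vgen_dringsend() when it
 * runs out of free tbufs.
 */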

/*
 * Transmit reclaim function. Starting from the current reclaim index,
 * look for descriptors marked DONE and reclaim them.
 */
static void
vgen_reclaim_dring(vgen_ldc_t *ldcp)
{
        int                     count = 0;
        vnet_public_desc_t      *txdp;
        vgen_private_desc_t     *tbufp;
        vio_dring_entry_hdr_t   *hdrp;

        tbufp = ldcp->cur_tbufp;
        txdp = tbufp->descp;
        hdrp = &txdp->hdr;

        while ((hdrp->dstate == VIO_DESC_DONE) &&
            (tbufp != ldcp->next_tbufp)) {
                tbufp->flags = VGEN_PRIV_DESC_FREE;
                hdrp->dstate = VIO_DESC_FREE;
                hdrp->ack = B_FALSE;

                tbufp = NEXTTBUF(ldcp, tbufp);
                txdp = tbufp->descp;
                hdrp = &txdp->hdr;
                count++;
        }

        ldcp->cur_tbufp = tbufp;

        /*
         * Check if the mac layer should be notified to restart
         * transmissions.
         */
        if ((ldcp->tx_blocked) && (count > 0)) {
                vio_net_tx_update_t vtx_update =
                    ldcp->portp->vcb.vio_net_tx_update;

                ldcp->tx_blocked = B_FALSE;
                vtx_update(ldcp->portp->vhp);
        }
}

/*
 * Send a descriptor ring data message to the peer over ldc.
 */
static int
vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
{
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t dringmsg, *msgp = &dringmsg;
        vio_msg_tag_t   *tagp = &msgp->tag;
        vgen_stats_t    *statsp = &ldcp->stats;
        int             rv;

#ifdef DEBUG
        if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
                return (VGEN_SUCCESS);
        }
#endif
        bzero(msgp, sizeof (*msgp));

        tagp->vio_msgtype = VIO_TYPE_DATA;
        tagp->vio_subtype = VIO_SUBTYPE_INFO;
        tagp->vio_subtype_env = VIO_DRING_DATA;
        tagp->vio_sid = ldcp->local_sid;

        msgp->dring_ident = ldcp->local_hparams.dring_ident;
        msgp->start_idx = start;
        msgp->end_idx = end;

        rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
        if (rv != VGEN_SUCCESS) {
                DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
                return (rv);
        }

        statsp->dring_data_msgs_sent++;

        DBG2(vgenp, ldcp, "DRING_DATA_SENT\n");

        return (VGEN_SUCCESS);
}

/*
 * Send a dring data ack message.
 */
static int
vgen_send_dringack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
    int32_t end, uint8_t pstate)
{
        int             rv = 0;
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
        vgen_stats_t    *statsp = &ldcp->stats;

        tagp->vio_msgtype = VIO_TYPE_DATA;
        tagp->vio_subtype = VIO_SUBTYPE_ACK;
        tagp->vio_subtype_env = VIO_DRING_DATA;
        tagp->vio_sid = ldcp->local_sid;
        msgp->start_idx = start;
        msgp->end_idx = end;
        msgp->dring_process_state = pstate;

        rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
        if (rv != VGEN_SUCCESS) {
                DWARN(vgenp, ldcp, "vgen_sendmsg() failed\n");
        }

        statsp->dring_data_acks_sent++;
        if (pstate == VIO_DP_STOPPED) {
                statsp->dring_stopped_acks_sent++;
        }

        return (rv);
}

/*
 * Wrapper routine to send the given message over ldc using ldc_write().
 */
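/*
 * Note: for outbound data messages (VIO_DRING_DATA and VIO_PKT_DATA INFO
 * messages), this routine stamps the message with the next transmit
 * sequence number and increments that counter only if the write succeeds.
 * ldc_write() is retried on EWOULDBLOCK up to vgen_ldcwr_retries times.
 */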
int
vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
    boolean_t caller_holds_lock)
{
        int                     rv;
        size_t                  len;
        uint32_t                retries = 0;
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
        vio_msg_tag_t           *tagp = (vio_msg_tag_t *)msg;
        vio_dring_msg_t         *dmsg;
        vio_raw_data_msg_t      *rmsg;
        boolean_t               data_msg = B_FALSE;

        len = msglen;
        if ((len == 0) || (msg == NULL))
                return (VGEN_FAILURE);

        if (!caller_holds_lock) {
                mutex_enter(&ldcp->wrlock);
        }

        if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
                if (tagp->vio_subtype_env == VIO_DRING_DATA) {
                        dmsg = (vio_dring_msg_t *)tagp;
                        dmsg->seq_num = ldcp->next_txseq;
                        data_msg = B_TRUE;
                } else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
                        rmsg = (vio_raw_data_msg_t *)tagp;
                        rmsg->seq_num = ldcp->next_txseq;
                        data_msg = B_TRUE;
                }
        }

        do {
                len = msglen;
                rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
                if (retries++ >= vgen_ldcwr_retries)
                        break;
        } while (rv == EWOULDBLOCK);

        if (rv == 0 && data_msg == B_TRUE) {
                ldcp->next_txseq++;
        }

        if (!caller_holds_lock) {
                mutex_exit(&ldcp->wrlock);
        }

        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%lu)\n",
                    rv, msglen);
                return (rv);
        }

        if (len != msglen) {
                DWARN(vgenp, ldcp, "ldc_write: partial write: len(%lu) "
                    "msglen(%lu)\n", len, msglen);
                return (VGEN_FAILURE);
        }

        return (VGEN_SUCCESS);
}

int
vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        vio_raw_data_msg_t      *rmsg;
        vio_dring_msg_t         *dmsg;
        uint64_t                seq_num;
        vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);

        if (tagp->vio_subtype_env == VIO_DRING_DATA) {
                dmsg = (vio_dring_msg_t *)tagp;
                seq_num = dmsg->seq_num;
        } else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
                rmsg = (vio_raw_data_msg_t *)tagp;
                seq_num = rmsg->seq_num;
        } else {
                return (EINVAL);
        }

        if (seq_num != ldcp->next_rxseq) {

                /* seqnums don't match */
                DWARN(vgenp, ldcp,
                    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
                    ldcp->next_rxseq, seq_num);
                return (EINVAL);

        }

        ldcp->next_rxseq++;

        return (0);
}
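
/*
 * Note: a sequence number mismatch above indicates that a data message was
 * lost or duplicated on the channel; returning EINVAL lets the caller (in
 * vnet_gen.c) treat this as a protocol error and, presumably, reset the
 * channel to resynchronize both ends.
 */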

/*
 * vgen_ldc_msg_worker -- A per-LDC worker thread. This thread is woken up by
 * the LDC interrupt handler to process LDC packets and receive data.
 */
void
vgen_ldc_msg_worker(void *arg)
{
        callb_cpr_t     cprinfo;
        vgen_ldc_t      *ldcp = (vgen_ldc_t *)arg;
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
        int             rv;

        DBG1(vgenp, ldcp, "enter\n");
        CALLB_CPR_INIT(&cprinfo, &ldcp->msg_thr_lock, callb_generic_cpr,
            "vnet_rcv_thread");
        mutex_enter(&ldcp->msg_thr_lock);
        while (!(ldcp->msg_thr_flags & VGEN_WTHR_STOP)) {

                CALLB_CPR_SAFE_BEGIN(&cprinfo);
                /*
                 * Wait until data is received or a stop
                 * request is received.
                 */
                while (!(ldcp->msg_thr_flags &
                    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
                        cv_wait(&ldcp->msg_thr_cv, &ldcp->msg_thr_lock);
                }
                CALLB_CPR_SAFE_END(&cprinfo, &ldcp->msg_thr_lock);

                /*
                 * First process the stop request.
                 */
                if (ldcp->msg_thr_flags & VGEN_WTHR_STOP) {
                        DBG2(vgenp, ldcp, "stopped\n");
                        break;
                }
                ldcp->msg_thr_flags &= ~VGEN_WTHR_DATARCVD;
                ldcp->msg_thr_flags |= VGEN_WTHR_PROCESSING;
                mutex_exit(&ldcp->msg_thr_lock);
                DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
                rv = vgen_handle_evt_read(ldcp, VGEN_MSG_THR);
                mutex_enter(&ldcp->msg_thr_lock);
                ldcp->msg_thr_flags &= ~VGEN_WTHR_PROCESSING;
                if (rv != 0) {
                        /*
                         * The channel has been reset. The thread should now
                         * exit. The thread may be recreated if TxDring is
                         * negotiated on this channel after the channel comes
                         * back up again.
                         */
                        ldcp->msg_thr_flags |= VGEN_WTHR_STOP;
                        break;
                }
        }

        /*
         * Update the run status and wake up the thread that
         * has sent the stop request.
         */
        ldcp->msg_thr_flags &= ~VGEN_WTHR_STOP;
        ldcp->msg_thread = NULL;
        DBG1(vgenp, ldcp, "exit\n");
        CALLB_CPR_EXIT(&cprinfo);

        thread_exit();
}

/* vgen_stop_msg_thread -- Coordinate with the worker thread to stop it */
void
vgen_stop_msg_thread(vgen_ldc_t *ldcp)
{
        kt_did_t        tid = 0;
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);

        DBG1(vgenp, ldcp, "enter\n");
        /*
         * Send a stop request by setting the stop flag and
         * wait until the receive thread stops.
         */
        mutex_enter(&ldcp->msg_thr_lock);
        if (ldcp->msg_thread != NULL) {
                tid = ldcp->msg_thread->t_did;
                ldcp->msg_thr_flags |= VGEN_WTHR_STOP;
                cv_signal(&ldcp->msg_thr_cv);
        }
        mutex_exit(&ldcp->msg_thr_lock);

        if (tid != 0) {
                thread_join(tid);
        }
        DBG1(vgenp, ldcp, "exit\n");
}