/*
 * This file is provided under a CDDLv1 license. When using or
 * redistributing this file, you may do so under this license.
 * In redistributing this file this license must be included
 * and no other modification of this header file is permitted.
 *
 * CDDL LICENSE SUMMARY
 *
 * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
 *
 * The contents of this file are subject to the terms of Version
 * 1.0 of the Common Development and Distribution License (the "License").
 *
 * You should have received a copy of the License with this software.
 * You can obtain a copy of the License at
 *	http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2016 Joyent, Inc.
 */

/*
 * **********************************************************************
 *									*
 * Module Name:								*
 *	e1000g_rx.c							*
 *									*
 * Abstract:								*
 *	This file contains routines that handle the receive		*
 *	interrupt and pass received packets up to the upper layer.	*
 *	A zero copy is done when free buffers are available in the	*
 *	pool.								*
 *									*
 * **********************************************************************
 */

#include "e1000g_sw.h"
#include "e1000g_debug.h"

static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_data_t *rx_data);
#pragma	inline(e1000g_get_buf)

/*
 * e1000g_rxfree_func - the call-back function to reclaim an rx buffer
 *
 * This function is called when an mp is freed by the user through a
 * freeb call (only for mblks constructed through desballoc).
 * It returns the freed buffer to the free list.
 */
void
e1000g_rxfree_func(p_rx_sw_packet_t packet)
{
	e1000g_rx_data_t *rx_data;
	private_devi_list_t *devi_node;
	struct e1000g *Adapter;
	uint32_t ring_cnt;
	uint32_t ref_cnt;
	unsigned char *address;

	if (packet->ref_cnt == 0) {
		/*
		 * This case only happens when rx buffers are being freed
		 * in e1000g_stop() and freemsg() is called.
		 */
		return;
	}

	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;

	if (packet->mp == NULL) {
		/*
		 * Allocate a mblk that binds to the data buffer
		 */
		address = (unsigned char *)packet->rx_buf->address;
		if (address != NULL) {
			packet->mp = desballoc((unsigned char *)
			    address, packet->rx_buf->size,
			    BPRI_MED, &packet->free_rtn);
		}
	}

	/*
	 * Enqueue the recycled packets in a recycle queue. When the freelist
	 * dries up, move the entire chain of packets from the recycle queue
	 * to the freelist. This helps in avoiding per-packet mutex contention
	 * around the freelist.
	 */
	mutex_enter(&rx_data->recycle_lock);
	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
	rx_data->recycle_freepkt++;
	mutex_exit(&rx_data->recycle_lock);

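	/*
	 * Drop this packet's reference now that its mblk has been freed.
	 * If this was the last reference, the packet is freed outright;
	 * once the ring has been stopped and its last pending buffer has
	 * been returned, the per-ring data is released as well.
	 */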
	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
	if (ref_cnt == 0) {
		mutex_enter(&e1000g_rx_detach_lock);
		e1000g_free_rx_sw_packet(packet, B_FALSE);

		atomic_dec_32(&rx_data->pending_count);
		atomic_dec_32(&e1000g_mblks_pending);

		if ((rx_data->pending_count == 0) &&
		    (rx_data->flag & E1000G_RX_STOPPED)) {
			devi_node = rx_data->priv_devi_node;

			if (devi_node != NULL) {
				ring_cnt = atomic_dec_32_nv(
				    &devi_node->pending_rx_count);
				if ((ring_cnt == 0) &&
				    (devi_node->flag &
				    E1000G_PRIV_DEVI_DETACH)) {
					e1000g_free_priv_devi_node(
					    devi_node);
				}
			} else {
				Adapter = rx_data->rx_ring->adapter;
				atomic_dec_32(
				    &Adapter->pending_rx_count);
			}

			e1000g_free_rx_pending_buffers(rx_data);
			e1000g_free_rx_data(rx_data);
		}
		mutex_exit(&e1000g_rx_detach_lock);
	}
}

/*
 * e1000g_rx_setup - setup rx data structures
 *
 * This routine initializes all of the receive related
 * structures. This includes the receive descriptors, the
 * actual receive buffers, and the rx_sw_packet software
 * structures.
 */
void
e1000g_rx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_rx_sw_packet_t packet;
	struct e1000_rx_desc *descriptor;
	uint32_t buf_low;
	uint32_t buf_high;
	uint32_t reg_val;
	uint32_t rctl;
	uint32_t rxdctl;
	uint32_t ert;
	uint16_t phy_data;
	int i;
	int size;
	e1000g_rx_data_t *rx_data;

	hw = &Adapter->shared;
	rx_data = Adapter->rx_ring->rx_data;

	/*
	 * Zero out all of the receive buffer descriptor memory;
	 * this assures any previous data or status is erased.
	 */
	bzero(rx_data->rbd_area,
	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);

	if (!Adapter->rx_buffer_setup) {
		/* Init the list of "Receive Buffer" */
		QUEUE_INIT_LIST(&rx_data->recv_list);

		/* Init the list of "Free Receive Buffer" */
		QUEUE_INIT_LIST(&rx_data->free_list);

		/* Init the list of "Recycle Buffer" */
		QUEUE_INIT_LIST(&rx_data->recycle_list);
		/*
		 * Setup the Receive list and the Free list. Note that
		 * both were allocated in one packet area.
		 */
		packet = rx_data->packet_area;
		descriptor = rx_data->rbd_first;

		for (i = 0; i < Adapter->rx_desc_num;
		    i++, packet = packet->next, descriptor++) {
			ASSERT(packet != NULL);
			ASSERT(descriptor != NULL);
			descriptor->buffer_addr =
			    packet->rx_buf->dma_address;

			/* Add this rx_sw_packet to the receive list */
			QUEUE_PUSH_TAIL(&rx_data->recv_list,
			    &packet->Link);
		}

		for (i = 0; i < Adapter->rx_freelist_num;
		    i++, packet = packet->next) {
			ASSERT(packet != NULL);
			/* Add this rx_sw_packet to the free list */
			QUEUE_PUSH_TAIL(&rx_data->free_list,
			    &packet->Link);
		}
		rx_data->avail_freepkt = Adapter->rx_freelist_num;
		rx_data->recycle_freepkt = 0;

		Adapter->rx_buffer_setup = B_TRUE;
	} else {
		/* Setup the initial pointer to the first rx descriptor */
		packet = (p_rx_sw_packet_t)
		    QUEUE_GET_HEAD(&rx_data->recv_list);
		descriptor = rx_data->rbd_first;

		for (i = 0; i < Adapter->rx_desc_num; i++) {
			ASSERT(packet != NULL);
			ASSERT(descriptor != NULL);
			descriptor->buffer_addr =
			    packet->rx_buf->dma_address;

			/* Get next rx_sw_packet */
			packet = (p_rx_sw_packet_t)
			    QUEUE_GET_NEXT(&rx_data->recv_list, &packet->Link);
			descriptor++;
		}
	}

	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
	if (hw->mac.type >= e1000_82540) {
		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
		    Adapter->rx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
	}

	/*
	 * Setup our descriptor pointers
	 */
	rx_data->rbd_next = rx_data->rbd_first;

	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
	size = E1000_READ_REG(hw, E1000_RDLEN(0));

	/* To get lower order bits */
	buf_low = (uint32_t)rx_data->rbd_dma_addr;
	/* To get the higher order bits */
	buf_high = (uint32_t)(rx_data->rbd_dma_addr >> 32);

	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);

	/*
	 * Setup our HW Rx Head & Tail descriptor pointers
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(rx_data->rbd_last - rx_data->rbd_first));
	E1000_WRITE_REG(hw, E1000_RDH(0), 0);

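	/*
	 * With the head at the first descriptor and the tail written with
	 * the index of the last one, the freshly initialized ring is handed
	 * to the hardware before the receiver is enabled below.
	 */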
	/*
	 * Setup the Receive Control Register (RCTL), and ENABLE the
	 * receiver. The initial configuration is to: Enable the receiver,
	 * accept broadcasts, discard bad packets (and long packets),
	 * disable VLAN filter checking, set the receive descriptor
	 * minimum threshold size to 1/2, and the receive buffer size to
	 * 2k.
	 */
	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
	    E1000_RCTL_RDMTS_HALF |
	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */

	if (Adapter->default_mtu > ETHERMTU)
		rctl |= E1000_RCTL_LPE;	/* Large Packet Enable bit */

	if (Adapter->strip_crc)
		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */

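	/*
	 * Select the receive buffer size from the maximum frame size.
	 * Sizes above 2K use the buffer size extension (BSEX) encodings;
	 * the 82545/82546 memory workaround pins the buffer size to 2K.
	 */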
	if (Adapter->mem_workaround_82546 &&
	    ((hw->mac.type == e1000_82545) ||
	    (hw->mac.type == e1000_82546) ||
	    (hw->mac.type == e1000_82546_rev_3))) {
		rctl |= E1000_RCTL_SZ_2048;
	} else {
		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
		else
			rctl |= E1000_RCTL_SZ_2048;
	}

	if (e1000_tbi_sbp_enabled_82543(hw))
		rctl |= E1000_RCTL_SBP;

	/*
	 * Enable Early Receive Threshold (ERT) on supported devices.
	 * Only takes effect when the packet size is equal to or larger than
	 * the specified value (in 8 byte units), e.g. when using jumbo
	 * frames.
	 */
	if ((hw->mac.type == e1000_82573) ||
	    (hw->mac.type == e1000_82574) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan)) {

		ert = E1000_ERT_2048;

		/*
		 * Special modification when ERT and
		 * jumbo frames are enabled
		 */
		if (Adapter->default_mtu > ETHERMTU) {
			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
			ert |= (1 << 13);
		}

		E1000_WRITE_REG(hw, E1000_ERT, ert);
	}

	/* Workaround errata on 82577/8 adapters with large frames */
	if ((hw->mac.type == e1000_pchlan) &&
	    (Adapter->default_mtu > ETHERMTU)) {

		(void) e1000_read_phy_reg(hw, PHY_REG(770, 26), &phy_data);
		phy_data &= 0xfff8;
		phy_data |= (1 << 2);
		(void) e1000_write_phy_reg(hw, PHY_REG(770, 26), phy_data);

		if (hw->phy.type == e1000_phy_82577) {
			(void) e1000_read_phy_reg(hw, 22, &phy_data);
			phy_data &= 0x0fff;
			phy_data |= (1 << 14);
			(void) e1000_write_phy_reg(hw, 0x10, 0x2823);
			(void) e1000_write_phy_reg(hw, 0x11, 0x0003);
			(void) e1000_write_phy_reg(hw, 22, phy_data);
		}
	}

	/* Workaround errata on 82579 adapters with large frames */
	if (hw->mac.type == e1000_pch2lan) {
		boolean_t enable_jumbo = (Adapter->default_mtu > ETHERMTU ?
		    B_TRUE : B_FALSE);

		if (e1000_lv_jumbo_workaround_ich8lan(hw, enable_jumbo) != 0)
			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
			    "failed to enable jumbo frame workaround mode\n");
	}

	reg_val =
	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */

	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);

	/*
	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
	 * processing of received IPV6 extension headers
	 */
	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
	}

	/* Write to enable the receive unit */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
}

/*
 * e1000g_get_buf - get an rx sw packet from the free_list
 */
static p_rx_sw_packet_t
e1000g_get_buf(e1000g_rx_data_t *rx_data)
{
	p_rx_sw_packet_t packet;
	struct e1000g *Adapter;

	Adapter = rx_data->rx_ring->adapter;

	mutex_enter(&rx_data->freelist_lock);
	packet = (p_rx_sw_packet_t)
	    QUEUE_POP_HEAD(&rx_data->free_list);
	if (packet != NULL) {
		rx_data->avail_freepkt--;
		goto end;
	}

	/*
	 * If the freelist has no packets, check the recycle list
	 * to see if any descriptors are available there.
	 */
	mutex_enter(&rx_data->recycle_lock);
	QUEUE_SWITCH(&rx_data->free_list, &rx_data->recycle_list);
	rx_data->avail_freepkt = rx_data->recycle_freepkt;
	rx_data->recycle_freepkt = 0;
	mutex_exit(&rx_data->recycle_lock);
	packet = (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->free_list);
	if (packet != NULL) {
		rx_data->avail_freepkt--;
		goto end;
	}

	if (Adapter->rx_freelist_num < Adapter->rx_freelist_limit) {
		(void) e1000g_increase_rx_packets(rx_data);
		packet = (p_rx_sw_packet_t)
		    QUEUE_POP_HEAD(&rx_data->free_list);
		if (packet != NULL) {
			rx_data->avail_freepkt--;
		}
	}

end:
	mutex_exit(&rx_data->freelist_lock);
	return (packet);
}

/*
 * e1000g_receive - main receive routine
 *
 * This routine will process packets received in an interrupt
 */
mblk_t *
e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
{
	struct e1000_hw *hw;
	mblk_t *nmp;
	mblk_t *ret_mp;
	mblk_t *ret_nmp;
	struct e1000_rx_desc *current_desc;
	struct e1000_rx_desc *last_desc;
	p_rx_sw_packet_t packet;
	p_rx_sw_packet_t newpkt;
	uint16_t length;
	uint32_t pkt_count;
	uint32_t desc_count;
	boolean_t accept_frame;
	boolean_t end_of_packet;
	boolean_t need_copy;
	struct e1000g *Adapter;
	dma_buffer_t *rx_buf;
	uint16_t cksumflags;
	uint_t chain_sz = 0;
	e1000g_rx_data_t *rx_data;
	uint32_t max_size;
	uint32_t min_size;

	ret_mp = NULL;
	ret_nmp = NULL;
	pkt_count = 0;
	desc_count = 0;
	cksumflags = 0;

	Adapter = rx_ring->adapter;
	rx_data = rx_ring->rx_data;
	hw = &Adapter->shared;

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);

	if (e1000g_check_dma_handle(rx_data->rbd_dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
		return (NULL);
	}

	current_desc = rx_data->rbd_next;
	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
		/*
		 * Don't send anything up; no completed descriptor is ready.
		 */
		E1000G_DEBUG_STAT(rx_ring->stat_none);
		return (NULL);
	}

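	/*
	 * Bounds for the length check done at rx_end_of_packet below: the
	 * FCS is stripped before the check, and untagged frames must also
	 * leave room for the VLAN tag allowance; the limit is raised again
	 * further down for frames that actually carry a VLAN tag.
	 */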
	max_size = Adapter->max_frame_size - ETHERFCSL - VLAN_TAGSZ;
	min_size = ETHERMIN;

	/*
	 * Loop through the receive descriptors starting at the last known
	 * descriptor owned by the hardware that begins a packet.
	 */
	while ((current_desc->status & E1000_RXD_STAT_DD) &&
	    (pkt_count < Adapter->rx_limit_onintr) &&
	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {

		desc_count++;
		/*
		 * A packet can span multiple descriptors in the jumbo
		 * frame case.
		 */
		if (current_desc->status & E1000_RXD_STAT_EOP) {
			/* packet has EOP set */
			end_of_packet = B_TRUE;
		} else {
			/*
			 * If this received buffer does not have the
			 * End-Of-Packet bit set, the received packet
			 * will consume multiple buffers. We won't send this
			 * packet upstack till we get all the related buffers.
			 */
			end_of_packet = B_FALSE;
		}

		/*
		 * Get a pointer to the actual receive buffer.
		 * The mp->b_rptr is mapped to the current descriptor's
		 * buffer address.
		 */
		packet =
		    (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
		ASSERT(packet != NULL);

		rx_buf = packet->rx_buf;

		length = current_desc->length;

#ifdef __sparc
		if (packet->dma_type == USE_DVMA)
			dvma_sync(rx_buf->dma_handle, 0,
			    DDI_DMA_SYNC_FORKERNEL);
		else
			(void) ddi_dma_sync(rx_buf->dma_handle,
			    E1000G_IPALIGNROOM, length,
			    DDI_DMA_SYNC_FORKERNEL);
#else
		(void) ddi_dma_sync(rx_buf->dma_handle,
		    E1000G_IPALIGNROOM, length,
		    DDI_DMA_SYNC_FORKERNEL);
#endif

		if (e1000g_check_dma_handle(
		    rx_buf->dma_handle) != DDI_FM_OK) {
			ddi_fm_service_impact(Adapter->dip,
			    DDI_SERVICE_DEGRADED);
			Adapter->e1000g_state |= E1000G_ERROR;

			goto rx_drop;
		}

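		/*
		 * A frame is accepted if the descriptor reports no errors
		 * at all, or if the only errors are TCP/IP checksum errors;
		 * those are not fatal here because the checksum status is
		 * passed up to the stack via the hcksum flags set below.
		 */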
		accept_frame = (current_desc->errors == 0) ||
		    ((current_desc->errors &
		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);

		if (hw->mac.type == e1000_82543) {
			unsigned char last_byte;

			last_byte =
			    *((unsigned char *)rx_buf->address + length - 1);

			if (TBI_ACCEPT(hw,
			    current_desc->status, current_desc->errors,
			    current_desc->length, last_byte,
			    Adapter->min_frame_size,
			    Adapter->max_frame_size)) {

				e1000_tbi_adjust_stats(Adapter,
				    length, hw->mac.addr);

				length--;
				accept_frame = B_TRUE;
			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
			    (current_desc->errors == E1000_RXD_ERR_CE)) {
				accept_frame = B_TRUE;
			}
		}

		/*
		 * Indicate the packet to the NOS if it was good.
		 * Normally, hardware will discard bad packets for us.
		 * Check for the packet to be a valid Ethernet packet.
		 */
		if (!accept_frame) {
			/*
			 * Error in the incoming packet: either the packet is
			 * not an ethernet-sized packet, or the packet has an
			 * error. In either case, the packet will simply be
			 * discarded.
			 */
			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
			    "Process Receive Interrupts: Error in Packet\n");

			E1000G_STAT(rx_ring->stat_error);
			/*
			 * We are done with this packet, so jump straight to
			 * rx_drop rather than running through the rest of
			 * the loop body. More efficient and less error
			 * prone.
			 */
			goto rx_drop;
		}

		/*
		 * If the Ethernet CRC is not stripped by the hardware,
		 * we need to strip it before sending it up to the stack.
		 */
		if (end_of_packet && !Adapter->strip_crc) {
			if (length > ETHERFCSL) {
				length -= ETHERFCSL;
			} else {
				/*
				 * If the fragment is smaller than the CRC,
				 * drop this fragment, do the processing of
				 * the end of the packet.
				 */
				if (rx_data->rx_mblk_tail == NULL) {
					E1000G_STAT(rx_ring->stat_crc_only_pkt);
					goto rx_next_desc;
				}

				rx_data->rx_mblk_tail->b_wptr -=
				    ETHERFCSL - length;
				rx_data->rx_mblk_len -=
				    ETHERFCSL - length;
				goto rx_end_of_packet;
			}
		}

		need_copy = B_TRUE;

		if (length <= Adapter->rx_bcopy_thresh)
			goto rx_copy;

		/*
		 * Get the pre-constructed mblk that was associated
		 * to the receive data buffer.
		 */
		if (packet->mp == NULL) {
			packet->mp = desballoc((unsigned char *)
			    rx_buf->address, length,
			    BPRI_MED, &packet->free_rtn);
		}

		if (packet->mp != NULL) {
			/*
			 * We have two buffer pools: one associated with
			 * the Rx descriptors and a separate freelist pool.
			 * Each time we get a good packet, try to get a
			 * buffer from the freelist pool using
			 * e1000g_get_buf. If we get a free buffer, replace
			 * the descriptor buffer address with the free
			 * buffer we just got, and pass the pre-constructed
			 * mblk upstack. (note: no copying)
			 *
			 * If we failed to get a free buffer, then try to
			 * allocate a new buffer (mp) and copy the receive
			 * buffer content into our newly allocated buffer
			 * (mp). Don't disturb the descriptor buffer
			 * address. (note: copying)
			 */
			newpkt = e1000g_get_buf(rx_data);

			if (newpkt != NULL) {
				/*
				 * Get the mblk associated to the data,
				 * and strip it off the sw packet.
				 */
				nmp = packet->mp;
				packet->mp = NULL;
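				/*
				 * Take an extra reference on the rx_sw_packet
				 * while its buffer is loaned upstream; it is
				 * dropped again in e1000g_rxfree_func() when
				 * the mblk is freed.
				 */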
				atomic_inc_32(&packet->ref_cnt);

				/*
				 * Now replace the old buffer with the new
				 * one we got from the free list.
				 * Both the RxSwPacket as well as the
				 * Receive Buffer Descriptor will now
				 * point to this new packet.
				 */
				packet = newpkt;

				current_desc->buffer_addr =
				    newpkt->rx_buf->dma_address;

				need_copy = B_FALSE;
			} else {
				/* EMPTY */
				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
			}
		}

rx_copy:
		if (need_copy) {
			/*
			 * Either no free buffers were available or the
			 * packet is small enough that copying is cheaper;
			 * bcopy the data from the receive buffer and keep
			 * the original buffer in place.
			 */
			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
			    BPRI_MED)) == NULL) {
				/*
				 * The system has no buffers available
				 * to send up the incoming packet, hence
				 * the packet will have to be processed
				 * when more buffers are available.
				 */
				E1000G_STAT(rx_ring->stat_allocb_fail);
				goto rx_drop;
			}
			nmp->b_rptr += E1000G_IPALIGNROOM;
			nmp->b_wptr += E1000G_IPALIGNROOM;
			/*
			 * The free list did not have any buffers
			 * available, so the received packet will
			 * have to be copied into a mp and the original
			 * buffer will have to be retained for future
			 * packet reception.
			 */
			bcopy(rx_buf->address, nmp->b_wptr, length);
		}

		ASSERT(nmp != NULL);
		nmp->b_wptr += length;

		if (rx_data->rx_mblk == NULL) {
			/*
			 * TCP/UDP checksum offload and
			 * IP checksum offload
			 */
			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
				/*
				 * Check TCP/UDP checksum
				 */
				if ((current_desc->status &
				    E1000_RXD_STAT_TCPCS) &&
				    !(current_desc->errors &
				    E1000_RXD_ERR_TCPE))
					cksumflags |= HCK_FULLCKSUM_OK;
				/*
				 * Check IP Checksum
				 */
				if ((current_desc->status &
				    E1000_RXD_STAT_IPCS) &&
				    !(current_desc->errors &
				    E1000_RXD_ERR_IPE))
					cksumflags |= HCK_IPV4_HDRCKSUM_OK;
			}
		}

		/*
		 * We need to maintain our packet chain in the global
		 * Adapter structure, since Rx processing can end
		 * with a fragment that has no EOP set.
		 */
		if (rx_data->rx_mblk == NULL) {
			/* Get the head of the message chain */
			rx_data->rx_mblk = nmp;
			rx_data->rx_mblk_tail = nmp;
			rx_data->rx_mblk_len = length;
		} else {	/* Not the first packet */
			/* Continue adding buffers */
			rx_data->rx_mblk_tail->b_cont = nmp;
			rx_data->rx_mblk_tail = nmp;
			rx_data->rx_mblk_len += length;
		}
		ASSERT(rx_data->rx_mblk != NULL);
		ASSERT(rx_data->rx_mblk_tail != NULL);
		ASSERT(rx_data->rx_mblk_tail->b_cont == NULL);

		/*
		 * This mp is now ready to travel upwards, but more
		 * fragments may still be coming.
		 * We will send the packet upwards as soon as we get
		 * EOP set on it.
		 */
		if (!end_of_packet) {
			/*
			 * Continue to get the next descriptor;
			 * the tail is advanced at the end.
			 */
			goto rx_next_desc;
		}

rx_end_of_packet:
		if (E1000G_IS_VLAN_PACKET(rx_data->rx_mblk->b_rptr))
			max_size = Adapter->max_frame_size - ETHERFCSL;

		if ((rx_data->rx_mblk_len > max_size) ||
		    (rx_data->rx_mblk_len < min_size)) {
			E1000G_STAT(rx_ring->stat_size_error);
			goto rx_drop;
		}

		/*
		 * Found a packet with EOP.
		 * Process the last fragment.
		 */
		if (cksumflags != 0) {
			mac_hcksum_set(rx_data->rx_mblk,
			    0, 0, 0, 0, cksumflags);
			cksumflags = 0;
		}

		/*
		 * Count packets that span multi-descriptors
		 */
		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
		    (rx_data->rx_mblk->b_cont != NULL));

		/*
		 * Append to list to send upstream
		 */
		if (ret_mp == NULL) {
			ret_mp = ret_nmp = rx_data->rx_mblk;
		} else {
			ret_nmp->b_next = rx_data->rx_mblk;
			ret_nmp = rx_data->rx_mblk;
		}
		ret_nmp->b_next = NULL;
		*tail = ret_nmp;
		chain_sz += length;

		rx_data->rx_mblk = NULL;
		rx_data->rx_mblk_tail = NULL;
		rx_data->rx_mblk_len = 0;

		pkt_count++;

rx_next_desc:
		/*
		 * Zero out the receive descriptor's status
		 */
		current_desc->status = 0;

		if (current_desc == rx_data->rbd_last)
			rx_data->rbd_next = rx_data->rbd_first;
		else
			rx_data->rbd_next++;

		last_desc = current_desc;
		current_desc = rx_data->rbd_next;

		/*
		 * Put the buffer that we just indicated back
		 * at the end of our list
		 */
		QUEUE_PUSH_TAIL(&rx_data->recv_list,
		    &packet->Link);
	}	/* while loop */

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	/*
	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(last_desc - rx_data->rbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	Adapter->rx_pkt_cnt = pkt_count;

	return (ret_mp);

rx_drop:
	/*
	 * Zero out the receive descriptor's status
	 */
	current_desc->status = 0;

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	if (current_desc == rx_data->rbd_last)
		rx_data->rbd_next = rx_data->rbd_first;
	else
		rx_data->rbd_next++;

	last_desc = current_desc;

	QUEUE_PUSH_TAIL(&rx_data->recv_list, &packet->Link);
	/*
	 * Reclaim all old buffers already allocated during
	 * jumbo receives, i.e. for an incomplete reception.
	 */
	if (rx_data->rx_mblk != NULL) {
		freemsg(rx_data->rx_mblk);
		rx_data->rx_mblk = NULL;
		rx_data->rx_mblk_tail = NULL;
		rx_data->rx_mblk_len = 0;
	}
	/*
	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(last_desc - rx_data->rbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	return (ret_mp);
}

/*
 * This is part of a workaround for the I219, see e1000g_flush_desc_rings()
 * for more information.
 *
 * Flush all descriptors in the rx ring and disable it.
 */
void
e1000g_flush_rx_ring(struct e1000g *Adapter)
{
	struct e1000_hw *hw = &Adapter->shared;
	uint32_t rctl, rxdctl;

	rctl = E1000_READ_REG(hw, E1000_RCTL);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
	E1000_WRITE_FLUSH(hw);
	usec_delay(150);

	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
	/* Zero the lower 14 bits (prefetch and host thresholds). */
	rxdctl &= 0xffffc000;
	/*
	 * Update thresholds: prefetch threshold to 31, host threshold to 1
	 * and make sure the granularity is "descriptors" and not
	 * "cache lines".
	 */
	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);

	/* Momentarily enable the RX ring for the changes to take effect */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
	E1000_WRITE_FLUSH(hw);
	usec_delay(150);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
}