/*
 * This file is provided under a CDDLv1 license. When using or
 * redistributing this file, you may do so under this license.
 * In redistributing this file this license must be included
 * and no other modification of this header file is permitted.
 *
 * CDDL LICENSE SUMMARY
 *
 * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
 *
 * The contents of this file are subject to the terms of Version
 * 1.0 of the Common Development and Distribution License (the "License").
 *
 * You should have received a copy of the License with this software.
 * You can obtain a copy of the License at
 * http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2016 Joyent, Inc.
 */

/*
 * **********************************************************************
 *
 * Module Name:
 *	e1000g_rx.c
 *
 * Abstract:
 *	This file contains the routines that handle the receive
 *	interrupt and send received packets up to the upper layer.
 *	A zero copy is done when free buffers are available in the
 *	pool.
 *
 * **********************************************************************
 */

#include "e1000g_sw.h"
#include "e1000g_debug.h"

static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_data_t *rx_data);
#pragma inline(e1000g_get_buf)

/*
 * e1000g_rxfree_func - the call-back function to reclaim an rx buffer
 *
 * This function is called when an mp is freed by the user through a
 * freeb call (only for mps constructed through desballoc). It returns
 * the freed buffer to the freelist.
 */
void
e1000g_rxfree_func(p_rx_sw_packet_t packet)
{
	e1000g_rx_data_t *rx_data;
	private_devi_list_t *devi_node;
	struct e1000g *Adapter;
	uint32_t ring_cnt;
	uint32_t ref_cnt;
	unsigned char *address;

	if (packet->ref_cnt == 0) {
		/*
		 * This case only happens when rx buffers are being freed
		 * in e1000g_stop() and freemsg() is called.
		 */
		return;
	}

	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;

	if (packet->mp == NULL) {
		/*
		 * Allocate an mblk that binds to the data buffer
		 */
		address = (unsigned char *)packet->rx_buf->address;
		if (address != NULL) {
			packet->mp = desballoc((unsigned char *)
			    address, packet->rx_buf->size,
			    BPRI_MED, &packet->free_rtn);
		}
	}

	/*
	 * Enqueue the recycled packets in a recycle queue. When the freelist
	 * dries up, move the entire chain of packets from the recycle queue
	 * to the freelist. This helps avoid per-packet mutex contention
	 * around the freelist.
	 */
	mutex_enter(&rx_data->recycle_lock);
	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
	rx_data->recycle_freepkt++;
	mutex_exit(&rx_data->recycle_lock);

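	/*
	 * Each rx_sw_packet carries a reference count: the driver holds one
	 * reference while the packet sits on a ring or list, and
	 * e1000g_receive() takes an extra hold before passing the mblk
	 * upstream. The count only drops to zero here on the stop/detach
	 * path, once both the stack and the driver are done with the
	 * buffer, at which point it is truly freed below.
	 */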
	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
	if (ref_cnt == 0) {
		mutex_enter(&e1000g_rx_detach_lock);
		e1000g_free_rx_sw_packet(packet, B_FALSE);

		atomic_dec_32(&rx_data->pending_count);
		atomic_dec_32(&e1000g_mblks_pending);

		if ((rx_data->pending_count == 0) &&
		    (rx_data->flag & E1000G_RX_STOPPED)) {
			devi_node = rx_data->priv_devi_node;

			if (devi_node != NULL) {
				ring_cnt = atomic_dec_32_nv(
				    &devi_node->pending_rx_count);
				if ((ring_cnt == 0) &&
				    (devi_node->flag &
				    E1000G_PRIV_DEVI_DETACH)) {
					e1000g_free_priv_devi_node(
					    devi_node);
				}
			} else {
				Adapter = rx_data->rx_ring->adapter;
				atomic_dec_32(
				    &Adapter->pending_rx_count);
			}

			e1000g_free_rx_pending_buffers(rx_data);
			e1000g_free_rx_data(rx_data);
		}
		mutex_exit(&e1000g_rx_detach_lock);
	}
}

/*
 * e1000g_rx_setup - setup rx data structures
 *
 * This routine initializes all of the receive related
 * structures. This includes the receive descriptors, the
 * actual receive buffers, and the rx_sw_packet software
 * structures.
 */
void
e1000g_rx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_rx_sw_packet_t packet;
	struct e1000_rx_desc *descriptor;
	uint32_t buf_low;
	uint32_t buf_high;
	uint32_t reg_val;
	uint32_t rctl;
	uint32_t rxdctl;
	uint32_t ert;
	uint16_t phy_data;
	int i;
	int size;
	e1000g_rx_data_t *rx_data;

	hw = &Adapter->shared;
	rx_data = Adapter->rx_ring->rx_data;

	/*
	 * Zero out all of the receive buffer descriptor memory;
	 * this ensures any previous data or status is erased.
	 */
	bzero(rx_data->rbd_area,
	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);

	if (!Adapter->rx_buffer_setup) {
		/* Init the list of "Receive Buffer" */
		QUEUE_INIT_LIST(&rx_data->recv_list);

		/* Init the list of "Free Receive Buffer" */
		QUEUE_INIT_LIST(&rx_data->free_list);

		/* Init the list of "Recycle Receive Buffer" */
		QUEUE_INIT_LIST(&rx_data->recycle_list);

		/*
		 * Setup the Receive list and the Free list. Note that
		 * both were allocated in one packet area.
		 */
		packet = rx_data->packet_area;
		descriptor = rx_data->rbd_first;

		for (i = 0; i < Adapter->rx_desc_num;
		    i++, packet = packet->next, descriptor++) {
			ASSERT(packet != NULL);
			ASSERT(descriptor != NULL);
			descriptor->buffer_addr =
			    packet->rx_buf->dma_address;

			/* Add this rx_sw_packet to the receive list */
			QUEUE_PUSH_TAIL(&rx_data->recv_list,
			    &packet->Link);
		}

		for (i = 0; i < Adapter->rx_freelist_num;
		    i++, packet = packet->next) {
			ASSERT(packet != NULL);
			/* Add this rx_sw_packet to the free list */
			QUEUE_PUSH_TAIL(&rx_data->free_list,
			    &packet->Link);
		}
		rx_data->avail_freepkt = Adapter->rx_freelist_num;
		rx_data->recycle_freepkt = 0;

		Adapter->rx_buffer_setup = B_TRUE;
	} else {
		/* Setup the initial pointer to the first rx descriptor */
		packet = (p_rx_sw_packet_t)
		    QUEUE_GET_HEAD(&rx_data->recv_list);
		descriptor = rx_data->rbd_first;

		for (i = 0; i < Adapter->rx_desc_num; i++) {
			ASSERT(packet != NULL);
			ASSERT(descriptor != NULL);
			descriptor->buffer_addr =
			    packet->rx_buf->dma_address;

			/* Get next rx_sw_packet */
			packet = (p_rx_sw_packet_t)
			    QUEUE_GET_NEXT(&rx_data->recv_list, &packet->Link);
			descriptor++;
		}
	}

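	/*
	 * RDTR holds the Rx interrupt packet delay timer and RADV the
	 * absolute delay timer (both in units of 1.024 usec, per the Intel
	 * documentation); together they let the hardware coalesce receive
	 * interrupts instead of raising one per packet.
	 */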
	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
	if (hw->mac.type >= e1000_82540) {
		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
		    Adapter->rx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
	}

	/*
	 * Setup our descriptor pointers
	 */
	rx_data->rbd_next = rx_data->rbd_first;

	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
	size = E1000_READ_REG(hw, E1000_RDLEN(0));
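	/*
	 * The read-back of RDLEN above is presumably just to make sure the
	 * write has posted before the base-address registers are programmed.
	 */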

	/* To get lower order bits */
	buf_low = (uint32_t)rx_data->rbd_dma_addr;
	/* To get the higher order bits */
	buf_high = (uint32_t)(rx_data->rbd_dma_addr >> 32);

	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);

	/*
	 * Setup our HW Rx Head & Tail descriptor pointers
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(rx_data->rbd_last - rx_data->rbd_first));
	E1000_WRITE_REG(hw, E1000_RDH(0), 0);
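	/*
	 * With the head at 0 and the tail at the last descriptor (as
	 * programmed above), hardware may fill every descriptor except the
	 * one at the tail; it stops when the head would catch up with the
	 * tail, so the two never alias.
	 */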

	/*
	 * Setup the Receive Control Register (RCTL), and ENABLE the
	 * receiver. The initial configuration is to: Enable the receiver,
	 * accept broadcasts, discard bad packets (and long packets),
	 * disable VLAN filter checking, set the receive descriptor
	 * minimum threshold size to 1/2, and the receive buffer size to
	 * 2k.
	 */
	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
	    E1000_RCTL_RDMTS_HALF |
	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */

	if (Adapter->default_mtu > ETHERMTU)
		rctl |= E1000_RCTL_LPE;	/* Large Packet Enable bit */

	if (Adapter->strip_crc)
		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */

	if (Adapter->mem_workaround_82546 &&
	    ((hw->mac.type == e1000_82545) ||
	    (hw->mac.type == e1000_82546) ||
	    (hw->mac.type == e1000_82546_rev_3))) {
		rctl |= E1000_RCTL_SZ_2048;
	} else {
		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
		else
			rctl |= E1000_RCTL_SZ_2048;
	}

	if (e1000_tbi_sbp_enabled_82543(hw))
		rctl |= E1000_RCTL_SBP;

	/*
	 * Enable Early Receive Threshold (ERT) on supported devices.
	 * It only takes effect when the packet size is equal to or larger
	 * than the specified value (in 8 byte units), e.g. when using
	 * jumbo frames.
	 */
	if ((hw->mac.type == e1000_82573) ||
	    (hw->mac.type == e1000_82574) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan)) {

		ert = E1000_ERT_2048;

		/*
		 * Special modification when ERT and
		 * jumbo frames are enabled
		 */
		if (Adapter->default_mtu > ETHERMTU) {
			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
			ert |= (1 << 13);
		}

		E1000_WRITE_REG(hw, E1000_ERT, ert);
	}

	/* Workaround errata on 82577/8 adapters with large frames */
	if ((hw->mac.type == e1000_pchlan) &&
	    (Adapter->default_mtu > ETHERMTU)) {

		(void) e1000_read_phy_reg(hw, PHY_REG(770, 26), &phy_data);
		phy_data &= 0xfff8;
		phy_data |= (1 << 2);
		(void) e1000_write_phy_reg(hw, PHY_REG(770, 26), phy_data);

		if (hw->phy.type == e1000_phy_82577) {
			(void) e1000_read_phy_reg(hw, 22, &phy_data);
			phy_data &= 0x0fff;
			phy_data |= (1 << 14);
			(void) e1000_write_phy_reg(hw, 0x10, 0x2823);
			(void) e1000_write_phy_reg(hw, 0x11, 0x0003);
			(void) e1000_write_phy_reg(hw, 22, phy_data);
		}
	}

	/* Workaround errata on 82579 adapters with large frames */
	if (hw->mac.type == e1000_pch2lan) {
		boolean_t enable_jumbo = (Adapter->default_mtu > ETHERMTU ?
		    B_TRUE : B_FALSE);

		if (e1000_lv_jumbo_workaround_ich8lan(hw, enable_jumbo) != 0)
			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
			    "failed to enable jumbo frame workaround mode\n");
	}

	reg_val =
	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */

	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);

	/*
	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
	 * processing of received IPV6 extension headers
	 */
	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
	}

	/* Write to enable the receive unit */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
}

/*
 * e1000g_get_buf - get an rx sw packet from the free_list
 */
static p_rx_sw_packet_t
e1000g_get_buf(e1000g_rx_data_t *rx_data)
{
	p_rx_sw_packet_t packet;
	struct e1000g *Adapter;

	Adapter = rx_data->rx_ring->adapter;

	mutex_enter(&rx_data->freelist_lock);
	packet = (p_rx_sw_packet_t)
	    QUEUE_POP_HEAD(&rx_data->free_list);
	if (packet != NULL) {
		rx_data->avail_freepkt--;
		goto end;
	}

	/*
	 * If the freelist has no packets, check the recycle list
	 * to see if any packets are available there.
	 */
	mutex_enter(&rx_data->recycle_lock);
	QUEUE_SWITCH(&rx_data->free_list, &rx_data->recycle_list);
	rx_data->avail_freepkt = rx_data->recycle_freepkt;
	rx_data->recycle_freepkt = 0;
	mutex_exit(&rx_data->recycle_lock);
	packet = (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->free_list);
	if (packet != NULL) {
		rx_data->avail_freepkt--;
		goto end;
	}

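	/*
	 * Both lists were empty. If we are still below the configured
	 * freelist limit, try to allocate more rx packets and retry the
	 * freelist once.
	 */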
	if (Adapter->rx_freelist_num < Adapter->rx_freelist_limit) {
		(void) e1000g_increase_rx_packets(rx_data);
		packet = (p_rx_sw_packet_t)
		    QUEUE_POP_HEAD(&rx_data->free_list);
		if (packet != NULL) {
			rx_data->avail_freepkt--;
		}
	}

end:
	mutex_exit(&rx_data->freelist_lock);
	return (packet);
}

/*
 * e1000g_receive - main receive routine
 *
 * This routine will process packets received in an interrupt
 */
mblk_t *
e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
{
	struct e1000_hw *hw;
	mblk_t *nmp;
	mblk_t *ret_mp;
	mblk_t *ret_nmp;
	struct e1000_rx_desc *current_desc;
	struct e1000_rx_desc *last_desc;
	p_rx_sw_packet_t packet;
	p_rx_sw_packet_t newpkt;
	uint16_t length;
	uint32_t pkt_count;
	uint32_t desc_count;
	boolean_t accept_frame;
	boolean_t end_of_packet;
	boolean_t need_copy;
	struct e1000g *Adapter;
	dma_buffer_t *rx_buf;
	uint16_t cksumflags;
	uint_t chain_sz = 0;
	e1000g_rx_data_t *rx_data;
	uint32_t max_size;
	uint32_t min_size;

	ret_mp = NULL;
	ret_nmp = NULL;
	pkt_count = 0;
	desc_count = 0;
	cksumflags = 0;

	Adapter = rx_ring->adapter;
	rx_data = rx_ring->rx_data;
	hw = &Adapter->shared;

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);

	if (e1000g_check_dma_handle(rx_data->rbd_dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
		return (NULL);
	}

	current_desc = rx_data->rbd_next;
	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
		/*
		 * don't send anything up. just clear the RFD
		 */
		E1000G_DEBUG_STAT(rx_ring->stat_none);
		return (NULL);
	}

	max_size = Adapter->max_frame_size - ETHERFCSL - VLAN_TAGSZ;
	min_size = ETHERMIN;
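	/*
	 * max_size is the largest payload we expect once the FCS and a
	 * possible VLAN tag are accounted for; for frames that turn out to
	 * be VLAN tagged it is widened by VLAN_TAGSZ at rx_end_of_packet
	 * below. Anything outside [min_size, max_size] is dropped.
	 */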

	/*
	 * Loop through the receive descriptors starting at the last known
	 * descriptor owned by the hardware that begins a packet.
	 */
	while ((current_desc->status & E1000_RXD_STAT_DD) &&
	    (pkt_count < Adapter->rx_limit_onintr) &&
	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {

		desc_count++;
		/*
		 * A packet can span multiple descriptors in the jumbo
		 * frame case.
		 */
		if (current_desc->status & E1000_RXD_STAT_EOP) {
			/* packet has EOP set */
			end_of_packet = B_TRUE;
		} else {
			/*
			 * If this received buffer does not have the
			 * End-Of-Packet bit set, the received packet
			 * will consume multiple buffers. We won't send this
			 * packet upstack until we get all the related buffers.
			 */
			end_of_packet = B_FALSE;
		}

		/*
		 * Get a pointer to the actual receive buffer.
		 * The mp->b_rptr is mapped to the current descriptor's
		 * buffer address.
		 */
		packet =
		    (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
		ASSERT(packet != NULL);

		rx_buf = packet->rx_buf;

		length = current_desc->length;

#ifdef __sparc
		if (packet->dma_type == USE_DVMA)
			dvma_sync(rx_buf->dma_handle, 0,
			    DDI_DMA_SYNC_FORKERNEL);
		else
			(void) ddi_dma_sync(rx_buf->dma_handle,
			    E1000G_IPALIGNROOM, length,
			    DDI_DMA_SYNC_FORKERNEL);
#else
		(void) ddi_dma_sync(rx_buf->dma_handle,
		    E1000G_IPALIGNROOM, length,
		    DDI_DMA_SYNC_FORKERNEL);
#endif

		if (e1000g_check_dma_handle(
		    rx_buf->dma_handle) != DDI_FM_OK) {
			ddi_fm_service_impact(Adapter->dip,
			    DDI_SERVICE_DEGRADED);
			Adapter->e1000g_state |= E1000G_ERROR;

			goto rx_drop;
		}

		/*
		 * Workaround for redmine #3100. After a switch reset, the
		 * packet queue and descriptor DMA addresses can get out of
		 * sync. Detect this and flag the error; the watchdog timer
		 * will do the reset.
		 */
		if (current_desc->buffer_addr != rx_buf->dma_address) {
			e1000g_log(Adapter, CE_WARN, "receive dma descriptors "
			    "got out of sync, resetting adapter");
			Adapter->e1000g_state |= E1000G_ERROR;
		}
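		/*
		 * Accept the frame if the hardware flagged no errors at all,
		 * or if the only errors flagged are TCP/IP checksum errors;
		 * checksum failures are reported by leaving the hcksum flags
		 * unset further down rather than by dropping the frame here.
		 */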
		accept_frame = (current_desc->errors == 0) ||
		    ((current_desc->errors &
		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);

		if (hw->mac.type == e1000_82543) {
			unsigned char last_byte;

			last_byte =
			    *((unsigned char *)rx_buf->address + length - 1);

			if (TBI_ACCEPT(hw,
			    current_desc->status, current_desc->errors,
			    current_desc->length, last_byte,
			    Adapter->min_frame_size, Adapter->max_frame_size)) {

				e1000_tbi_adjust_stats(Adapter,
				    length, hw->mac.addr);

				length--;
				accept_frame = B_TRUE;
			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
			    (current_desc->errors == E1000_RXD_ERR_CE)) {
				accept_frame = B_TRUE;
			}
		}

		/*
		 * Indicate the packet to the NOS if it was good.
		 * Normally, hardware will discard bad packets for us.
		 * Check for the packet to be a valid Ethernet packet.
		 */
		if (!accept_frame) {
			/*
			 * Error in the incoming packet: either the packet is
			 * not an Ethernet-sized packet, or the packet has an
			 * error. In either case, the packet will simply be
			 * discarded.
			 */
			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
			    "Process Receive Interrupts: Error in Packet\n");

			E1000G_STAT(rx_ring->stat_error);
			/*
			 * Bail out here rather than letting the rest of the
			 * while loop run for nothing; it is more efficient
			 * and less error prone.
			 */
			goto rx_drop;
		}

		/*
		 * If the Ethernet CRC is not stripped by the hardware,
		 * we need to strip it before sending it up to the stack.
		 */
		if (end_of_packet && !Adapter->strip_crc) {
			if (length > ETHERFCSL) {
				length -= ETHERFCSL;
			} else {
				/*
				 * If the fragment is smaller than the CRC,
				 * drop this fragment and finish processing
				 * the end of the packet.
				 */
				if (rx_data->rx_mblk_tail == NULL) {
					E1000G_STAT(rx_ring->stat_crc_only_pkt);
					goto rx_next_desc;
				}

				rx_data->rx_mblk_tail->b_wptr -=
				    ETHERFCSL - length;
				rx_data->rx_mblk_len -=
				    ETHERFCSL - length;
				goto rx_end_of_packet;
			}
		}

		need_copy = B_TRUE;

		if (length <= Adapter->rx_bcopy_thresh)
			goto rx_copy;

		/*
		 * Get the pre-constructed mblk that was associated
		 * to the receive data buffer.
		 */
		if (packet->mp == NULL) {
			packet->mp = desballoc((unsigned char *)
			    rx_buf->address, length,
			    BPRI_MED, &packet->free_rtn);
		}

		if (packet->mp != NULL) {
			/*
			 * We have two buffer pools: one associated with the
			 * Rx descriptors and a separate freelist pool.
			 * Each time we get a good packet, try to get a buffer
			 * from the freelist pool using e1000g_get_buf. If we
			 * get a free buffer, replace the descriptor buffer
			 * address with the free buffer we just got, and pass
			 * the pre-constructed mblk upstack (note: no copying).
			 *
			 * If we failed to get a free buffer, then try to
			 * allocate a new buffer (mp) and copy the receive
			 * buffer content into our newly allocated buffer.
			 * Don't disturb the descriptor buffer address
			 * (note: copying).
			 */
			newpkt = e1000g_get_buf(rx_data);

			if (newpkt != NULL) {
				/*
				 * Get the mblk associated to the data,
				 * and strip it off the sw packet.
				 */
				nmp = packet->mp;
				packet->mp = NULL;
				atomic_inc_32(&packet->ref_cnt);

				/*
				 * Now replace the old buffer with the new
				 * one we got from the free list.
				 * Both the rx_sw_packet and the Receive
				 * Buffer Descriptor will now point to this
				 * new packet.
				 */
				packet = newpkt;

				current_desc->buffer_addr =
				    newpkt->rx_buf->dma_address;

				need_copy = B_FALSE;
			} else {
				/* EMPTY */
				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
			}
		}

rx_copy:
		if (need_copy) {
			/*
			 * No buffers available on the free list, so
			 * bcopy the data from the buffer and keep the
			 * original buffer. We would rather not do this,
			 * but there is no other way.
			 */
			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
			    BPRI_MED)) == NULL) {
				/*
				 * The system has no buffers available
				 * to send up the incoming packet, hence
				 * the packet will have to be processed
				 * when there are more buffers available.
				 */
				E1000G_STAT(rx_ring->stat_allocb_fail);
				goto rx_drop;
			}
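			/*
			 * Reserve E1000G_IPALIGNROOM bytes (presumably 2) at
			 * the head of the new mblk so that the IP header
			 * following the 14-byte Ethernet header lands on a
			 * 4-byte boundary.
			 */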
			nmp->b_rptr += E1000G_IPALIGNROOM;
			nmp->b_wptr += E1000G_IPALIGNROOM;
			/*
			 * The free list did not have any buffers
			 * available, so the received packet will
			 * have to be copied into an mp and the original
			 * buffer will have to be retained for future
			 * packet reception.
			 */
			bcopy(rx_buf->address, nmp->b_wptr, length);
		}

		ASSERT(nmp != NULL);
		nmp->b_wptr += length;

		if (rx_data->rx_mblk == NULL) {
			/*
			 * TCP/UDP checksum offload and
			 * IP checksum offload
			 */
			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
				/*
				 * Check TCP/UDP checksum
				 */
				if ((current_desc->status &
				    E1000_RXD_STAT_TCPCS) &&
				    !(current_desc->errors &
				    E1000_RXD_ERR_TCPE))
					cksumflags |= HCK_FULLCKSUM_OK;
				/*
				 * Check IP Checksum
				 */
				if ((current_desc->status &
				    E1000_RXD_STAT_IPCS) &&
				    !(current_desc->errors &
				    E1000_RXD_ERR_IPE))
					cksumflags |= HCK_IPV4_HDRCKSUM_OK;
			}
		}

		/*
		 * We need to maintain our packet chain in the global
		 * Adapter structure, since the Rx processing can end
		 * with a fragment that has no EOP set.
		 */
		if (rx_data->rx_mblk == NULL) {
			/* Get the head of the message chain */
			rx_data->rx_mblk = nmp;
			rx_data->rx_mblk_tail = nmp;
			rx_data->rx_mblk_len = length;
		} else {	/* Not the first packet */
			/* Continue adding buffers */
			rx_data->rx_mblk_tail->b_cont = nmp;
			rx_data->rx_mblk_tail = nmp;
			rx_data->rx_mblk_len += length;
		}
		ASSERT(rx_data->rx_mblk != NULL);
		ASSERT(rx_data->rx_mblk_tail != NULL);
		ASSERT(rx_data->rx_mblk_tail->b_cont == NULL);

		/*
		 * Now this MP is ready to travel upwards, but some more
		 * fragments may be coming.
		 * We will send the packet upward as soon as we get EOP
		 * set on the packet.
		 */
		if (!end_of_packet) {
			/*
			 * Continue to get the next descriptor;
			 * the tail will be advanced at the end.
			 */
			goto rx_next_desc;
		}

rx_end_of_packet:
		if (E1000G_IS_VLAN_PACKET(rx_data->rx_mblk->b_rptr))
			max_size = Adapter->max_frame_size - ETHERFCSL;

		if ((rx_data->rx_mblk_len > max_size) ||
		    (rx_data->rx_mblk_len < min_size)) {
			E1000G_STAT(rx_ring->stat_size_error);
			goto rx_drop;
		}

		/*
		 * Found a packet with EOP.
		 * Process the last fragment.
		 */
		if (cksumflags != 0) {
			mac_hcksum_set(rx_data->rx_mblk,
			    0, 0, 0, 0, cksumflags);
			cksumflags = 0;
		}

		/*
		 * Count packets that span multi-descriptors
		 */
		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
		    (rx_data->rx_mblk->b_cont != NULL));

		/*
		 * Append to list to send upstream
		 */
		if (ret_mp == NULL) {
			ret_mp = ret_nmp = rx_data->rx_mblk;
		} else {
			ret_nmp->b_next = rx_data->rx_mblk;
			ret_nmp = rx_data->rx_mblk;
		}
		ret_nmp->b_next = NULL;
		*tail = ret_nmp;
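		/*
		 * ret_mp heads the chain handed back to the caller; *tail
		 * always points at the last mblk so the caller can append
		 * further chains without walking this one.
		 */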
		chain_sz += length;

		rx_data->rx_mblk = NULL;
		rx_data->rx_mblk_tail = NULL;
		rx_data->rx_mblk_len = 0;

		pkt_count++;

rx_next_desc:
		/*
		 * Zero out the receive descriptor's status
		 */
		current_desc->status = 0;

		if (current_desc == rx_data->rbd_last)
			rx_data->rbd_next = rx_data->rbd_first;
		else
			rx_data->rbd_next++;

		last_desc = current_desc;
		current_desc = rx_data->rbd_next;

		/*
		 * Put the buffer that we just indicated back
		 * at the end of our list
		 */
		QUEUE_PUSH_TAIL(&rx_data->recv_list,
		    &packet->Link);
	}	/* while loop */

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

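	/*
	 * last_desc is the final descriptor software processed; writing it
	 * to RDT below hands every descriptor we just consumed back to the
	 * hardware for reuse.
	 */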
	/*
	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(last_desc - rx_data->rbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	Adapter->rx_pkt_cnt = pkt_count;

	return (ret_mp);

rx_drop:
	/*
	 * Zero out the receive descriptor's status
	 */
	current_desc->status = 0;

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	if (current_desc == rx_data->rbd_last)
		rx_data->rbd_next = rx_data->rbd_first;
	else
		rx_data->rbd_next++;

	last_desc = current_desc;

	QUEUE_PUSH_TAIL(&rx_data->recv_list, &packet->Link);

	/*
	 * Reclaim all old buffers already allocated during
	 * jumbo receives, i.e. for an incomplete reception.
	 */
	if (rx_data->rx_mblk != NULL) {
		freemsg(rx_data->rx_mblk);
		rx_data->rx_mblk = NULL;
		rx_data->rx_mblk_tail = NULL;
		rx_data->rx_mblk_len = 0;
	}

	/*
	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(last_desc - rx_data->rbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	return (ret_mp);
}

/*
 * This is part of a workaround for the I219, see e1000g_flush_desc_rings() for
 * more information.
 *
 * Flush all descriptors in the rx ring and disable it.
 */
void
e1000g_flush_rx_ring(struct e1000g *Adapter)
{
	struct e1000_hw *hw = &Adapter->shared;
	uint32_t rctl, rxdctl;

	rctl = E1000_READ_REG(hw, E1000_RCTL);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
	E1000_WRITE_FLUSH(hw);
	usec_delay(150);

	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
	/* Zero the lower 14 bits (prefetch and host thresholds). */
	rxdctl &= 0xffffc000;
	/*
	 * Update thresholds: prefetch threshold to 31, host threshold to 1,
	 * and make sure the granularity is "descriptors" and not
	 * "cache lines".
	 */
	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);

	/* Momentarily enable the RX ring for the changes to take effect */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
	E1000_WRITE_FLUSH(hw);
	usec_delay(150);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
}