xref: /titanic_51/usr/src/uts/common/io/e1000g/e1000g_rx.c (revision b60ae21d2303cc238394b46cddb93a2dbcdb2e07)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
27  */
28 
29 /*
30  * **********************************************************************
31  *									*
32  * Module Name:								*
33  *   e1000g_rx.c							*
34  *									*
35  * Abstract:								*
36  *   This file contains some routines that take care of Receive		*
37  *   interrupt and also for the received packets it sends up to		*
38  *   upper layer.							*
39  *   It tries to do a zero copy if free buffers are available in	*
40  *   the pool.								*
41  *									*
42  * **********************************************************************
43  */
44 
45 #include "e1000g_sw.h"
46 #include "e1000g_debug.h"
47 
/*
 * Forward declaration: e1000g_get_buf() hands out an rx sw packet from
 * the free list and is defined later in this file. The pragma asks the
 * compiler to inline it into its callers.
 */
static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_data_t *rx_data);
#pragma	inline(e1000g_get_buf)
50 
51 /*
52  * e1000g_rxfree_func - the call-back function to reclaim rx buffer
53  *
54  * This function is called when an mp is freed by the user thru
55  * freeb call (Only for mp constructed through desballoc call)
56  * It returns back the freed buffer to the freelist
57  */
58 void
59 e1000g_rxfree_func(p_rx_sw_packet_t packet)
60 {
61 	e1000g_rx_data_t *rx_data;
62 	private_devi_list_t *devi_node;
63 	struct e1000g *Adapter;
64 	uint32_t ring_cnt;
65 	uint32_t ref_cnt;
66 	unsigned char *address;
67 
68 	if (packet->ref_cnt == 0) {
69 		/*
70 		 * This case only happens when rx buffers are being freed
71 		 * in e1000g_stop() and freemsg() is called.
72 		 */
73 		return;
74 	}
75 
76 	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;
77 
78 	if (packet->mp == NULL) {
79 		/*
80 		 * Allocate a mblk that binds to the data buffer
81 		 */
82 		address = (unsigned char *)packet->rx_buf->address;
83 		if (address != NULL) {
84 			packet->mp = desballoc((unsigned char *)
85 			    address, packet->rx_buf->size,
86 			    BPRI_MED, &packet->free_rtn);
87 		}
88 	}
89 
90 	/*
91 	 * Enqueue the recycled packets in a recycle queue. When freelist
92 	 * dries up, move the entire chain of packets from recycle queue
93 	 * to freelist. This helps in avoiding per packet mutex contention
94 	 * around freelist.
95 	 */
96 	mutex_enter(&rx_data->recycle_lock);
97 	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
98 	rx_data->recycle_freepkt++;
99 	mutex_exit(&rx_data->recycle_lock);
100 
101 	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
102 	if (ref_cnt == 0) {
103 		mutex_enter(&e1000g_rx_detach_lock);
104 		e1000g_free_rx_sw_packet(packet, B_FALSE);
105 
106 		atomic_dec_32(&rx_data->pending_count);
107 		atomic_dec_32(&e1000g_mblks_pending);
108 
109 		if ((rx_data->pending_count == 0) &&
110 		    (rx_data->flag & E1000G_RX_STOPPED)) {
111 			devi_node = rx_data->priv_devi_node;
112 
113 			if (devi_node != NULL) {
114 				ring_cnt = atomic_dec_32_nv(
115 				    &devi_node->pending_rx_count);
116 				if ((ring_cnt == 0) &&
117 				    (devi_node->flag &
118 				    E1000G_PRIV_DEVI_DETACH)) {
119 					e1000g_free_priv_devi_node(
120 					    devi_node);
121 				}
122 			} else {
123 				Adapter = rx_data->rx_ring->adapter;
124 				atomic_dec_32(
125 				    &Adapter->pending_rx_count);
126 			}
127 
128 			e1000g_free_rx_pending_buffers(rx_data);
129 			e1000g_free_rx_data(rx_data);
130 		}
131 		mutex_exit(&e1000g_rx_detach_lock);
132 	}
133 }
134 
135 /*
136  * e1000g_rx_setup - setup rx data structures
137  *
138  * This routine initializes all of the receive related
139  * structures. This includes the receive descriptors, the
140  * actual receive buffers, and the rx_sw_packet software
141  * structures.
142  */
143 void
144 e1000g_rx_setup(struct e1000g *Adapter)
145 {
146 	struct e1000_hw *hw;
147 	p_rx_sw_packet_t packet;
148 	struct e1000_rx_desc *descriptor;
149 	uint32_t buf_low;
150 	uint32_t buf_high;
151 	uint32_t reg_val;
152 	uint32_t rctl;
153 	uint32_t rxdctl;
154 	uint32_t ert;
155 	uint16_t phy_data;
156 	int i;
157 	int size;
158 	e1000g_rx_data_t *rx_data;
159 
160 	hw = &Adapter->shared;
161 	rx_data = Adapter->rx_ring->rx_data;
162 
163 	/*
164 	 * zero out all of the receive buffer descriptor memory
165 	 * assures any previous data or status is erased
166 	 */
167 	bzero(rx_data->rbd_area,
168 	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);
169 
170 	if (!Adapter->rx_buffer_setup) {
171 		/* Init the list of "Receive Buffer" */
172 		QUEUE_INIT_LIST(&rx_data->recv_list);
173 
174 		/* Init the list of "Free Receive Buffer" */
175 		QUEUE_INIT_LIST(&rx_data->free_list);
176 
177 		/* Init the list of "Free Receive Buffer" */
178 		QUEUE_INIT_LIST(&rx_data->recycle_list);
179 		/*
180 		 * Setup Receive list and the Free list. Note that
181 		 * the both were allocated in one packet area.
182 		 */
183 		packet = rx_data->packet_area;
184 		descriptor = rx_data->rbd_first;
185 
186 		for (i = 0; i < Adapter->rx_desc_num;
187 		    i++, packet = packet->next, descriptor++) {
188 			ASSERT(packet != NULL);
189 			ASSERT(descriptor != NULL);
190 			descriptor->buffer_addr =
191 			    packet->rx_buf->dma_address;
192 
193 			/* Add this rx_sw_packet to the receive list */
194 			QUEUE_PUSH_TAIL(&rx_data->recv_list,
195 			    &packet->Link);
196 		}
197 
198 		for (i = 0; i < Adapter->rx_freelist_num;
199 		    i++, packet = packet->next) {
200 			ASSERT(packet != NULL);
201 			/* Add this rx_sw_packet to the free list */
202 			QUEUE_PUSH_TAIL(&rx_data->free_list,
203 			    &packet->Link);
204 		}
205 		rx_data->avail_freepkt = Adapter->rx_freelist_num;
206 		rx_data->recycle_freepkt = 0;
207 
208 		Adapter->rx_buffer_setup = B_TRUE;
209 	} else {
210 		/* Setup the initial pointer to the first rx descriptor */
211 		packet = (p_rx_sw_packet_t)
212 		    QUEUE_GET_HEAD(&rx_data->recv_list);
213 		descriptor = rx_data->rbd_first;
214 
215 		for (i = 0; i < Adapter->rx_desc_num; i++) {
216 			ASSERT(packet != NULL);
217 			ASSERT(descriptor != NULL);
218 			descriptor->buffer_addr =
219 			    packet->rx_buf->dma_address;
220 
221 			/* Get next rx_sw_packet */
222 			packet = (p_rx_sw_packet_t)
223 			    QUEUE_GET_NEXT(&rx_data->recv_list, &packet->Link);
224 			descriptor++;
225 		}
226 	}
227 
228 	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
229 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
230 	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
231 	if (hw->mac.type >= e1000_82540) {
232 		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
233 		    Adapter->rx_intr_abs_delay);
234 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
235 		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
236 	}
237 
238 	/*
239 	 * Setup our descriptor pointers
240 	 */
241 	rx_data->rbd_next = rx_data->rbd_first;
242 
243 	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
244 	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
245 	size = E1000_READ_REG(hw, E1000_RDLEN(0));
246 
247 	/* To get lower order bits */
248 	buf_low = (uint32_t)rx_data->rbd_dma_addr;
249 	/* To get the higher order bits */
250 	buf_high = (uint32_t)(rx_data->rbd_dma_addr >> 32);
251 
252 	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
253 	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);
254 
255 	/*
256 	 * Setup our HW Rx Head & Tail descriptor pointers
257 	 */
258 	E1000_WRITE_REG(hw, E1000_RDT(0),
259 	    (uint32_t)(rx_data->rbd_last - rx_data->rbd_first));
260 	E1000_WRITE_REG(hw, E1000_RDH(0), 0);
261 
262 	/*
263 	 * Setup the Receive Control Register (RCTL), and ENABLE the
264 	 * receiver. The initial configuration is to: Enable the receiver,
265 	 * accept broadcasts, discard bad packets (and long packets),
266 	 * disable VLAN filter checking, set the receive descriptor
267 	 * minimum threshold size to 1/2, and the receive buffer size to
268 	 * 2k.
269 	 */
270 	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
271 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
272 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
273 	    E1000_RCTL_RDMTS_HALF |
274 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
275 
276 	if (Adapter->default_mtu > ETHERMTU)
277 		rctl |= E1000_RCTL_LPE;  /* Large Packet Enable bit */
278 
279 	if (Adapter->strip_crc)
280 		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */
281 
282 	if (Adapter->mem_workaround_82546 &&
283 	    ((hw->mac.type == e1000_82545) ||
284 	    (hw->mac.type == e1000_82546) ||
285 	    (hw->mac.type == e1000_82546_rev_3))) {
286 		rctl |= E1000_RCTL_SZ_2048;
287 	} else {
288 		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
289 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
290 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
291 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
292 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
293 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
294 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
295 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
296 			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
297 		else
298 			rctl |= E1000_RCTL_SZ_2048;
299 	}
300 
301 	if (e1000_tbi_sbp_enabled_82543(hw))
302 		rctl |= E1000_RCTL_SBP;
303 
304 	/*
305 	 * Enable Early Receive Threshold (ERT) on supported devices.
306 	 * Only takes effect when packet size is equal or larger than the
307 	 * specified value (in 8 byte units), e.g. using jumbo frames.
308 	 */
309 	if ((hw->mac.type == e1000_82573) ||
310 	    (hw->mac.type == e1000_82574) ||
311 	    (hw->mac.type == e1000_ich9lan) ||
312 	    (hw->mac.type == e1000_ich10lan)) {
313 
314 		ert = E1000_ERT_2048;
315 
316 		/*
317 		 * Special modification when ERT and
318 		 * jumbo frames are enabled
319 		 */
320 		if (Adapter->default_mtu > ETHERMTU) {
321 			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
322 			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
323 			ert |= (1 << 13);
324 		}
325 
326 		E1000_WRITE_REG(hw, E1000_ERT, ert);
327 	}
328 
329 	/* Workaround errata on 82577/8 adapters with large frames */
330 	if ((hw->mac.type == e1000_pchlan) &&
331 	    (Adapter->default_mtu > ETHERMTU)) {
332 
333 		(void) e1000_read_phy_reg(hw, PHY_REG(770, 26), &phy_data);
334 		phy_data &= 0xfff8;
335 		phy_data |= (1 << 2);
336 		(void) e1000_write_phy_reg(hw, PHY_REG(770, 26), phy_data);
337 
338 		if (hw->phy.type == e1000_phy_82577) {
339 			(void) e1000_read_phy_reg(hw, 22, &phy_data);
340 			phy_data &= 0x0fff;
341 			phy_data |= (1 << 14);
342 			(void) e1000_write_phy_reg(hw, 0x10, 0x2823);
343 			(void) e1000_write_phy_reg(hw, 0x11, 0x0003);
344 			(void) e1000_write_phy_reg(hw, 22, phy_data);
345 		}
346 	}
347 
348 	/* Workaround errata on 82579 adapters with large frames */
349 	if (hw->mac.type == e1000_pch2lan) {
350 		boolean_t enable_jumbo = (Adapter->default_mtu > ETHERMTU ?
351 		    B_TRUE : B_FALSE);
352 
353 		if (e1000_lv_jumbo_workaround_ich8lan(hw, enable_jumbo) != 0)
354 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
355 			    "failed to enable jumbo frame workaround mode\n");
356 	}
357 
358 	reg_val =
359 	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
360 	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */
361 
362 	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
363 
364 	/*
365 	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
366 	 * processing of received IPV6 extension headers
367 	 */
368 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
369 		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
370 		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
371 		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
372 		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
373 	}
374 
375 	/* Write to enable the receive unit */
376 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
377 }
378 
379 /*
380  * e1000g_get_buf - get an rx sw packet from the free_list
381  */
382 static p_rx_sw_packet_t
383 e1000g_get_buf(e1000g_rx_data_t *rx_data)
384 {
385 	p_rx_sw_packet_t packet;
386 	struct e1000g *Adapter;
387 
388 	Adapter = rx_data->rx_ring->adapter;
389 
390 	mutex_enter(&rx_data->freelist_lock);
391 	packet = (p_rx_sw_packet_t)
392 	    QUEUE_POP_HEAD(&rx_data->free_list);
393 	if (packet != NULL) {
394 		rx_data->avail_freepkt--;
395 		goto end;
396 	}
397 
398 	/*
399 	 * If the freelist has no packets, check the recycle list
400 	 * to see if there are any available descriptor there.
401 	 */
402 	mutex_enter(&rx_data->recycle_lock);
403 	QUEUE_SWITCH(&rx_data->free_list, &rx_data->recycle_list);
404 	rx_data->avail_freepkt = rx_data->recycle_freepkt;
405 	rx_data->recycle_freepkt = 0;
406 	mutex_exit(&rx_data->recycle_lock);
407 	packet = (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->free_list);
408 	if (packet != NULL) {
409 		rx_data->avail_freepkt--;
410 		goto end;
411 	}
412 
413 	if (Adapter->rx_freelist_num < Adapter->rx_freelist_limit) {
414 		(void) e1000g_increase_rx_packets(rx_data);
415 		packet = (p_rx_sw_packet_t)
416 		    QUEUE_POP_HEAD(&rx_data->free_list);
417 		if (packet != NULL) {
418 			rx_data->avail_freepkt--;
419 		}
420 	}
421 
422 end:
423 	mutex_exit(&rx_data->freelist_lock);
424 	return (packet);
425 }
426 
427 /*
428  * e1000g_receive - main receive routine
429  *
430  * This routine will process packets received in an interrupt
431  */
432 mblk_t *
433 e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
434 {
435 	struct e1000_hw *hw;
436 	mblk_t *nmp;
437 	mblk_t *ret_mp;
438 	mblk_t *ret_nmp;
439 	struct e1000_rx_desc *current_desc;
440 	struct e1000_rx_desc *last_desc;
441 	p_rx_sw_packet_t packet;
442 	p_rx_sw_packet_t newpkt;
443 	uint16_t length;
444 	uint32_t pkt_count;
445 	uint32_t desc_count;
446 	boolean_t accept_frame;
447 	boolean_t end_of_packet;
448 	boolean_t need_copy;
449 	struct e1000g *Adapter;
450 	dma_buffer_t *rx_buf;
451 	uint16_t cksumflags;
452 	uint_t chain_sz = 0;
453 	e1000g_rx_data_t *rx_data;
454 	uint32_t max_size;
455 	uint32_t min_size;
456 
457 	ret_mp = NULL;
458 	ret_nmp = NULL;
459 	pkt_count = 0;
460 	desc_count = 0;
461 	cksumflags = 0;
462 
463 	Adapter = rx_ring->adapter;
464 	rx_data = rx_ring->rx_data;
465 	hw = &Adapter->shared;
466 
467 	/* Sync the Rx descriptor DMA buffers */
468 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
469 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
470 
471 	if (e1000g_check_dma_handle(rx_data->rbd_dma_handle) != DDI_FM_OK) {
472 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
473 		Adapter->e1000g_state |= E1000G_ERROR;
474 		return (NULL);
475 	}
476 
477 	current_desc = rx_data->rbd_next;
478 	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
479 		/*
480 		 * don't send anything up. just clear the RFD
481 		 */
482 		E1000G_DEBUG_STAT(rx_ring->stat_none);
483 		return (NULL);
484 	}
485 
486 	max_size = Adapter->max_frame_size - ETHERFCSL - VLAN_TAGSZ;
487 	min_size = ETHERMIN;
488 
489 	/*
490 	 * Loop through the receive descriptors starting at the last known
491 	 * descriptor owned by the hardware that begins a packet.
492 	 */
493 	while ((current_desc->status & E1000_RXD_STAT_DD) &&
494 	    (pkt_count < Adapter->rx_limit_onintr) &&
495 	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {
496 
497 		desc_count++;
498 		/*
499 		 * Now this can happen in Jumbo frame situation.
500 		 */
501 		if (current_desc->status & E1000_RXD_STAT_EOP) {
502 			/* packet has EOP set */
503 			end_of_packet = B_TRUE;
504 		} else {
505 			/*
506 			 * If this received buffer does not have the
507 			 * End-Of-Packet bit set, the received packet
508 			 * will consume multiple buffers. We won't send this
509 			 * packet upstack till we get all the related buffers.
510 			 */
511 			end_of_packet = B_FALSE;
512 		}
513 
514 		/*
515 		 * Get a pointer to the actual receive buffer
516 		 * The mp->b_rptr is mapped to The CurrentDescriptor
517 		 * Buffer Address.
518 		 */
519 		packet =
520 		    (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
521 		ASSERT(packet != NULL);
522 
523 		rx_buf = packet->rx_buf;
524 
525 		length = current_desc->length;
526 
527 #ifdef __sparc
528 		if (packet->dma_type == USE_DVMA)
529 			dvma_sync(rx_buf->dma_handle, 0,
530 			    DDI_DMA_SYNC_FORKERNEL);
531 		else
532 			(void) ddi_dma_sync(rx_buf->dma_handle,
533 			    E1000G_IPALIGNROOM, length,
534 			    DDI_DMA_SYNC_FORKERNEL);
535 #else
536 		(void) ddi_dma_sync(rx_buf->dma_handle,
537 		    E1000G_IPALIGNROOM, length,
538 		    DDI_DMA_SYNC_FORKERNEL);
539 #endif
540 
541 		if (e1000g_check_dma_handle(
542 		    rx_buf->dma_handle) != DDI_FM_OK) {
543 			ddi_fm_service_impact(Adapter->dip,
544 			    DDI_SERVICE_DEGRADED);
545 			Adapter->e1000g_state |= E1000G_ERROR;
546 
547 			goto rx_drop;
548 		}
549 
550 		/*
551 		 * workaround for redmine #3100. After a switch reset packet
552 		 * queue and descriptor dma addresses got out of sync. Detect
553 		 * this and flag the error. Let the watchdog timer do the reset
554 		 */
555 		if (current_desc->buffer_addr != rx_buf->dma_address) {
556 			e1000g_log(Adapter, CE_WARN, "receive dma descriptors "
557 			    "got out of sync, resetting adapter");
558 			Adapter->e1000g_state |= E1000G_ERROR;
559 		}
560 		accept_frame = (current_desc->errors == 0) ||
561 		    ((current_desc->errors &
562 		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);
563 
564 		if (hw->mac.type == e1000_82543) {
565 			unsigned char last_byte;
566 
567 			last_byte =
568 			    *((unsigned char *)rx_buf->address + length - 1);
569 
570 			if (TBI_ACCEPT(hw,
571 			    current_desc->status, current_desc->errors,
572 			    current_desc->length, last_byte,
573 			    Adapter->min_frame_size, Adapter->max_frame_size)) {
574 
575 				e1000_tbi_adjust_stats(Adapter,
576 				    length, hw->mac.addr);
577 
578 				length--;
579 				accept_frame = B_TRUE;
580 			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
581 			    (current_desc->errors == E1000_RXD_ERR_CE)) {
582 				accept_frame = B_TRUE;
583 			}
584 		}
585 
586 		/*
587 		 * Indicate the packet to the NOS if it was good.
588 		 * Normally, hardware will discard bad packets for us.
589 		 * Check for the packet to be a valid Ethernet packet
590 		 */
591 		if (!accept_frame) {
592 			/*
593 			 * error in incoming packet, either the packet is not a
594 			 * ethernet size packet, or the packet has an error. In
595 			 * either case, the packet will simply be discarded.
596 			 */
597 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
598 			    "Process Receive Interrupts: Error in Packet\n");
599 
600 			E1000G_STAT(rx_ring->stat_error);
601 			/*
602 			 * Returning here as we are done here. There is
603 			 * no point in waiting for while loop to elapse
604 			 * and the things which were done. More efficient
605 			 * and less error prone...
606 			 */
607 			goto rx_drop;
608 		}
609 
610 		/*
611 		 * If the Ethernet CRC is not stripped by the hardware,
612 		 * we need to strip it before sending it up to the stack.
613 		 */
614 		if (end_of_packet && !Adapter->strip_crc) {
615 			if (length > ETHERFCSL) {
616 				length -= ETHERFCSL;
617 			} else {
618 				/*
619 				 * If the fragment is smaller than the CRC,
620 				 * drop this fragment, do the processing of
621 				 * the end of the packet.
622 				 */
623 				if (rx_data->rx_mblk_tail == NULL) {
624 					E1000G_STAT(rx_ring->stat_crc_only_pkt);
625 					goto rx_next_desc;
626 				}
627 
628 				rx_data->rx_mblk_tail->b_wptr -=
629 				    ETHERFCSL - length;
630 				rx_data->rx_mblk_len -=
631 				    ETHERFCSL - length;
632 				goto rx_end_of_packet;
633 			}
634 		}
635 
636 		need_copy = B_TRUE;
637 
638 		if (length <= Adapter->rx_bcopy_thresh)
639 			goto rx_copy;
640 
641 		/*
642 		 * Get the pre-constructed mblk that was associated
643 		 * to the receive data buffer.
644 		 */
645 		if (packet->mp == NULL) {
646 			packet->mp = desballoc((unsigned char *)
647 			    rx_buf->address, length,
648 			    BPRI_MED, &packet->free_rtn);
649 		}
650 
651 		if (packet->mp != NULL) {
652 			/*
653 			 * We have two sets of buffer pool. One associated with
654 			 * the Rxdescriptors and other a freelist buffer pool.
655 			 * Each time we get a good packet, Try to get a buffer
656 			 * from the freelist pool using e1000g_get_buf. If we
657 			 * get free buffer, then replace the descriptor buffer
658 			 * address with the free buffer we just got, and pass
659 			 * the pre-constructed mblk upstack. (note no copying)
660 			 *
661 			 * If we failed to get a free buffer, then try to
662 			 * allocate a new buffer(mp) and copy the recv buffer
663 			 * content to our newly allocated buffer(mp). Don't
664 			 * disturb the desriptor buffer address. (note copying)
665 			 */
666 			newpkt = e1000g_get_buf(rx_data);
667 
668 			if (newpkt != NULL) {
669 				/*
670 				 * Get the mblk associated to the data,
671 				 * and strip it off the sw packet.
672 				 */
673 				nmp = packet->mp;
674 				packet->mp = NULL;
675 				atomic_inc_32(&packet->ref_cnt);
676 
677 				/*
678 				 * Now replace old buffer with the new
679 				 * one we got from free list
680 				 * Both the RxSwPacket as well as the
681 				 * Receive Buffer Descriptor will now
682 				 * point to this new packet.
683 				 */
684 				packet = newpkt;
685 
686 				current_desc->buffer_addr =
687 				    newpkt->rx_buf->dma_address;
688 
689 				need_copy = B_FALSE;
690 			} else {
691 				/* EMPTY */
692 				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
693 			}
694 		}
695 
696 rx_copy:
697 		if (need_copy) {
698 			/*
699 			 * No buffers available on free list,
700 			 * bcopy the data from the buffer and
701 			 * keep the original buffer. Dont want to
702 			 * do this.. Yack but no other way
703 			 */
704 			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
705 			    BPRI_MED)) == NULL) {
706 				/*
707 				 * The system has no buffers available
708 				 * to send up the incoming packet, hence
709 				 * the packet will have to be processed
710 				 * when there're more buffers available.
711 				 */
712 				E1000G_STAT(rx_ring->stat_allocb_fail);
713 				goto rx_drop;
714 			}
715 			nmp->b_rptr += E1000G_IPALIGNROOM;
716 			nmp->b_wptr += E1000G_IPALIGNROOM;
717 			/*
718 			 * The free list did not have any buffers
719 			 * available, so, the received packet will
720 			 * have to be copied into a mp and the original
721 			 * buffer will have to be retained for future
722 			 * packet reception.
723 			 */
724 			bcopy(rx_buf->address, nmp->b_wptr, length);
725 		}
726 
727 		ASSERT(nmp != NULL);
728 		nmp->b_wptr += length;
729 
730 		if (rx_data->rx_mblk == NULL) {
731 			/*
732 			 *  TCP/UDP checksum offload and
733 			 *  IP checksum offload
734 			 */
735 			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
736 				/*
737 				 * Check TCP/UDP checksum
738 				 */
739 				if ((current_desc->status &
740 				    E1000_RXD_STAT_TCPCS) &&
741 				    !(current_desc->errors &
742 				    E1000_RXD_ERR_TCPE))
743 					cksumflags |= HCK_FULLCKSUM_OK;
744 				/*
745 				 * Check IP Checksum
746 				 */
747 				if ((current_desc->status &
748 				    E1000_RXD_STAT_IPCS) &&
749 				    !(current_desc->errors &
750 				    E1000_RXD_ERR_IPE))
751 					cksumflags |= HCK_IPV4_HDRCKSUM_OK;
752 			}
753 		}
754 
755 		/*
756 		 * We need to maintain our packet chain in the global
757 		 * Adapter structure, for the Rx processing can end
758 		 * with a fragment that has no EOP set.
759 		 */
760 		if (rx_data->rx_mblk == NULL) {
761 			/* Get the head of the message chain */
762 			rx_data->rx_mblk = nmp;
763 			rx_data->rx_mblk_tail = nmp;
764 			rx_data->rx_mblk_len = length;
765 		} else {	/* Not the first packet */
766 			/* Continue adding buffers */
767 			rx_data->rx_mblk_tail->b_cont = nmp;
768 			rx_data->rx_mblk_tail = nmp;
769 			rx_data->rx_mblk_len += length;
770 		}
771 		ASSERT(rx_data->rx_mblk != NULL);
772 		ASSERT(rx_data->rx_mblk_tail != NULL);
773 		ASSERT(rx_data->rx_mblk_tail->b_cont == NULL);
774 
775 		/*
776 		 * Now this MP is ready to travel upwards but some more
777 		 * fragments are coming.
778 		 * We will send packet upwards as soon as we get EOP
779 		 * set on the packet.
780 		 */
781 		if (!end_of_packet) {
782 			/*
783 			 * continue to get the next descriptor,
784 			 * Tail would be advanced at the end
785 			 */
786 			goto rx_next_desc;
787 		}
788 
789 rx_end_of_packet:
790 		if (E1000G_IS_VLAN_PACKET(rx_data->rx_mblk->b_rptr))
791 			max_size = Adapter->max_frame_size - ETHERFCSL;
792 
793 		if ((rx_data->rx_mblk_len > max_size) ||
794 		    (rx_data->rx_mblk_len < min_size)) {
795 			E1000G_STAT(rx_ring->stat_size_error);
796 			goto rx_drop;
797 		}
798 
799 		/*
800 		 * Found packet with EOP
801 		 * Process the last fragment.
802 		 */
803 		if (cksumflags != 0) {
804 			mac_hcksum_set(rx_data->rx_mblk,
805 			    0, 0, 0, 0, cksumflags);
806 			cksumflags = 0;
807 		}
808 
809 		/*
810 		 * Count packets that span multi-descriptors
811 		 */
812 		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
813 		    (rx_data->rx_mblk->b_cont != NULL));
814 
815 		/*
816 		 * Append to list to send upstream
817 		 */
818 		if (ret_mp == NULL) {
819 			ret_mp = ret_nmp = rx_data->rx_mblk;
820 		} else {
821 			ret_nmp->b_next = rx_data->rx_mblk;
822 			ret_nmp = rx_data->rx_mblk;
823 		}
824 		ret_nmp->b_next = NULL;
825 		*tail = ret_nmp;
826 		chain_sz += length;
827 
828 		rx_data->rx_mblk = NULL;
829 		rx_data->rx_mblk_tail = NULL;
830 		rx_data->rx_mblk_len = 0;
831 
832 		pkt_count++;
833 
834 rx_next_desc:
835 		/*
836 		 * Zero out the receive descriptors status
837 		 */
838 		current_desc->status = 0;
839 
840 		if (current_desc == rx_data->rbd_last)
841 			rx_data->rbd_next = rx_data->rbd_first;
842 		else
843 			rx_data->rbd_next++;
844 
845 		last_desc = current_desc;
846 		current_desc = rx_data->rbd_next;
847 
848 		/*
849 		 * Put the buffer that we just indicated back
850 		 * at the end of our list
851 		 */
852 		QUEUE_PUSH_TAIL(&rx_data->recv_list,
853 		    &packet->Link);
854 	}	/* while loop */
855 
856 	/* Sync the Rx descriptor DMA buffers */
857 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
858 	    0, 0, DDI_DMA_SYNC_FORDEV);
859 
860 	/*
861 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
862 	 */
863 	E1000_WRITE_REG(hw, E1000_RDT(0),
864 	    (uint32_t)(last_desc - rx_data->rbd_first));
865 
866 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
867 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
868 		Adapter->e1000g_state |= E1000G_ERROR;
869 	}
870 
871 	Adapter->rx_pkt_cnt = pkt_count;
872 
873 	return (ret_mp);
874 
875 rx_drop:
876 	/*
877 	 * Zero out the receive descriptors status
878 	 */
879 	current_desc->status = 0;
880 
881 	/* Sync the Rx descriptor DMA buffers */
882 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
883 	    0, 0, DDI_DMA_SYNC_FORDEV);
884 
885 	if (current_desc == rx_data->rbd_last)
886 		rx_data->rbd_next = rx_data->rbd_first;
887 	else
888 		rx_data->rbd_next++;
889 
890 	last_desc = current_desc;
891 
892 	QUEUE_PUSH_TAIL(&rx_data->recv_list, &packet->Link);
893 	/*
894 	 * Reclaim all old buffers already allocated during
895 	 * Jumbo receives.....for incomplete reception
896 	 */
897 	if (rx_data->rx_mblk != NULL) {
898 		freemsg(rx_data->rx_mblk);
899 		rx_data->rx_mblk = NULL;
900 		rx_data->rx_mblk_tail = NULL;
901 		rx_data->rx_mblk_len = 0;
902 	}
903 	/*
904 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
905 	 */
906 	E1000_WRITE_REG(hw, E1000_RDT(0),
907 	    (uint32_t)(last_desc - rx_data->rbd_first));
908 
909 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
910 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
911 		Adapter->e1000g_state |= E1000G_ERROR;
912 	}
913 
914 	return (ret_mp);
915 }
916