xref: /titanic_52/usr/src/uts/common/io/e1000g/e1000g_rx.c (revision 80f1b0f5d7bc2c2f91ce68ea6379c779a03c595e)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * **********************************************************************
28  *									*
29  * Module Name:								*
30  *   e1000g_rx.c							*
31  *									*
32  * Abstract:								*
33  *   This file contains some routines that take care of Receive		*
34  *   interrupt and also for the received packets it sends up to		*
35  *   upper layer.							*
36  *   It tries to do a zero copy if free buffers are available in	*
37  *   the pool.								*
38  *									*
39  * **********************************************************************
40  */
41 
42 #include "e1000g_sw.h"
43 #include "e1000g_debug.h"
44 
45 static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_ring_t *rx_ring);
46 #pragma	inline(e1000g_get_buf)
47 static void e1000g_priv_devi_list_clean();
48 
49 /*
50  * e1000g_rxfree_func - the call-back function to reclaim rx buffer
51  *
52  * This function is called when an mp is freed by the user thru
53  * freeb call (Only for mp constructed through desballoc call)
54  * It returns back the freed buffer to the freelist
55  */
56 void
57 e1000g_rxfree_func(p_rx_sw_packet_t packet)
58 {
59 	e1000g_rx_ring_t *rx_ring;
60 
61 	rx_ring = (e1000g_rx_ring_t *)(uintptr_t)packet->rx_ring;
62 
63 	/*
64 	 * Here the rx recycling processes different rx packets in different
65 	 * threads, so we protect it with RW_READER to ensure it won't block
66 	 * other rx recycling threads.
67 	 */
68 	rw_enter(&e1000g_rx_detach_lock, RW_READER);
69 
70 	if (packet->flag == E1000G_RX_SW_FREE) {
71 		rw_exit(&e1000g_rx_detach_lock);
72 		return;
73 	}
74 
75 	if (packet->flag == E1000G_RX_SW_STOP) {
76 		packet->flag = E1000G_RX_SW_FREE;
77 		rw_exit(&e1000g_rx_detach_lock);
78 
79 		rw_enter(&e1000g_rx_detach_lock, RW_WRITER);
80 		rx_ring->pending_count--;
81 		e1000g_mblks_pending--;
82 
83 		if (rx_ring->pending_count == 0) {
84 			while (rx_ring->pending_list != NULL) {
85 				packet = rx_ring->pending_list;
86 				rx_ring->pending_list =
87 				    rx_ring->pending_list->next;
88 
89 				ASSERT(packet->mp == NULL);
90 				e1000g_free_rx_sw_packet(packet);
91 			}
92 		}
93 
94 		/*
95 		 * If e1000g_force_detach is enabled, we need to clean up
96 		 * the idle priv_dip entries in the private dip list while
97 		 * e1000g_mblks_pending is zero.
98 		 */
99 		if (e1000g_force_detach && (e1000g_mblks_pending == 0))
100 			e1000g_priv_devi_list_clean();
101 		rw_exit(&e1000g_rx_detach_lock);
102 		return;
103 	}
104 
105 	if (packet->flag == E1000G_RX_SW_DETACH) {
106 		packet->flag = E1000G_RX_SW_FREE;
107 		rw_exit(&e1000g_rx_detach_lock);
108 
109 		ASSERT(packet->mp == NULL);
110 		e1000g_free_rx_sw_packet(packet);
111 
112 		/*
113 		 * Here the e1000g_mblks_pending may be modified by different
114 		 * rx recycling threads simultaneously, so we need to protect
115 		 * it with RW_WRITER.
116 		 */
117 		rw_enter(&e1000g_rx_detach_lock, RW_WRITER);
118 		e1000g_mblks_pending--;
119 
120 		/*
121 		 * If e1000g_force_detach is enabled, we need to clean up
122 		 * the idle priv_dip entries in the private dip list while
123 		 * e1000g_mblks_pending is zero.
124 		 */
125 		if (e1000g_force_detach && (e1000g_mblks_pending == 0))
126 			e1000g_priv_devi_list_clean();
127 		rw_exit(&e1000g_rx_detach_lock);
128 		return;
129 	}
130 
131 	packet->flag = E1000G_RX_SW_FREE;
132 
133 	if (packet->mp == NULL) {
134 		/*
135 		 * Allocate a mblk that binds to the data buffer
136 		 */
137 		packet->mp = desballoc((unsigned char *)
138 		    packet->rx_buf->address - E1000G_IPALIGNROOM,
139 		    packet->rx_buf->size + E1000G_IPALIGNROOM,
140 		    BPRI_MED, &packet->free_rtn);
141 
142 		if (packet->mp != NULL) {
143 			packet->mp->b_rptr += E1000G_IPALIGNROOM;
144 			packet->mp->b_wptr += E1000G_IPALIGNROOM;
145 		} else {
146 			E1000G_STAT(rx_ring->stat_esballoc_fail);
147 		}
148 	}
149 
150 	/*
151 	 * Enqueue the recycled packets in a recycle queue. When freelist
152 	 * dries up, move the entire chain of packets from recycle queue
153 	 * to freelist. This helps in avoiding per packet mutex contention
154 	 * around freelist.
155 	 */
156 	mutex_enter(&rx_ring->recycle_lock);
157 	QUEUE_PUSH_TAIL(&rx_ring->recycle_list, &packet->Link);
158 	rx_ring->recycle_freepkt++;
159 	mutex_exit(&rx_ring->recycle_lock);
160 
161 	rw_exit(&e1000g_rx_detach_lock);
162 }
163 
164 /*
165  * e1000g_priv_devi_list_clean - clean up e1000g_private_devi_list
166  *
167  * We will walk the e1000g_private_devi_list to free the entry marked
168  * with the E1000G_PRIV_DEVI_DETACH flag.
169  */
170 static void
171 e1000g_priv_devi_list_clean()
172 {
173 	private_devi_list_t *devi_node, *devi_del;
174 
175 	if (e1000g_private_devi_list == NULL)
176 		return;
177 
178 	devi_node = e1000g_private_devi_list;
179 	while ((devi_node != NULL) &&
180 	    (devi_node->flag == E1000G_PRIV_DEVI_DETACH)) {
181 		e1000g_private_devi_list = devi_node->next;
182 		kmem_free(devi_node->priv_dip,
183 		    sizeof (struct dev_info));
184 		kmem_free(devi_node,
185 		    sizeof (private_devi_list_t));
186 		devi_node = e1000g_private_devi_list;
187 	}
188 	if (e1000g_private_devi_list == NULL)
189 		return;
190 	while (devi_node->next != NULL) {
191 		if (devi_node->next->flag == E1000G_PRIV_DEVI_DETACH) {
192 			devi_del = devi_node->next;
193 			devi_node->next = devi_del->next;
194 			kmem_free(devi_del->priv_dip,
195 			    sizeof (struct dev_info));
196 			kmem_free(devi_del,
197 			    sizeof (private_devi_list_t));
198 		} else {
199 			devi_node = devi_node->next;
200 		}
201 	}
202 }
203 
204 /*
205  * e1000g_rx_setup - setup rx data structures
206  *
207  * This routine initializes all of the receive related
208  * structures. This includes the receive descriptors, the
209  * actual receive buffers, and the rx_sw_packet software
210  * structures.
211  */
212 void
213 e1000g_rx_setup(struct e1000g *Adapter)
214 {
215 	struct e1000_hw *hw;
216 	p_rx_sw_packet_t packet;
217 	struct e1000_rx_desc *descriptor;
218 	uint32_t buf_low;
219 	uint32_t buf_high;
220 	uint32_t reg_val;
221 	uint32_t rctl;
222 	uint32_t rxdctl;
223 	uint32_t ert;
224 	int i;
225 	int size;
226 	e1000g_rx_ring_t *rx_ring;
227 
228 	hw = &Adapter->shared;
229 	rx_ring = Adapter->rx_ring;
230 
231 	/*
232 	 * zero out all of the receive buffer descriptor memory
233 	 * assures any previous data or status is erased
234 	 */
235 	bzero(rx_ring->rbd_area,
236 	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);
237 
238 	if (!Adapter->rx_buffer_setup) {
239 		/* Init the list of "Receive Buffer" */
240 		QUEUE_INIT_LIST(&rx_ring->recv_list);
241 
242 		/* Init the list of "Free Receive Buffer" */
243 		QUEUE_INIT_LIST(&rx_ring->free_list);
244 
245 		/* Init the list of "Free Receive Buffer" */
246 		QUEUE_INIT_LIST(&rx_ring->recycle_list);
247 		/*
248 		 * Setup Receive list and the Free list. Note that
249 		 * the both were allocated in one packet area.
250 		 */
251 		packet = rx_ring->packet_area;
252 		descriptor = rx_ring->rbd_first;
253 
254 		for (i = 0; i < Adapter->rx_desc_num;
255 		    i++, packet = packet->next, descriptor++) {
256 			ASSERT(packet != NULL);
257 			ASSERT(descriptor != NULL);
258 			descriptor->buffer_addr =
259 			    packet->rx_buf->dma_address;
260 
261 			/* Add this rx_sw_packet to the receive list */
262 			QUEUE_PUSH_TAIL(&rx_ring->recv_list,
263 			    &packet->Link);
264 		}
265 
266 		for (i = 0; i < Adapter->rx_freelist_num;
267 		    i++, packet = packet->next) {
268 			ASSERT(packet != NULL);
269 			/* Add this rx_sw_packet to the free list */
270 			QUEUE_PUSH_TAIL(&rx_ring->free_list,
271 			    &packet->Link);
272 		}
273 		rx_ring->avail_freepkt = Adapter->rx_freelist_num;
274 		rx_ring->recycle_freepkt = 0;
275 
276 		Adapter->rx_buffer_setup = B_TRUE;
277 	} else {
278 		/* Setup the initial pointer to the first rx descriptor */
279 		packet = (p_rx_sw_packet_t)
280 		    QUEUE_GET_HEAD(&rx_ring->recv_list);
281 		descriptor = rx_ring->rbd_first;
282 
283 		for (i = 0; i < Adapter->rx_desc_num; i++) {
284 			ASSERT(packet != NULL);
285 			ASSERT(descriptor != NULL);
286 			descriptor->buffer_addr =
287 			    packet->rx_buf->dma_address;
288 
289 			/* Get next rx_sw_packet */
290 			packet = (p_rx_sw_packet_t)
291 			    QUEUE_GET_NEXT(&rx_ring->recv_list, &packet->Link);
292 			descriptor++;
293 		}
294 	}
295 
296 	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
297 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
298 	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
299 	if (hw->mac.type >= e1000_82540) {
300 		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
301 		    Adapter->rx_intr_abs_delay);
302 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
303 		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
304 	}
305 
306 	/*
307 	 * Setup our descriptor pointers
308 	 */
309 	rx_ring->rbd_next = rx_ring->rbd_first;
310 
311 	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
312 	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
313 	size = E1000_READ_REG(hw, E1000_RDLEN(0));
314 
315 	/* To get lower order bits */
316 	buf_low = (uint32_t)rx_ring->rbd_dma_addr;
317 	/* To get the higher order bits */
318 	buf_high = (uint32_t)(rx_ring->rbd_dma_addr >> 32);
319 
320 	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
321 	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);
322 
323 	/*
324 	 * Setup our HW Rx Head & Tail descriptor pointers
325 	 */
326 	E1000_WRITE_REG(hw, E1000_RDT(0),
327 	    (uint32_t)(rx_ring->rbd_last - rx_ring->rbd_first));
328 	E1000_WRITE_REG(hw, E1000_RDH(0), 0);
329 
330 	/*
331 	 * Setup the Receive Control Register (RCTL), and ENABLE the
332 	 * receiver. The initial configuration is to: Enable the receiver,
333 	 * accept broadcasts, discard bad packets (and long packets),
334 	 * disable VLAN filter checking, set the receive descriptor
335 	 * minimum threshold size to 1/2, and the receive buffer size to
336 	 * 2k.
337 	 */
338 	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
339 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
340 	    E1000_RCTL_LPE |		/* Large Packet Enable bit */
341 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
342 	    E1000_RCTL_RDMTS_HALF |
343 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
344 
345 	if (Adapter->strip_crc)
346 		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */
347 
348 	if (Adapter->mem_workaround_82546 &&
349 	    ((hw->mac.type == e1000_82545) ||
350 	    (hw->mac.type == e1000_82546) ||
351 	    (hw->mac.type == e1000_82546_rev_3))) {
352 		rctl |= E1000_RCTL_SZ_2048;
353 	} else {
354 		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
355 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
356 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
357 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
358 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
359 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
360 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
361 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
362 			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
363 		else
364 			rctl |= E1000_RCTL_SZ_2048;
365 	}
366 
367 	if (e1000_tbi_sbp_enabled_82543(hw))
368 		rctl |= E1000_RCTL_SBP;
369 
370 	/*
371 	 * Enable Early Receive Threshold (ERT) on supported devices.
372 	 * Only takes effect when packet size is equal or larger than the
373 	 * specified value (in 8 byte units), e.g. using jumbo frames.
374 	 */
375 	if ((hw->mac.type == e1000_82573) ||
376 	    (hw->mac.type == e1000_82574) ||
377 	    (hw->mac.type == e1000_ich9lan) ||
378 	    (hw->mac.type == e1000_ich10lan)) {
379 
380 		ert = E1000_ERT_2048;
381 
382 		/*
383 		 * Special modification when ERT and
384 		 * jumbo frames are enabled
385 		 */
386 		if (Adapter->default_mtu > ETHERMTU) {
387 			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
388 			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
389 			ert |= (1 << 13);
390 		}
391 
392 		E1000_WRITE_REG(hw, E1000_ERT, ert);
393 	}
394 
395 	reg_val =
396 	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
397 	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */
398 
399 	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
400 
401 	/*
402 	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
403 	 * processing of received IPV6 extension headers
404 	 */
405 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
406 		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
407 		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
408 		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
409 		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
410 	}
411 
412 	/* Write to enable the receive unit */
413 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
414 }
415 
416 /*
417  * e1000g_get_buf - get an rx sw packet from the free_list
418  */
419 static p_rx_sw_packet_t
420 e1000g_get_buf(e1000g_rx_ring_t *rx_ring)
421 {
422 	p_rx_sw_packet_t packet;
423 
424 	mutex_enter(&rx_ring->freelist_lock);
425 	packet = (p_rx_sw_packet_t)
426 	    QUEUE_POP_HEAD(&rx_ring->free_list);
427 	if (packet != NULL) {
428 		rx_ring->avail_freepkt--;
429 	} else {
430 		/*
431 		 * If the freelist has no packets, check the recycle list
432 		 * to see if there are any available descriptor there.
433 		 */
434 		mutex_enter(&rx_ring->recycle_lock);
435 		QUEUE_SWITCH(&rx_ring->free_list, &rx_ring->recycle_list);
436 		rx_ring->avail_freepkt = rx_ring->recycle_freepkt;
437 		rx_ring->recycle_freepkt = 0;
438 		mutex_exit(&rx_ring->recycle_lock);
439 		packet = (p_rx_sw_packet_t)
440 		    QUEUE_POP_HEAD(&rx_ring->free_list);
441 		if (packet != NULL)
442 			rx_ring->avail_freepkt--;
443 	}
444 	mutex_exit(&rx_ring->freelist_lock);
445 
446 	return (packet);
447 }
448 
449 /*
450  * e1000g_receive - main receive routine
451  *
452  * This routine will process packets received in an interrupt
453  */
454 mblk_t *
455 e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
456 {
457 	struct e1000_hw *hw;
458 	mblk_t *nmp;
459 	mblk_t *ret_mp;
460 	mblk_t *ret_nmp;
461 	struct e1000_rx_desc *current_desc;
462 	struct e1000_rx_desc *last_desc;
463 	p_rx_sw_packet_t packet;
464 	p_rx_sw_packet_t newpkt;
465 	uint16_t length;
466 	uint32_t pkt_count;
467 	uint32_t desc_count;
468 	boolean_t accept_frame;
469 	boolean_t end_of_packet;
470 	boolean_t need_copy;
471 	struct e1000g *Adapter;
472 	dma_buffer_t *rx_buf;
473 	uint16_t cksumflags;
474 	uint_t chain_sz = 0;
475 
476 	ret_mp = NULL;
477 	ret_nmp = NULL;
478 	pkt_count = 0;
479 	desc_count = 0;
480 	cksumflags = 0;
481 
482 	Adapter = rx_ring->adapter;
483 	hw = &Adapter->shared;
484 
485 	/* Sync the Rx descriptor DMA buffers */
486 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
487 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
488 
489 	if (e1000g_check_dma_handle(rx_ring->rbd_dma_handle) != DDI_FM_OK) {
490 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
491 		Adapter->e1000g_state |= E1000G_ERROR;
492 	}
493 
494 	current_desc = rx_ring->rbd_next;
495 	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
496 		/*
497 		 * don't send anything up. just clear the RFD
498 		 */
499 		E1000G_DEBUG_STAT(rx_ring->stat_none);
500 		return (ret_mp);
501 	}
502 
503 	/*
504 	 * Loop through the receive descriptors starting at the last known
505 	 * descriptor owned by the hardware that begins a packet.
506 	 */
507 	while ((current_desc->status & E1000_RXD_STAT_DD) &&
508 	    (pkt_count < Adapter->rx_limit_onintr) &&
509 	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {
510 
511 		desc_count++;
512 		/*
513 		 * Now this can happen in Jumbo frame situation.
514 		 */
515 		if (current_desc->status & E1000_RXD_STAT_EOP) {
516 			/* packet has EOP set */
517 			end_of_packet = B_TRUE;
518 		} else {
519 			/*
520 			 * If this received buffer does not have the
521 			 * End-Of-Packet bit set, the received packet
522 			 * will consume multiple buffers. We won't send this
523 			 * packet upstack till we get all the related buffers.
524 			 */
525 			end_of_packet = B_FALSE;
526 		}
527 
528 		/*
529 		 * Get a pointer to the actual receive buffer
530 		 * The mp->b_rptr is mapped to The CurrentDescriptor
531 		 * Buffer Address.
532 		 */
533 		packet =
534 		    (p_rx_sw_packet_t)QUEUE_GET_HEAD(&rx_ring->recv_list);
535 		ASSERT(packet != NULL);
536 
537 		rx_buf = packet->rx_buf;
538 
539 		length = current_desc->length;
540 
541 #ifdef __sparc
542 		if (packet->dma_type == USE_DVMA)
543 			dvma_sync(rx_buf->dma_handle, 0,
544 			    DDI_DMA_SYNC_FORKERNEL);
545 		else
546 			(void) ddi_dma_sync(rx_buf->dma_handle,
547 			    E1000G_IPALIGNROOM, length,
548 			    DDI_DMA_SYNC_FORKERNEL);
549 #else
550 		(void) ddi_dma_sync(rx_buf->dma_handle,
551 		    E1000G_IPALIGNROOM, length,
552 		    DDI_DMA_SYNC_FORKERNEL);
553 #endif
554 
555 		if (e1000g_check_dma_handle(
556 		    rx_buf->dma_handle) != DDI_FM_OK) {
557 			ddi_fm_service_impact(Adapter->dip,
558 			    DDI_SERVICE_DEGRADED);
559 			Adapter->e1000g_state |= E1000G_ERROR;
560 		}
561 
562 		accept_frame = (current_desc->errors == 0) ||
563 		    ((current_desc->errors &
564 		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);
565 
566 		if (hw->mac.type == e1000_82543) {
567 			unsigned char last_byte;
568 
569 			last_byte =
570 			    *((unsigned char *)rx_buf->address + length - 1);
571 
572 			if (TBI_ACCEPT(hw,
573 			    current_desc->status, current_desc->errors,
574 			    current_desc->length, last_byte,
575 			    Adapter->min_frame_size, Adapter->max_frame_size)) {
576 
577 				e1000_tbi_adjust_stats(Adapter,
578 				    length, hw->mac.addr);
579 
580 				length--;
581 				accept_frame = B_TRUE;
582 			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
583 			    (current_desc->errors == E1000_RXD_ERR_CE)) {
584 				accept_frame = B_TRUE;
585 			}
586 		}
587 
588 		/*
589 		 * Indicate the packet to the NOS if it was good.
590 		 * Normally, hardware will discard bad packets for us.
591 		 * Check for the packet to be a valid Ethernet packet
592 		 */
593 		if (!accept_frame) {
594 			/*
595 			 * error in incoming packet, either the packet is not a
596 			 * ethernet size packet, or the packet has an error. In
597 			 * either case, the packet will simply be discarded.
598 			 */
599 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
600 			    "Process Receive Interrupts: Error in Packet\n");
601 
602 			E1000G_STAT(rx_ring->stat_error);
603 			/*
604 			 * Returning here as we are done here. There is
605 			 * no point in waiting for while loop to elapse
606 			 * and the things which were done. More efficient
607 			 * and less error prone...
608 			 */
609 			goto rx_drop;
610 		}
611 
612 		/*
613 		 * If the Ethernet CRC is not stripped by the hardware,
614 		 * we need to strip it before sending it up to the stack.
615 		 */
616 		if (end_of_packet && !Adapter->strip_crc) {
617 			if (length > ETHERFCSL) {
618 				length -= ETHERFCSL;
619 			} else {
620 				/*
621 				 * If the fragment is smaller than the CRC,
622 				 * drop this fragment, do the processing of
623 				 * the end of the packet.
624 				 */
625 				ASSERT(rx_ring->rx_mblk_tail != NULL);
626 				rx_ring->rx_mblk_tail->b_wptr -=
627 				    ETHERFCSL - length;
628 				rx_ring->rx_mblk_len -=
629 				    ETHERFCSL - length;
630 
631 				QUEUE_POP_HEAD(&rx_ring->recv_list);
632 
633 				goto rx_end_of_packet;
634 			}
635 		}
636 
637 		need_copy = B_TRUE;
638 
639 		if (length <= Adapter->rx_bcopy_thresh)
640 			goto rx_copy;
641 
642 		/*
643 		 * Get the pre-constructed mblk that was associated
644 		 * to the receive data buffer.
645 		 */
646 		if (packet->mp == NULL) {
647 			packet->mp = desballoc((unsigned char *)
648 			    rx_buf->address - E1000G_IPALIGNROOM,
649 			    length + E1000G_IPALIGNROOM,
650 			    BPRI_MED, &packet->free_rtn);
651 
652 			if (packet->mp != NULL) {
653 				packet->mp->b_rptr += E1000G_IPALIGNROOM;
654 				packet->mp->b_wptr += E1000G_IPALIGNROOM;
655 			} else {
656 				E1000G_STAT(rx_ring->stat_esballoc_fail);
657 			}
658 		}
659 
660 		if (packet->mp != NULL) {
661 			/*
662 			 * We have two sets of buffer pool. One associated with
663 			 * the Rxdescriptors and other a freelist buffer pool.
664 			 * Each time we get a good packet, Try to get a buffer
665 			 * from the freelist pool using e1000g_get_buf. If we
666 			 * get free buffer, then replace the descriptor buffer
667 			 * address with the free buffer we just got, and pass
668 			 * the pre-constructed mblk upstack. (note no copying)
669 			 *
670 			 * If we failed to get a free buffer, then try to
671 			 * allocate a new buffer(mp) and copy the recv buffer
672 			 * content to our newly allocated buffer(mp). Don't
673 			 * disturb the desriptor buffer address. (note copying)
674 			 */
675 			newpkt = e1000g_get_buf(rx_ring);
676 
677 			if (newpkt != NULL) {
678 				/*
679 				 * Get the mblk associated to the data,
680 				 * and strip it off the sw packet.
681 				 */
682 				nmp = packet->mp;
683 				packet->mp = NULL;
684 				packet->flag = E1000G_RX_SW_SENDUP;
685 
686 				/*
687 				 * Now replace old buffer with the new
688 				 * one we got from free list
689 				 * Both the RxSwPacket as well as the
690 				 * Receive Buffer Descriptor will now
691 				 * point to this new packet.
692 				 */
693 				packet = newpkt;
694 
695 				current_desc->buffer_addr =
696 				    newpkt->rx_buf->dma_address;
697 
698 				need_copy = B_FALSE;
699 			} else {
700 				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
701 			}
702 		}
703 
704 rx_copy:
705 		if (need_copy) {
706 			/*
707 			 * No buffers available on free list,
708 			 * bcopy the data from the buffer and
709 			 * keep the original buffer. Dont want to
710 			 * do this.. Yack but no other way
711 			 */
712 			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
713 			    BPRI_MED)) == NULL) {
714 				/*
715 				 * The system has no buffers available
716 				 * to send up the incoming packet, hence
717 				 * the packet will have to be processed
718 				 * when there're more buffers available.
719 				 */
720 				E1000G_STAT(rx_ring->stat_allocb_fail);
721 				goto rx_drop;
722 			}
723 			nmp->b_rptr += E1000G_IPALIGNROOM;
724 			nmp->b_wptr += E1000G_IPALIGNROOM;
725 			/*
726 			 * The free list did not have any buffers
727 			 * available, so, the received packet will
728 			 * have to be copied into a mp and the original
729 			 * buffer will have to be retained for future
730 			 * packet reception.
731 			 */
732 			bcopy(rx_buf->address, nmp->b_wptr, length);
733 		}
734 
735 		/*
736 		 * The rx_sw_packet MUST be popped off the
737 		 * RxSwPacketList before either a putnext or freemsg
738 		 * is done on the mp that has now been created by the
739 		 * desballoc. If not, it is possible that the free
740 		 * routine will get called from the interrupt context
741 		 * and try to put this packet on the free list
742 		 */
743 		(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_ring->recv_list);
744 
745 		ASSERT(nmp != NULL);
746 		nmp->b_wptr += length;
747 
748 		if (rx_ring->rx_mblk == NULL) {
749 			/*
750 			 *  TCP/UDP checksum offload and
751 			 *  IP checksum offload
752 			 */
753 			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
754 				/*
755 				 * Check TCP/UDP checksum
756 				 */
757 				if ((current_desc->status &
758 				    E1000_RXD_STAT_TCPCS) &&
759 				    !(current_desc->errors &
760 				    E1000_RXD_ERR_TCPE))
761 					cksumflags |= HCK_FULLCKSUM |
762 					    HCK_FULLCKSUM_OK;
763 				/*
764 				 * Check IP Checksum
765 				 */
766 				if ((current_desc->status &
767 				    E1000_RXD_STAT_IPCS) &&
768 				    !(current_desc->errors &
769 				    E1000_RXD_ERR_IPE))
770 					cksumflags |= HCK_IPV4_HDRCKSUM;
771 			}
772 		}
773 
774 		/*
775 		 * We need to maintain our packet chain in the global
776 		 * Adapter structure, for the Rx processing can end
777 		 * with a fragment that has no EOP set.
778 		 */
779 		if (rx_ring->rx_mblk == NULL) {
780 			/* Get the head of the message chain */
781 			rx_ring->rx_mblk = nmp;
782 			rx_ring->rx_mblk_tail = nmp;
783 			rx_ring->rx_mblk_len = length;
784 		} else {	/* Not the first packet */
785 			/* Continue adding buffers */
786 			rx_ring->rx_mblk_tail->b_cont = nmp;
787 			rx_ring->rx_mblk_tail = nmp;
788 			rx_ring->rx_mblk_len += length;
789 		}
790 		ASSERT(rx_ring->rx_mblk != NULL);
791 		ASSERT(rx_ring->rx_mblk_tail != NULL);
792 		ASSERT(rx_ring->rx_mblk_tail->b_cont == NULL);
793 
794 		/*
795 		 * Now this MP is ready to travel upwards but some more
796 		 * fragments are coming.
797 		 * We will send packet upwards as soon as we get EOP
798 		 * set on the packet.
799 		 */
800 		if (!end_of_packet) {
801 			/*
802 			 * continue to get the next descriptor,
803 			 * Tail would be advanced at the end
804 			 */
805 			goto rx_next_desc;
806 		}
807 
808 rx_end_of_packet:
809 		/*
810 		 * Found packet with EOP
811 		 * Process the last fragment.
812 		 */
813 		if (cksumflags != 0) {
814 			(void) hcksum_assoc(rx_ring->rx_mblk,
815 			    NULL, NULL, 0, 0, 0, 0, cksumflags, 0);
816 			cksumflags = 0;
817 		}
818 
819 		/*
820 		 * Count packets that span multi-descriptors
821 		 */
822 		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
823 		    (rx_ring->rx_mblk->b_cont != NULL));
824 
825 		/*
826 		 * Append to list to send upstream
827 		 */
828 		if (ret_mp == NULL) {
829 			ret_mp = ret_nmp = rx_ring->rx_mblk;
830 		} else {
831 			ret_nmp->b_next = rx_ring->rx_mblk;
832 			ret_nmp = rx_ring->rx_mblk;
833 		}
834 		ret_nmp->b_next = NULL;
835 		*tail = ret_nmp;
836 		chain_sz += length;
837 
838 		rx_ring->rx_mblk = NULL;
839 		rx_ring->rx_mblk_tail = NULL;
840 		rx_ring->rx_mblk_len = 0;
841 
842 		pkt_count++;
843 
844 rx_next_desc:
845 		/*
846 		 * Zero out the receive descriptors status
847 		 */
848 		current_desc->status = 0;
849 
850 		if (current_desc == rx_ring->rbd_last)
851 			rx_ring->rbd_next = rx_ring->rbd_first;
852 		else
853 			rx_ring->rbd_next++;
854 
855 		last_desc = current_desc;
856 		current_desc = rx_ring->rbd_next;
857 
858 		/*
859 		 * Put the buffer that we just indicated back
860 		 * at the end of our list
861 		 */
862 		QUEUE_PUSH_TAIL(&rx_ring->recv_list,
863 		    &packet->Link);
864 	}	/* while loop */
865 
866 	/* Sync the Rx descriptor DMA buffers */
867 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
868 	    0, 0, DDI_DMA_SYNC_FORDEV);
869 
870 	/*
871 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
872 	 */
873 	E1000_WRITE_REG(hw, E1000_RDT(0),
874 	    (uint32_t)(last_desc - rx_ring->rbd_first));
875 
876 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
877 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
878 		Adapter->e1000g_state |= E1000G_ERROR;
879 	}
880 
881 	Adapter->rx_pkt_cnt = pkt_count;
882 
883 	return (ret_mp);
884 
885 rx_drop:
886 	/*
887 	 * Zero out the receive descriptors status
888 	 */
889 	current_desc->status = 0;
890 
891 	/* Sync the Rx descriptor DMA buffers */
892 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
893 	    0, 0, DDI_DMA_SYNC_FORDEV);
894 
895 	if (current_desc == rx_ring->rbd_last)
896 		rx_ring->rbd_next = rx_ring->rbd_first;
897 	else
898 		rx_ring->rbd_next++;
899 
900 	last_desc = current_desc;
901 
902 	(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_ring->recv_list);
903 
904 	QUEUE_PUSH_TAIL(&rx_ring->recv_list, &packet->Link);
905 	/*
906 	 * Reclaim all old buffers already allocated during
907 	 * Jumbo receives.....for incomplete reception
908 	 */
909 	if (rx_ring->rx_mblk != NULL) {
910 		freemsg(rx_ring->rx_mblk);
911 		rx_ring->rx_mblk = NULL;
912 		rx_ring->rx_mblk_tail = NULL;
913 		rx_ring->rx_mblk_len = 0;
914 	}
915 	/*
916 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
917 	 */
918 	E1000_WRITE_REG(hw, E1000_RDT(0),
919 	    (uint32_t)(last_desc - rx_ring->rbd_first));
920 
921 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
922 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
923 		Adapter->e1000g_state |= E1000G_ERROR;
924 	}
925 
926 	return (ret_mp);
927 }
928