xref: /titanic_51/usr/src/uts/common/io/e1000g/e1000g_rx.c (revision a69116193464f859a8b27a2db19ad330ce163a55)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * **********************************************************************
27  *									*
28  * Module Name:								*
29  *   e1000g_rx.c							*
30  *									*
31  * Abstract:								*
32  *   This file contains some routines that take care of Receive		*
33  *   interrupt and also for the received packets it sends up to		*
34  *   upper layer.							*
35  *   It tries to do a zero copy if free buffers are available in	*
36  *   the pool.								*
37  *									*
38  * **********************************************************************
39  */
40 
41 #include "e1000g_sw.h"
42 #include "e1000g_debug.h"
43 
/*
 * Forward declaration: e1000g_get_buf() is defined further down in this
 * file and is called from e1000g_receive() to obtain a replacement rx
 * buffer. The pragma names it as a candidate for inlining.
 */
static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_data_t *rx_data);
#pragma	inline(e1000g_get_buf)
46 
47 /*
48  * e1000g_rxfree_func - the call-back function to reclaim rx buffer
49  *
50  * This function is called when an mp is freed by the user thru
51  * freeb call (Only for mp constructed through desballoc call)
52  * It returns back the freed buffer to the freelist
53  */
54 void
55 e1000g_rxfree_func(p_rx_sw_packet_t packet)
56 {
57 	e1000g_rx_data_t *rx_data;
58 	private_devi_list_t *devi_node;
59 	struct e1000g *Adapter;
60 	uint32_t ring_cnt;
61 	uint32_t ref_cnt;
62 	unsigned char *address;
63 
64 	if (packet->ref_cnt == 0) {
65 		/*
66 		 * This case only happens when rx buffers are being freed
67 		 * in e1000g_stop() and freemsg() is called.
68 		 */
69 		return;
70 	}
71 
72 	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;
73 
74 	if (packet->mp == NULL) {
75 		/*
76 		 * Allocate a mblk that binds to the data buffer
77 		 */
78 		address = (unsigned char *)packet->rx_buf->address;
79 		if (address != NULL) {
80 			packet->mp = desballoc((unsigned char *)
81 			    address, packet->rx_buf->size,
82 			    BPRI_MED, &packet->free_rtn);
83 		}
84 	}
85 
86 	/*
87 	 * Enqueue the recycled packets in a recycle queue. When freelist
88 	 * dries up, move the entire chain of packets from recycle queue
89 	 * to freelist. This helps in avoiding per packet mutex contention
90 	 * around freelist.
91 	 */
92 	mutex_enter(&rx_data->recycle_lock);
93 	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
94 	rx_data->recycle_freepkt++;
95 	mutex_exit(&rx_data->recycle_lock);
96 
97 	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
98 	if (ref_cnt == 0) {
99 		mutex_enter(&e1000g_rx_detach_lock);
100 		e1000g_free_rx_sw_packet(packet, B_FALSE);
101 
102 		atomic_dec_32(&rx_data->pending_count);
103 		atomic_dec_32(&e1000g_mblks_pending);
104 
105 		if ((rx_data->pending_count == 0) &&
106 		    (rx_data->flag & E1000G_RX_STOPPED)) {
107 			devi_node = rx_data->priv_devi_node;
108 
109 			if (devi_node != NULL) {
110 				ring_cnt = atomic_dec_32_nv(
111 				    &devi_node->pending_rx_count);
112 				if ((ring_cnt == 0) &&
113 				    (devi_node->flag &
114 				    E1000G_PRIV_DEVI_DETACH)) {
115 					e1000g_free_priv_devi_node(
116 					    devi_node);
117 				}
118 			} else {
119 				Adapter = rx_data->rx_ring->adapter;
120 				atomic_dec_32(
121 				    &Adapter->pending_rx_count);
122 			}
123 
124 			e1000g_free_rx_pending_buffers(rx_data);
125 			e1000g_free_rx_data(rx_data);
126 		}
127 		mutex_exit(&e1000g_rx_detach_lock);
128 	}
129 }
130 
131 /*
132  * e1000g_rx_setup - setup rx data structures
133  *
134  * This routine initializes all of the receive related
135  * structures. This includes the receive descriptors, the
136  * actual receive buffers, and the rx_sw_packet software
137  * structures.
138  */
139 void
140 e1000g_rx_setup(struct e1000g *Adapter)
141 {
142 	struct e1000_hw *hw;
143 	p_rx_sw_packet_t packet;
144 	struct e1000_rx_desc *descriptor;
145 	uint32_t buf_low;
146 	uint32_t buf_high;
147 	uint32_t reg_val;
148 	uint32_t rctl;
149 	uint32_t rxdctl;
150 	uint32_t ert;
151 	uint16_t phy_data;
152 	int i;
153 	int size;
154 	e1000g_rx_data_t *rx_data;
155 
156 	hw = &Adapter->shared;
157 	rx_data = Adapter->rx_ring->rx_data;
158 
159 	/*
160 	 * zero out all of the receive buffer descriptor memory
161 	 * assures any previous data or status is erased
162 	 */
163 	bzero(rx_data->rbd_area,
164 	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);
165 
166 	if (!Adapter->rx_buffer_setup) {
167 		/* Init the list of "Receive Buffer" */
168 		QUEUE_INIT_LIST(&rx_data->recv_list);
169 
170 		/* Init the list of "Free Receive Buffer" */
171 		QUEUE_INIT_LIST(&rx_data->free_list);
172 
173 		/* Init the list of "Free Receive Buffer" */
174 		QUEUE_INIT_LIST(&rx_data->recycle_list);
175 		/*
176 		 * Setup Receive list and the Free list. Note that
177 		 * the both were allocated in one packet area.
178 		 */
179 		packet = rx_data->packet_area;
180 		descriptor = rx_data->rbd_first;
181 
182 		for (i = 0; i < Adapter->rx_desc_num;
183 		    i++, packet = packet->next, descriptor++) {
184 			ASSERT(packet != NULL);
185 			ASSERT(descriptor != NULL);
186 			descriptor->buffer_addr =
187 			    packet->rx_buf->dma_address;
188 
189 			/* Add this rx_sw_packet to the receive list */
190 			QUEUE_PUSH_TAIL(&rx_data->recv_list,
191 			    &packet->Link);
192 		}
193 
194 		for (i = 0; i < Adapter->rx_freelist_num;
195 		    i++, packet = packet->next) {
196 			ASSERT(packet != NULL);
197 			/* Add this rx_sw_packet to the free list */
198 			QUEUE_PUSH_TAIL(&rx_data->free_list,
199 			    &packet->Link);
200 		}
201 		rx_data->avail_freepkt = Adapter->rx_freelist_num;
202 		rx_data->recycle_freepkt = 0;
203 
204 		Adapter->rx_buffer_setup = B_TRUE;
205 	} else {
206 		/* Setup the initial pointer to the first rx descriptor */
207 		packet = (p_rx_sw_packet_t)
208 		    QUEUE_GET_HEAD(&rx_data->recv_list);
209 		descriptor = rx_data->rbd_first;
210 
211 		for (i = 0; i < Adapter->rx_desc_num; i++) {
212 			ASSERT(packet != NULL);
213 			ASSERT(descriptor != NULL);
214 			descriptor->buffer_addr =
215 			    packet->rx_buf->dma_address;
216 
217 			/* Get next rx_sw_packet */
218 			packet = (p_rx_sw_packet_t)
219 			    QUEUE_GET_NEXT(&rx_data->recv_list, &packet->Link);
220 			descriptor++;
221 		}
222 	}
223 
224 	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
225 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
226 	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
227 	if (hw->mac.type >= e1000_82540) {
228 		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
229 		    Adapter->rx_intr_abs_delay);
230 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
231 		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
232 	}
233 
234 	/*
235 	 * Setup our descriptor pointers
236 	 */
237 	rx_data->rbd_next = rx_data->rbd_first;
238 
239 	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
240 	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
241 	size = E1000_READ_REG(hw, E1000_RDLEN(0));
242 
243 	/* To get lower order bits */
244 	buf_low = (uint32_t)rx_data->rbd_dma_addr;
245 	/* To get the higher order bits */
246 	buf_high = (uint32_t)(rx_data->rbd_dma_addr >> 32);
247 
248 	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
249 	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);
250 
251 	/*
252 	 * Setup our HW Rx Head & Tail descriptor pointers
253 	 */
254 	E1000_WRITE_REG(hw, E1000_RDT(0),
255 	    (uint32_t)(rx_data->rbd_last - rx_data->rbd_first));
256 	E1000_WRITE_REG(hw, E1000_RDH(0), 0);
257 
258 	/*
259 	 * Setup the Receive Control Register (RCTL), and ENABLE the
260 	 * receiver. The initial configuration is to: Enable the receiver,
261 	 * accept broadcasts, discard bad packets (and long packets),
262 	 * disable VLAN filter checking, set the receive descriptor
263 	 * minimum threshold size to 1/2, and the receive buffer size to
264 	 * 2k.
265 	 */
266 	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
267 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
268 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
269 	    E1000_RCTL_RDMTS_HALF |
270 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
271 
272 	if (Adapter->default_mtu > ETHERMTU)
273 		rctl |= E1000_RCTL_LPE;  /* Large Packet Enable bit */
274 
275 	if (Adapter->strip_crc)
276 		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */
277 
278 	if (Adapter->mem_workaround_82546 &&
279 	    ((hw->mac.type == e1000_82545) ||
280 	    (hw->mac.type == e1000_82546) ||
281 	    (hw->mac.type == e1000_82546_rev_3))) {
282 		rctl |= E1000_RCTL_SZ_2048;
283 	} else {
284 		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
285 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
286 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
287 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
288 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
289 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
290 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
291 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
292 			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
293 		else
294 			rctl |= E1000_RCTL_SZ_2048;
295 	}
296 
297 	if (e1000_tbi_sbp_enabled_82543(hw))
298 		rctl |= E1000_RCTL_SBP;
299 
300 	/*
301 	 * Enable Early Receive Threshold (ERT) on supported devices.
302 	 * Only takes effect when packet size is equal or larger than the
303 	 * specified value (in 8 byte units), e.g. using jumbo frames.
304 	 */
305 	if ((hw->mac.type == e1000_82573) ||
306 	    (hw->mac.type == e1000_82574) ||
307 	    (hw->mac.type == e1000_ich9lan) ||
308 	    (hw->mac.type == e1000_ich10lan)) {
309 
310 		ert = E1000_ERT_2048;
311 
312 		/*
313 		 * Special modification when ERT and
314 		 * jumbo frames are enabled
315 		 */
316 		if (Adapter->default_mtu > ETHERMTU) {
317 			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
318 			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
319 			ert |= (1 << 13);
320 		}
321 
322 		E1000_WRITE_REG(hw, E1000_ERT, ert);
323 	}
324 
325 	/* Workaround errata on 82577/8 adapters with large frames */
326 	if ((hw->mac.type == e1000_pchlan) &&
327 	    (Adapter->default_mtu > ETHERMTU)) {
328 
329 		(void) e1000_read_phy_reg(hw, PHY_REG(770, 26), &phy_data);
330 		phy_data &= 0xfff8;
331 		phy_data |= (1 << 2);
332 		(void) e1000_write_phy_reg(hw, PHY_REG(770, 26), phy_data);
333 
334 		if (hw->phy.type == e1000_phy_82577) {
335 			(void) e1000_read_phy_reg(hw, 22, &phy_data);
336 			phy_data &= 0x0fff;
337 			phy_data |= (1 << 14);
338 			(void) e1000_write_phy_reg(hw, 0x10, 0x2823);
339 			(void) e1000_write_phy_reg(hw, 0x11, 0x0003);
340 			(void) e1000_write_phy_reg(hw, 22, phy_data);
341 		}
342 	}
343 
344 	reg_val =
345 	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
346 	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */
347 
348 	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
349 
350 	/*
351 	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
352 	 * processing of received IPV6 extension headers
353 	 */
354 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
355 		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
356 		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
357 		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
358 		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
359 	}
360 
361 	/* Write to enable the receive unit */
362 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
363 }
364 
365 /*
366  * e1000g_get_buf - get an rx sw packet from the free_list
367  */
368 static p_rx_sw_packet_t
369 e1000g_get_buf(e1000g_rx_data_t *rx_data)
370 {
371 	p_rx_sw_packet_t packet;
372 	struct e1000g *Adapter;
373 
374 	Adapter = rx_data->rx_ring->adapter;
375 
376 	mutex_enter(&rx_data->freelist_lock);
377 	packet = (p_rx_sw_packet_t)
378 	    QUEUE_POP_HEAD(&rx_data->free_list);
379 	if (packet != NULL) {
380 		rx_data->avail_freepkt--;
381 		goto end;
382 	}
383 
384 	/*
385 	 * If the freelist has no packets, check the recycle list
386 	 * to see if there are any available descriptor there.
387 	 */
388 	mutex_enter(&rx_data->recycle_lock);
389 	QUEUE_SWITCH(&rx_data->free_list, &rx_data->recycle_list);
390 	rx_data->avail_freepkt = rx_data->recycle_freepkt;
391 	rx_data->recycle_freepkt = 0;
392 	mutex_exit(&rx_data->recycle_lock);
393 	packet = (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->free_list);
394 	if (packet != NULL) {
395 		rx_data->avail_freepkt--;
396 		goto end;
397 	}
398 
399 	if (Adapter->rx_freelist_num < Adapter->rx_freelist_limit) {
400 		(void) e1000g_increase_rx_packets(rx_data);
401 		packet = (p_rx_sw_packet_t)
402 		    QUEUE_POP_HEAD(&rx_data->free_list);
403 		if (packet != NULL) {
404 			rx_data->avail_freepkt--;
405 		}
406 	}
407 
408 end:
409 	mutex_exit(&rx_data->freelist_lock);
410 	return (packet);
411 }
412 
413 /*
414  * e1000g_receive - main receive routine
415  *
416  * This routine will process packets received in an interrupt
417  */
418 mblk_t *
419 e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
420 {
421 	struct e1000_hw *hw;
422 	mblk_t *nmp;
423 	mblk_t *ret_mp;
424 	mblk_t *ret_nmp;
425 	struct e1000_rx_desc *current_desc;
426 	struct e1000_rx_desc *last_desc;
427 	p_rx_sw_packet_t packet;
428 	p_rx_sw_packet_t newpkt;
429 	uint16_t length;
430 	uint32_t pkt_count;
431 	uint32_t desc_count;
432 	boolean_t accept_frame;
433 	boolean_t end_of_packet;
434 	boolean_t need_copy;
435 	struct e1000g *Adapter;
436 	dma_buffer_t *rx_buf;
437 	uint16_t cksumflags;
438 	uint_t chain_sz = 0;
439 	e1000g_rx_data_t *rx_data;
440 	uint32_t max_size;
441 	uint32_t min_size;
442 
443 	ret_mp = NULL;
444 	ret_nmp = NULL;
445 	pkt_count = 0;
446 	desc_count = 0;
447 	cksumflags = 0;
448 
449 	Adapter = rx_ring->adapter;
450 	rx_data = rx_ring->rx_data;
451 	hw = &Adapter->shared;
452 
453 	/* Sync the Rx descriptor DMA buffers */
454 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
455 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
456 
457 	if (e1000g_check_dma_handle(rx_data->rbd_dma_handle) != DDI_FM_OK) {
458 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
459 		Adapter->e1000g_state |= E1000G_ERROR;
460 		return (NULL);
461 	}
462 
463 	current_desc = rx_data->rbd_next;
464 	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
465 		/*
466 		 * don't send anything up. just clear the RFD
467 		 */
468 		E1000G_DEBUG_STAT(rx_ring->stat_none);
469 		return (NULL);
470 	}
471 
472 	max_size = Adapter->max_frame_size - ETHERFCSL - VLAN_TAGSZ;
473 	min_size = ETHERMIN;
474 
475 	/*
476 	 * Loop through the receive descriptors starting at the last known
477 	 * descriptor owned by the hardware that begins a packet.
478 	 */
479 	while ((current_desc->status & E1000_RXD_STAT_DD) &&
480 	    (pkt_count < Adapter->rx_limit_onintr) &&
481 	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {
482 
483 		desc_count++;
484 		/*
485 		 * Now this can happen in Jumbo frame situation.
486 		 */
487 		if (current_desc->status & E1000_RXD_STAT_EOP) {
488 			/* packet has EOP set */
489 			end_of_packet = B_TRUE;
490 		} else {
491 			/*
492 			 * If this received buffer does not have the
493 			 * End-Of-Packet bit set, the received packet
494 			 * will consume multiple buffers. We won't send this
495 			 * packet upstack till we get all the related buffers.
496 			 */
497 			end_of_packet = B_FALSE;
498 		}
499 
500 		/*
501 		 * Get a pointer to the actual receive buffer
502 		 * The mp->b_rptr is mapped to The CurrentDescriptor
503 		 * Buffer Address.
504 		 */
505 		packet =
506 		    (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
507 		ASSERT(packet != NULL);
508 
509 		rx_buf = packet->rx_buf;
510 
511 		length = current_desc->length;
512 
513 #ifdef __sparc
514 		if (packet->dma_type == USE_DVMA)
515 			dvma_sync(rx_buf->dma_handle, 0,
516 			    DDI_DMA_SYNC_FORKERNEL);
517 		else
518 			(void) ddi_dma_sync(rx_buf->dma_handle,
519 			    E1000G_IPALIGNROOM, length,
520 			    DDI_DMA_SYNC_FORKERNEL);
521 #else
522 		(void) ddi_dma_sync(rx_buf->dma_handle,
523 		    E1000G_IPALIGNROOM, length,
524 		    DDI_DMA_SYNC_FORKERNEL);
525 #endif
526 
527 		if (e1000g_check_dma_handle(
528 		    rx_buf->dma_handle) != DDI_FM_OK) {
529 			ddi_fm_service_impact(Adapter->dip,
530 			    DDI_SERVICE_DEGRADED);
531 			Adapter->e1000g_state |= E1000G_ERROR;
532 
533 			goto rx_drop;
534 		}
535 
536 		accept_frame = (current_desc->errors == 0) ||
537 		    ((current_desc->errors &
538 		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);
539 
540 		if (hw->mac.type == e1000_82543) {
541 			unsigned char last_byte;
542 
543 			last_byte =
544 			    *((unsigned char *)rx_buf->address + length - 1);
545 
546 			if (TBI_ACCEPT(hw,
547 			    current_desc->status, current_desc->errors,
548 			    current_desc->length, last_byte,
549 			    Adapter->min_frame_size, Adapter->max_frame_size)) {
550 
551 				e1000_tbi_adjust_stats(Adapter,
552 				    length, hw->mac.addr);
553 
554 				length--;
555 				accept_frame = B_TRUE;
556 			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
557 			    (current_desc->errors == E1000_RXD_ERR_CE)) {
558 				accept_frame = B_TRUE;
559 			}
560 		}
561 
562 		/*
563 		 * Indicate the packet to the NOS if it was good.
564 		 * Normally, hardware will discard bad packets for us.
565 		 * Check for the packet to be a valid Ethernet packet
566 		 */
567 		if (!accept_frame) {
568 			/*
569 			 * error in incoming packet, either the packet is not a
570 			 * ethernet size packet, or the packet has an error. In
571 			 * either case, the packet will simply be discarded.
572 			 */
573 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
574 			    "Process Receive Interrupts: Error in Packet\n");
575 
576 			E1000G_STAT(rx_ring->stat_error);
577 			/*
578 			 * Returning here as we are done here. There is
579 			 * no point in waiting for while loop to elapse
580 			 * and the things which were done. More efficient
581 			 * and less error prone...
582 			 */
583 			goto rx_drop;
584 		}
585 
586 		/*
587 		 * If the Ethernet CRC is not stripped by the hardware,
588 		 * we need to strip it before sending it up to the stack.
589 		 */
590 		if (end_of_packet && !Adapter->strip_crc) {
591 			if (length > ETHERFCSL) {
592 				length -= ETHERFCSL;
593 			} else {
594 				/*
595 				 * If the fragment is smaller than the CRC,
596 				 * drop this fragment, do the processing of
597 				 * the end of the packet.
598 				 */
599 				if (rx_data->rx_mblk_tail == NULL) {
600 					E1000G_STAT(rx_ring->stat_crc_only_pkt);
601 					goto rx_next_desc;
602 				}
603 
604 				rx_data->rx_mblk_tail->b_wptr -=
605 				    ETHERFCSL - length;
606 				rx_data->rx_mblk_len -=
607 				    ETHERFCSL - length;
608 				goto rx_end_of_packet;
609 			}
610 		}
611 
612 		need_copy = B_TRUE;
613 
614 		if (length <= Adapter->rx_bcopy_thresh)
615 			goto rx_copy;
616 
617 		/*
618 		 * Get the pre-constructed mblk that was associated
619 		 * to the receive data buffer.
620 		 */
621 		if (packet->mp == NULL) {
622 			packet->mp = desballoc((unsigned char *)
623 			    rx_buf->address, length,
624 			    BPRI_MED, &packet->free_rtn);
625 		}
626 
627 		if (packet->mp != NULL) {
628 			/*
629 			 * We have two sets of buffer pool. One associated with
630 			 * the Rxdescriptors and other a freelist buffer pool.
631 			 * Each time we get a good packet, Try to get a buffer
632 			 * from the freelist pool using e1000g_get_buf. If we
633 			 * get free buffer, then replace the descriptor buffer
634 			 * address with the free buffer we just got, and pass
635 			 * the pre-constructed mblk upstack. (note no copying)
636 			 *
637 			 * If we failed to get a free buffer, then try to
638 			 * allocate a new buffer(mp) and copy the recv buffer
639 			 * content to our newly allocated buffer(mp). Don't
640 			 * disturb the desriptor buffer address. (note copying)
641 			 */
642 			newpkt = e1000g_get_buf(rx_data);
643 
644 			if (newpkt != NULL) {
645 				/*
646 				 * Get the mblk associated to the data,
647 				 * and strip it off the sw packet.
648 				 */
649 				nmp = packet->mp;
650 				packet->mp = NULL;
651 				atomic_inc_32(&packet->ref_cnt);
652 
653 				/*
654 				 * Now replace old buffer with the new
655 				 * one we got from free list
656 				 * Both the RxSwPacket as well as the
657 				 * Receive Buffer Descriptor will now
658 				 * point to this new packet.
659 				 */
660 				packet = newpkt;
661 
662 				current_desc->buffer_addr =
663 				    newpkt->rx_buf->dma_address;
664 
665 				need_copy = B_FALSE;
666 			} else {
667 				/* EMPTY */
668 				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
669 			}
670 		}
671 
672 rx_copy:
673 		if (need_copy) {
674 			/*
675 			 * No buffers available on free list,
676 			 * bcopy the data from the buffer and
677 			 * keep the original buffer. Dont want to
678 			 * do this.. Yack but no other way
679 			 */
680 			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
681 			    BPRI_MED)) == NULL) {
682 				/*
683 				 * The system has no buffers available
684 				 * to send up the incoming packet, hence
685 				 * the packet will have to be processed
686 				 * when there're more buffers available.
687 				 */
688 				E1000G_STAT(rx_ring->stat_allocb_fail);
689 				goto rx_drop;
690 			}
691 			nmp->b_rptr += E1000G_IPALIGNROOM;
692 			nmp->b_wptr += E1000G_IPALIGNROOM;
693 			/*
694 			 * The free list did not have any buffers
695 			 * available, so, the received packet will
696 			 * have to be copied into a mp and the original
697 			 * buffer will have to be retained for future
698 			 * packet reception.
699 			 */
700 			bcopy(rx_buf->address, nmp->b_wptr, length);
701 		}
702 
703 		ASSERT(nmp != NULL);
704 		nmp->b_wptr += length;
705 
706 		if (rx_data->rx_mblk == NULL) {
707 			/*
708 			 *  TCP/UDP checksum offload and
709 			 *  IP checksum offload
710 			 */
711 			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
712 				/*
713 				 * Check TCP/UDP checksum
714 				 */
715 				if ((current_desc->status &
716 				    E1000_RXD_STAT_TCPCS) &&
717 				    !(current_desc->errors &
718 				    E1000_RXD_ERR_TCPE))
719 					cksumflags |= HCK_FULLCKSUM_OK;
720 				/*
721 				 * Check IP Checksum
722 				 */
723 				if ((current_desc->status &
724 				    E1000_RXD_STAT_IPCS) &&
725 				    !(current_desc->errors &
726 				    E1000_RXD_ERR_IPE))
727 					cksumflags |= HCK_IPV4_HDRCKSUM_OK;
728 			}
729 		}
730 
731 		/*
732 		 * We need to maintain our packet chain in the global
733 		 * Adapter structure, for the Rx processing can end
734 		 * with a fragment that has no EOP set.
735 		 */
736 		if (rx_data->rx_mblk == NULL) {
737 			/* Get the head of the message chain */
738 			rx_data->rx_mblk = nmp;
739 			rx_data->rx_mblk_tail = nmp;
740 			rx_data->rx_mblk_len = length;
741 		} else {	/* Not the first packet */
742 			/* Continue adding buffers */
743 			rx_data->rx_mblk_tail->b_cont = nmp;
744 			rx_data->rx_mblk_tail = nmp;
745 			rx_data->rx_mblk_len += length;
746 		}
747 		ASSERT(rx_data->rx_mblk != NULL);
748 		ASSERT(rx_data->rx_mblk_tail != NULL);
749 		ASSERT(rx_data->rx_mblk_tail->b_cont == NULL);
750 
751 		/*
752 		 * Now this MP is ready to travel upwards but some more
753 		 * fragments are coming.
754 		 * We will send packet upwards as soon as we get EOP
755 		 * set on the packet.
756 		 */
757 		if (!end_of_packet) {
758 			/*
759 			 * continue to get the next descriptor,
760 			 * Tail would be advanced at the end
761 			 */
762 			goto rx_next_desc;
763 		}
764 
765 rx_end_of_packet:
766 		if (E1000G_IS_VLAN_PACKET(rx_data->rx_mblk->b_rptr))
767 			max_size = Adapter->max_frame_size - ETHERFCSL;
768 
769 		if ((rx_data->rx_mblk_len > max_size) ||
770 		    (rx_data->rx_mblk_len < min_size)) {
771 			E1000G_STAT(rx_ring->stat_size_error);
772 			goto rx_drop;
773 		}
774 
775 		/*
776 		 * Found packet with EOP
777 		 * Process the last fragment.
778 		 */
779 		if (cksumflags != 0) {
780 			mac_hcksum_set(rx_data->rx_mblk,
781 			    0, 0, 0, 0, cksumflags);
782 			cksumflags = 0;
783 		}
784 
785 		/*
786 		 * Count packets that span multi-descriptors
787 		 */
788 		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
789 		    (rx_data->rx_mblk->b_cont != NULL));
790 
791 		/*
792 		 * Append to list to send upstream
793 		 */
794 		if (ret_mp == NULL) {
795 			ret_mp = ret_nmp = rx_data->rx_mblk;
796 		} else {
797 			ret_nmp->b_next = rx_data->rx_mblk;
798 			ret_nmp = rx_data->rx_mblk;
799 		}
800 		ret_nmp->b_next = NULL;
801 		*tail = ret_nmp;
802 		chain_sz += length;
803 
804 		rx_data->rx_mblk = NULL;
805 		rx_data->rx_mblk_tail = NULL;
806 		rx_data->rx_mblk_len = 0;
807 
808 		pkt_count++;
809 
810 rx_next_desc:
811 		/*
812 		 * Zero out the receive descriptors status
813 		 */
814 		current_desc->status = 0;
815 
816 		if (current_desc == rx_data->rbd_last)
817 			rx_data->rbd_next = rx_data->rbd_first;
818 		else
819 			rx_data->rbd_next++;
820 
821 		last_desc = current_desc;
822 		current_desc = rx_data->rbd_next;
823 
824 		/*
825 		 * Put the buffer that we just indicated back
826 		 * at the end of our list
827 		 */
828 		QUEUE_PUSH_TAIL(&rx_data->recv_list,
829 		    &packet->Link);
830 	}	/* while loop */
831 
832 	/* Sync the Rx descriptor DMA buffers */
833 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
834 	    0, 0, DDI_DMA_SYNC_FORDEV);
835 
836 	/*
837 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
838 	 */
839 	E1000_WRITE_REG(hw, E1000_RDT(0),
840 	    (uint32_t)(last_desc - rx_data->rbd_first));
841 
842 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
843 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
844 		Adapter->e1000g_state |= E1000G_ERROR;
845 	}
846 
847 	Adapter->rx_pkt_cnt = pkt_count;
848 
849 	return (ret_mp);
850 
851 rx_drop:
852 	/*
853 	 * Zero out the receive descriptors status
854 	 */
855 	current_desc->status = 0;
856 
857 	/* Sync the Rx descriptor DMA buffers */
858 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
859 	    0, 0, DDI_DMA_SYNC_FORDEV);
860 
861 	if (current_desc == rx_data->rbd_last)
862 		rx_data->rbd_next = rx_data->rbd_first;
863 	else
864 		rx_data->rbd_next++;
865 
866 	last_desc = current_desc;
867 
868 	QUEUE_PUSH_TAIL(&rx_data->recv_list, &packet->Link);
869 	/*
870 	 * Reclaim all old buffers already allocated during
871 	 * Jumbo receives.....for incomplete reception
872 	 */
873 	if (rx_data->rx_mblk != NULL) {
874 		freemsg(rx_data->rx_mblk);
875 		rx_data->rx_mblk = NULL;
876 		rx_data->rx_mblk_tail = NULL;
877 		rx_data->rx_mblk_len = 0;
878 	}
879 	/*
880 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
881 	 */
882 	E1000_WRITE_REG(hw, E1000_RDT(0),
883 	    (uint32_t)(last_desc - rx_data->rbd_first));
884 
885 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
886 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
887 		Adapter->e1000g_state |= E1000G_ERROR;
888 	}
889 
890 	return (ret_mp);
891 }
892