xref: /illumos-gate/usr/src/uts/common/io/e1000g/e1000g_rx.c (revision 54e0d7a5e8285a3f01a0db8db1246ac7cac94d81)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * **********************************************************************
28  *									*
29  * Module Name:								*
30  *   e1000g_rx.c							*
31  *									*
32  * Abstract:								*
33  *   This file contains the routines that service the receive		*
34  *   interrupt and pass the received packets up to the upper		*
35  *   layer.								*
36  *   It tries to do a zero copy if free buffers are available in	*
37  *   the pool.								*
38  *									*
39  * **********************************************************************
40  */
41 
42 #include "e1000g_sw.h"
43 #include "e1000g_debug.h"
44 
45 static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_data_t *rx_data);
46 #pragma	inline(e1000g_get_buf)
47 
48 /*
49  * e1000g_rxfree_func - the call-back function to reclaim rx buffer
50  *
51  * This function is called when an mp is freed by the user thru
52  * freeb call (Only for mp constructed through desballoc call)
53  * It returns back the freed buffer to the freelist
54  */
55 void
56 e1000g_rxfree_func(p_rx_sw_packet_t packet)
57 {
58 	e1000g_rx_data_t *rx_data;
59 	private_devi_list_t *devi_node;
60 	struct e1000g *Adapter;
61 	uint32_t ring_cnt;
62 	uint32_t ref_cnt;
63 	unsigned char *address;
64 
65 	if (packet->ref_cnt == 0) {
66 		/*
67 		 * This case only happens when rx buffers are being freed
68 		 * in e1000g_stop() and freemsg() is called.
69 		 */
70 		return;
71 	}
72 
73 	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;
74 
75 	if (packet->mp == NULL) {
76 		/*
77 		 * Allocate a mblk that binds to the data buffer
78 		 */
79 		address = (unsigned char *)packet->rx_buf->address;
80 		if (address != NULL) {
81 			packet->mp = desballoc((unsigned char *)
82 			    address - E1000G_IPALIGNROOM,
83 			    packet->rx_buf->size + E1000G_IPALIGNROOM,
84 			    BPRI_MED, &packet->free_rtn);
85 		}
86 		if (packet->mp != NULL) {
87 			packet->mp->b_rptr += E1000G_IPALIGNROOM;
88 			packet->mp->b_wptr += E1000G_IPALIGNROOM;
89 		}
90 	}
91 
92 	/*
93 	 * Enqueue the recycled packets in a recycle queue. When freelist
94 	 * dries up, move the entire chain of packets from recycle queue
95 	 * to freelist. This helps in avoiding per packet mutex contention
96 	 * around freelist.
97 	 */
98 	mutex_enter(&rx_data->recycle_lock);
99 	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
100 	rx_data->recycle_freepkt++;
101 	mutex_exit(&rx_data->recycle_lock);
102 
103 	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
104 	if (ref_cnt == 0) {
105 		e1000g_free_rx_sw_packet(packet, B_FALSE);
106 
107 		mutex_enter(&e1000g_rx_detach_lock);
108 		atomic_dec_32(&rx_data->pending_count);
109 		atomic_dec_32(&e1000g_mblks_pending);
110 
111 		if ((rx_data->pending_count == 0) &&
112 		    (rx_data->flag & E1000G_RX_STOPPED)) {
113 			devi_node = rx_data->priv_devi_node;
114 
115 			e1000g_free_rx_pending_buffers(rx_data);
116 			e1000g_free_rx_data(rx_data);
117 
118 			if (devi_node != NULL) {
119 				ring_cnt = atomic_dec_32_nv(
120 				    &devi_node->pending_rx_count);
121 				if ((ring_cnt == 0) &&
122 				    (devi_node->flag &
123 				    E1000G_PRIV_DEVI_DETACH)) {
124 					e1000g_free_priv_devi_node(
125 					    devi_node);
126 				}
127 			} else {
128 				Adapter = rx_data->rx_ring->adapter;
129 				atomic_dec_32(
130 				    &Adapter->pending_rx_count);
131 			}
132 		}
133 		mutex_exit(&e1000g_rx_detach_lock);
134 	}
135 }
136 
137 /*
138  * e1000g_rx_setup - setup rx data structures
139  *
140  * This routine initializes all of the receive related
141  * structures. This includes the receive descriptors, the
142  * actual receive buffers, and the rx_sw_packet software
143  * structures.
144  */
145 void
146 e1000g_rx_setup(struct e1000g *Adapter)
147 {
148 	struct e1000_hw *hw;
149 	p_rx_sw_packet_t packet;
150 	struct e1000_rx_desc *descriptor;
151 	uint32_t buf_low;
152 	uint32_t buf_high;
153 	uint32_t reg_val;
154 	uint32_t rctl;
155 	uint32_t rxdctl;
156 	uint32_t ert;
157 	int i;
158 	int size;
159 	e1000g_rx_data_t *rx_data;
160 
161 	hw = &Adapter->shared;
162 	rx_data = Adapter->rx_ring->rx_data;
163 
164 	/*
165 	 * zero out all of the receive buffer descriptor memory
166 	 * assures any previous data or status is erased
167 	 */
168 	bzero(rx_data->rbd_area,
169 	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);
170 
171 	if (!Adapter->rx_buffer_setup) {
172 		/* Init the list of "Receive Buffer" */
173 		QUEUE_INIT_LIST(&rx_data->recv_list);
174 
175 		/* Init the list of "Free Receive Buffer" */
176 		QUEUE_INIT_LIST(&rx_data->free_list);
177 
178 		/* Init the list of "Free Receive Buffer" */
179 		QUEUE_INIT_LIST(&rx_data->recycle_list);
180 		/*
181 		 * Setup Receive list and the Free list. Note that
182 		 * the both were allocated in one packet area.
183 		 */
184 		packet = rx_data->packet_area;
185 		descriptor = rx_data->rbd_first;
186 
187 		for (i = 0; i < Adapter->rx_desc_num;
188 		    i++, packet = packet->next, descriptor++) {
189 			ASSERT(packet != NULL);
190 			ASSERT(descriptor != NULL);
191 			descriptor->buffer_addr =
192 			    packet->rx_buf->dma_address;
193 
194 			/* Add this rx_sw_packet to the receive list */
195 			QUEUE_PUSH_TAIL(&rx_data->recv_list,
196 			    &packet->Link);
197 		}
198 
199 		for (i = 0; i < Adapter->rx_freelist_num;
200 		    i++, packet = packet->next) {
201 			ASSERT(packet != NULL);
202 			/* Add this rx_sw_packet to the free list */
203 			QUEUE_PUSH_TAIL(&rx_data->free_list,
204 			    &packet->Link);
205 		}
206 		rx_data->avail_freepkt = Adapter->rx_freelist_num;
207 		rx_data->recycle_freepkt = 0;
208 
209 		Adapter->rx_buffer_setup = B_TRUE;
210 	} else {
211 		/* Setup the initial pointer to the first rx descriptor */
212 		packet = (p_rx_sw_packet_t)
213 		    QUEUE_GET_HEAD(&rx_data->recv_list);
214 		descriptor = rx_data->rbd_first;
215 
216 		for (i = 0; i < Adapter->rx_desc_num; i++) {
217 			ASSERT(packet != NULL);
218 			ASSERT(descriptor != NULL);
219 			descriptor->buffer_addr =
220 			    packet->rx_buf->dma_address;
221 
222 			/* Get next rx_sw_packet */
223 			packet = (p_rx_sw_packet_t)
224 			    QUEUE_GET_NEXT(&rx_data->recv_list, &packet->Link);
225 			descriptor++;
226 		}
227 	}
228 
229 	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
230 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
231 	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
232 	if (hw->mac.type >= e1000_82540) {
233 		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
234 		    Adapter->rx_intr_abs_delay);
235 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
236 		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
237 	}
238 
239 	/*
240 	 * Setup our descriptor pointers
241 	 */
242 	rx_data->rbd_next = rx_data->rbd_first;
243 
244 	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
245 	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
246 	size = E1000_READ_REG(hw, E1000_RDLEN(0));
247 
248 	/* To get lower order bits */
249 	buf_low = (uint32_t)rx_data->rbd_dma_addr;
250 	/* To get the higher order bits */
251 	buf_high = (uint32_t)(rx_data->rbd_dma_addr >> 32);
252 
253 	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
254 	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);
255 
256 	/*
257 	 * Setup our HW Rx Head & Tail descriptor pointers
258 	 */
259 	E1000_WRITE_REG(hw, E1000_RDT(0),
260 	    (uint32_t)(rx_data->rbd_last - rx_data->rbd_first));
261 	E1000_WRITE_REG(hw, E1000_RDH(0), 0);
262 
263 	/*
264 	 * Setup the Receive Control Register (RCTL), and ENABLE the
265 	 * receiver. The initial configuration is to: Enable the receiver,
266 	 * accept broadcasts, discard bad packets (and long packets),
267 	 * disable VLAN filter checking, set the receive descriptor
268 	 * minimum threshold size to 1/2, and the receive buffer size to
269 	 * 2k.
270 	 */
271 	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
272 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
273 	    E1000_RCTL_LPE |		/* Large Packet Enable bit */
274 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
275 	    E1000_RCTL_RDMTS_HALF |
276 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
277 
278 	if (Adapter->strip_crc)
279 		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */
280 
281 	if (Adapter->mem_workaround_82546 &&
282 	    ((hw->mac.type == e1000_82545) ||
283 	    (hw->mac.type == e1000_82546) ||
284 	    (hw->mac.type == e1000_82546_rev_3))) {
285 		rctl |= E1000_RCTL_SZ_2048;
286 	} else {
287 		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
288 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
289 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
290 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
291 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
292 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
293 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
294 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
295 			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
296 		else
297 			rctl |= E1000_RCTL_SZ_2048;
298 	}
299 
300 	if (e1000_tbi_sbp_enabled_82543(hw))
301 		rctl |= E1000_RCTL_SBP;
302 
303 	/*
304 	 * Enable Early Receive Threshold (ERT) on supported devices.
305 	 * Only takes effect when packet size is equal or larger than the
306 	 * specified value (in 8 byte units), e.g. using jumbo frames.
307 	 */
308 	if ((hw->mac.type == e1000_82573) ||
309 	    (hw->mac.type == e1000_82574) ||
310 	    (hw->mac.type == e1000_ich9lan) ||
311 	    (hw->mac.type == e1000_ich10lan)) {
312 
313 		ert = E1000_ERT_2048;
314 
315 		/*
316 		 * Special modification when ERT and
317 		 * jumbo frames are enabled
318 		 */
319 		if (Adapter->default_mtu > ETHERMTU) {
320 			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
321 			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
322 			ert |= (1 << 13);
323 		}
324 
325 		E1000_WRITE_REG(hw, E1000_ERT, ert);
326 	}
327 
328 	reg_val =
329 	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
330 	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */
331 
332 	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
333 
334 	/*
335 	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
336 	 * processing of received IPV6 extension headers
337 	 */
338 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
339 		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
340 		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
341 		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
342 		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
343 	}
344 
345 	/* Write to enable the receive unit */
346 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
347 }
348 
349 /*
350  * e1000g_get_buf - get an rx sw packet from the free_list
351  */
352 static p_rx_sw_packet_t
353 e1000g_get_buf(e1000g_rx_data_t *rx_data)
354 {
355 	p_rx_sw_packet_t packet;
356 
357 	mutex_enter(&rx_data->freelist_lock);
358 	packet = (p_rx_sw_packet_t)
359 	    QUEUE_POP_HEAD(&rx_data->free_list);
360 	if (packet != NULL) {
361 		rx_data->avail_freepkt--;
362 	} else {
363 		/*
364 		 * If the freelist has no packets, check the recycle list
365 		 * to see if there are any available descriptor there.
366 		 */
367 		mutex_enter(&rx_data->recycle_lock);
368 		QUEUE_SWITCH(&rx_data->free_list, &rx_data->recycle_list);
369 		rx_data->avail_freepkt = rx_data->recycle_freepkt;
370 		rx_data->recycle_freepkt = 0;
371 		mutex_exit(&rx_data->recycle_lock);
372 		packet = (p_rx_sw_packet_t)
373 		    QUEUE_POP_HEAD(&rx_data->free_list);
374 		if (packet != NULL)
375 			rx_data->avail_freepkt--;
376 	}
377 	mutex_exit(&rx_data->freelist_lock);
378 
379 	return (packet);
380 }
381 
382 /*
383  * e1000g_receive - main receive routine
384  *
385  * This routine will process packets received in an interrupt
386  */
387 mblk_t *
388 e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
389 {
390 	struct e1000_hw *hw;
391 	mblk_t *nmp;
392 	mblk_t *ret_mp;
393 	mblk_t *ret_nmp;
394 	struct e1000_rx_desc *current_desc;
395 	struct e1000_rx_desc *last_desc;
396 	p_rx_sw_packet_t packet;
397 	p_rx_sw_packet_t newpkt;
398 	uint16_t length;
399 	uint32_t pkt_count;
400 	uint32_t desc_count;
401 	boolean_t accept_frame;
402 	boolean_t end_of_packet;
403 	boolean_t need_copy;
404 	struct e1000g *Adapter;
405 	dma_buffer_t *rx_buf;
406 	uint16_t cksumflags;
407 	uint_t chain_sz = 0;
408 	e1000g_rx_data_t *rx_data;
409 
410 	ret_mp = NULL;
411 	ret_nmp = NULL;
412 	pkt_count = 0;
413 	desc_count = 0;
414 	cksumflags = 0;
415 
416 	Adapter = rx_ring->adapter;
417 	rx_data = rx_ring->rx_data;
418 	hw = &Adapter->shared;
419 
420 	/* Sync the Rx descriptor DMA buffers */
421 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
422 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
423 
424 	if (e1000g_check_dma_handle(rx_data->rbd_dma_handle) != DDI_FM_OK) {
425 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
426 		Adapter->e1000g_state |= E1000G_ERROR;
427 	}
428 
429 	current_desc = rx_data->rbd_next;
430 	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
431 		/*
432 		 * don't send anything up. just clear the RFD
433 		 */
434 		E1000G_DEBUG_STAT(rx_ring->stat_none);
435 		return (ret_mp);
436 	}
437 
438 	/*
439 	 * Loop through the receive descriptors starting at the last known
440 	 * descriptor owned by the hardware that begins a packet.
441 	 */
442 	while ((current_desc->status & E1000_RXD_STAT_DD) &&
443 	    (pkt_count < Adapter->rx_limit_onintr) &&
444 	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {
445 
446 		desc_count++;
447 		/*
448 		 * Now this can happen in Jumbo frame situation.
449 		 */
450 		if (current_desc->status & E1000_RXD_STAT_EOP) {
451 			/* packet has EOP set */
452 			end_of_packet = B_TRUE;
453 		} else {
454 			/*
455 			 * If this received buffer does not have the
456 			 * End-Of-Packet bit set, the received packet
457 			 * will consume multiple buffers. We won't send this
458 			 * packet upstack till we get all the related buffers.
459 			 */
460 			end_of_packet = B_FALSE;
461 		}
462 
463 		/*
464 		 * Get a pointer to the actual receive buffer
465 		 * The mp->b_rptr is mapped to The CurrentDescriptor
466 		 * Buffer Address.
467 		 */
468 		packet =
469 		    (p_rx_sw_packet_t)QUEUE_GET_HEAD(&rx_data->recv_list);
470 		ASSERT(packet != NULL);
471 
472 		rx_buf = packet->rx_buf;
473 
474 		length = current_desc->length;
475 
476 #ifdef __sparc
477 		if (packet->dma_type == USE_DVMA)
478 			dvma_sync(rx_buf->dma_handle, 0,
479 			    DDI_DMA_SYNC_FORKERNEL);
480 		else
481 			(void) ddi_dma_sync(rx_buf->dma_handle,
482 			    E1000G_IPALIGNROOM, length,
483 			    DDI_DMA_SYNC_FORKERNEL);
484 #else
485 		(void) ddi_dma_sync(rx_buf->dma_handle,
486 		    E1000G_IPALIGNROOM, length,
487 		    DDI_DMA_SYNC_FORKERNEL);
488 #endif
489 
490 		if (e1000g_check_dma_handle(
491 		    rx_buf->dma_handle) != DDI_FM_OK) {
492 			ddi_fm_service_impact(Adapter->dip,
493 			    DDI_SERVICE_DEGRADED);
494 			Adapter->e1000g_state |= E1000G_ERROR;
495 		}
496 
497 		accept_frame = (current_desc->errors == 0) ||
498 		    ((current_desc->errors &
499 		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);
500 
501 		if (hw->mac.type == e1000_82543) {
502 			unsigned char last_byte;
503 
504 			last_byte =
505 			    *((unsigned char *)rx_buf->address + length - 1);
506 
507 			if (TBI_ACCEPT(hw,
508 			    current_desc->status, current_desc->errors,
509 			    current_desc->length, last_byte,
510 			    Adapter->min_frame_size, Adapter->max_frame_size)) {
511 
512 				e1000_tbi_adjust_stats(Adapter,
513 				    length, hw->mac.addr);
514 
515 				length--;
516 				accept_frame = B_TRUE;
517 			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
518 			    (current_desc->errors == E1000_RXD_ERR_CE)) {
519 				accept_frame = B_TRUE;
520 			}
521 		}
522 
523 		/*
524 		 * Indicate the packet to the NOS if it was good.
525 		 * Normally, hardware will discard bad packets for us.
526 		 * Check for the packet to be a valid Ethernet packet
527 		 */
528 		if (!accept_frame) {
529 			/*
530 			 * error in incoming packet, either the packet is not a
531 			 * ethernet size packet, or the packet has an error. In
532 			 * either case, the packet will simply be discarded.
533 			 */
534 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
535 			    "Process Receive Interrupts: Error in Packet\n");
536 
537 			E1000G_STAT(rx_ring->stat_error);
538 			/*
539 			 * Returning here as we are done here. There is
540 			 * no point in waiting for while loop to elapse
541 			 * and the things which were done. More efficient
542 			 * and less error prone...
543 			 */
544 			goto rx_drop;
545 		}
546 
547 		/*
548 		 * If the Ethernet CRC is not stripped by the hardware,
549 		 * we need to strip it before sending it up to the stack.
550 		 */
551 		if (end_of_packet && !Adapter->strip_crc) {
552 			if (length > ETHERFCSL) {
553 				length -= ETHERFCSL;
554 			} else {
555 				/*
556 				 * If the fragment is smaller than the CRC,
557 				 * drop this fragment, do the processing of
558 				 * the end of the packet.
559 				 */
560 				ASSERT(rx_data->rx_mblk_tail != NULL);
561 				rx_data->rx_mblk_tail->b_wptr -=
562 				    ETHERFCSL - length;
563 				rx_data->rx_mblk_len -=
564 				    ETHERFCSL - length;
565 
566 				QUEUE_POP_HEAD(&rx_data->recv_list);
567 
568 				goto rx_end_of_packet;
569 			}
570 		}
571 
572 		need_copy = B_TRUE;
573 
574 		if (length <= Adapter->rx_bcopy_thresh)
575 			goto rx_copy;
576 
577 		/*
578 		 * Get the pre-constructed mblk that was associated
579 		 * to the receive data buffer.
580 		 */
581 		if (packet->mp == NULL) {
582 			packet->mp = desballoc((unsigned char *)
583 			    rx_buf->address - E1000G_IPALIGNROOM,
584 			    length + E1000G_IPALIGNROOM,
585 			    BPRI_MED, &packet->free_rtn);
586 
587 			if (packet->mp != NULL) {
588 				packet->mp->b_rptr += E1000G_IPALIGNROOM;
589 				packet->mp->b_wptr += E1000G_IPALIGNROOM;
590 			}
591 		}
592 
593 		if (packet->mp != NULL) {
594 			/*
595 			 * We have two sets of buffer pool. One associated with
596 			 * the Rxdescriptors and other a freelist buffer pool.
597 			 * Each time we get a good packet, Try to get a buffer
598 			 * from the freelist pool using e1000g_get_buf. If we
599 			 * get free buffer, then replace the descriptor buffer
600 			 * address with the free buffer we just got, and pass
601 			 * the pre-constructed mblk upstack. (note no copying)
602 			 *
603 			 * If we failed to get a free buffer, then try to
604 			 * allocate a new buffer(mp) and copy the recv buffer
605 			 * content to our newly allocated buffer(mp). Don't
606 			 * disturb the desriptor buffer address. (note copying)
607 			 */
608 			newpkt = e1000g_get_buf(rx_data);
609 
610 			if (newpkt != NULL) {
611 				/*
612 				 * Get the mblk associated to the data,
613 				 * and strip it off the sw packet.
614 				 */
615 				nmp = packet->mp;
616 				packet->mp = NULL;
617 				atomic_inc_32(&packet->ref_cnt);
618 
619 				/*
620 				 * Now replace old buffer with the new
621 				 * one we got from free list
622 				 * Both the RxSwPacket as well as the
623 				 * Receive Buffer Descriptor will now
624 				 * point to this new packet.
625 				 */
626 				packet = newpkt;
627 
628 				current_desc->buffer_addr =
629 				    newpkt->rx_buf->dma_address;
630 
631 				need_copy = B_FALSE;
632 			} else {
633 				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
634 			}
635 		}
636 
637 rx_copy:
638 		if (need_copy) {
639 			/*
640 			 * No buffers available on free list,
641 			 * bcopy the data from the buffer and
642 			 * keep the original buffer. Dont want to
643 			 * do this.. Yack but no other way
644 			 */
645 			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
646 			    BPRI_MED)) == NULL) {
647 				/*
648 				 * The system has no buffers available
649 				 * to send up the incoming packet, hence
650 				 * the packet will have to be processed
651 				 * when there're more buffers available.
652 				 */
653 				E1000G_STAT(rx_ring->stat_allocb_fail);
654 				goto rx_drop;
655 			}
656 			nmp->b_rptr += E1000G_IPALIGNROOM;
657 			nmp->b_wptr += E1000G_IPALIGNROOM;
658 			/*
659 			 * The free list did not have any buffers
660 			 * available, so, the received packet will
661 			 * have to be copied into a mp and the original
662 			 * buffer will have to be retained for future
663 			 * packet reception.
664 			 */
665 			bcopy(rx_buf->address, nmp->b_wptr, length);
666 		}
667 
668 		/*
669 		 * The rx_sw_packet MUST be popped off the
670 		 * RxSwPacketList before either a putnext or freemsg
671 		 * is done on the mp that has now been created by the
672 		 * desballoc. If not, it is possible that the free
673 		 * routine will get called from the interrupt context
674 		 * and try to put this packet on the free list
675 		 */
676 		(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
677 
678 		ASSERT(nmp != NULL);
679 		nmp->b_wptr += length;
680 
681 		if (rx_data->rx_mblk == NULL) {
682 			/*
683 			 *  TCP/UDP checksum offload and
684 			 *  IP checksum offload
685 			 */
686 			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
687 				/*
688 				 * Check TCP/UDP checksum
689 				 */
690 				if ((current_desc->status &
691 				    E1000_RXD_STAT_TCPCS) &&
692 				    !(current_desc->errors &
693 				    E1000_RXD_ERR_TCPE))
694 					cksumflags |= HCK_FULLCKSUM |
695 					    HCK_FULLCKSUM_OK;
696 				/*
697 				 * Check IP Checksum
698 				 */
699 				if ((current_desc->status &
700 				    E1000_RXD_STAT_IPCS) &&
701 				    !(current_desc->errors &
702 				    E1000_RXD_ERR_IPE))
703 					cksumflags |= HCK_IPV4_HDRCKSUM;
704 			}
705 		}
706 
707 		/*
708 		 * We need to maintain our packet chain in the global
709 		 * Adapter structure, for the Rx processing can end
710 		 * with a fragment that has no EOP set.
711 		 */
712 		if (rx_data->rx_mblk == NULL) {
713 			/* Get the head of the message chain */
714 			rx_data->rx_mblk = nmp;
715 			rx_data->rx_mblk_tail = nmp;
716 			rx_data->rx_mblk_len = length;
717 		} else {	/* Not the first packet */
718 			/* Continue adding buffers */
719 			rx_data->rx_mblk_tail->b_cont = nmp;
720 			rx_data->rx_mblk_tail = nmp;
721 			rx_data->rx_mblk_len += length;
722 		}
723 		ASSERT(rx_data->rx_mblk != NULL);
724 		ASSERT(rx_data->rx_mblk_tail != NULL);
725 		ASSERT(rx_data->rx_mblk_tail->b_cont == NULL);
726 
727 		/*
728 		 * Now this MP is ready to travel upwards but some more
729 		 * fragments are coming.
730 		 * We will send packet upwards as soon as we get EOP
731 		 * set on the packet.
732 		 */
733 		if (!end_of_packet) {
734 			/*
735 			 * continue to get the next descriptor,
736 			 * Tail would be advanced at the end
737 			 */
738 			goto rx_next_desc;
739 		}
740 
741 rx_end_of_packet:
742 		/*
743 		 * Found packet with EOP
744 		 * Process the last fragment.
745 		 */
746 		if (cksumflags != 0) {
747 			(void) hcksum_assoc(rx_data->rx_mblk,
748 			    NULL, NULL, 0, 0, 0, 0, cksumflags, 0);
749 			cksumflags = 0;
750 		}
751 
752 		/*
753 		 * Count packets that span multi-descriptors
754 		 */
755 		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
756 		    (rx_data->rx_mblk->b_cont != NULL));
757 
758 		/*
759 		 * Append to list to send upstream
760 		 */
761 		if (ret_mp == NULL) {
762 			ret_mp = ret_nmp = rx_data->rx_mblk;
763 		} else {
764 			ret_nmp->b_next = rx_data->rx_mblk;
765 			ret_nmp = rx_data->rx_mblk;
766 		}
767 		ret_nmp->b_next = NULL;
768 		*tail = ret_nmp;
769 		chain_sz += length;
770 
771 		rx_data->rx_mblk = NULL;
772 		rx_data->rx_mblk_tail = NULL;
773 		rx_data->rx_mblk_len = 0;
774 
775 		pkt_count++;
776 
777 rx_next_desc:
778 		/*
779 		 * Zero out the receive descriptors status
780 		 */
781 		current_desc->status = 0;
782 
783 		if (current_desc == rx_data->rbd_last)
784 			rx_data->rbd_next = rx_data->rbd_first;
785 		else
786 			rx_data->rbd_next++;
787 
788 		last_desc = current_desc;
789 		current_desc = rx_data->rbd_next;
790 
791 		/*
792 		 * Put the buffer that we just indicated back
793 		 * at the end of our list
794 		 */
795 		QUEUE_PUSH_TAIL(&rx_data->recv_list,
796 		    &packet->Link);
797 	}	/* while loop */
798 
799 	/* Sync the Rx descriptor DMA buffers */
800 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
801 	    0, 0, DDI_DMA_SYNC_FORDEV);
802 
803 	/*
804 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
805 	 */
806 	E1000_WRITE_REG(hw, E1000_RDT(0),
807 	    (uint32_t)(last_desc - rx_data->rbd_first));
808 
809 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
810 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
811 		Adapter->e1000g_state |= E1000G_ERROR;
812 	}
813 
814 	Adapter->rx_pkt_cnt = pkt_count;
815 
816 	return (ret_mp);
817 
818 rx_drop:
819 	/*
820 	 * Zero out the receive descriptors status
821 	 */
822 	current_desc->status = 0;
823 
824 	/* Sync the Rx descriptor DMA buffers */
825 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
826 	    0, 0, DDI_DMA_SYNC_FORDEV);
827 
828 	if (current_desc == rx_data->rbd_last)
829 		rx_data->rbd_next = rx_data->rbd_first;
830 	else
831 		rx_data->rbd_next++;
832 
833 	last_desc = current_desc;
834 
835 	(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
836 
837 	QUEUE_PUSH_TAIL(&rx_data->recv_list, &packet->Link);
838 	/*
839 	 * Reclaim all old buffers already allocated during
840 	 * Jumbo receives.....for incomplete reception
841 	 */
842 	if (rx_data->rx_mblk != NULL) {
843 		freemsg(rx_data->rx_mblk);
844 		rx_data->rx_mblk = NULL;
845 		rx_data->rx_mblk_tail = NULL;
846 		rx_data->rx_mblk_len = 0;
847 	}
848 	/*
849 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
850 	 */
851 	E1000_WRITE_REG(hw, E1000_RDT(0),
852 	    (uint32_t)(last_desc - rx_data->rbd_first));
853 
854 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
855 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
856 		Adapter->e1000g_state |= E1000G_ERROR;
857 	}
858 
859 	return (ret_mp);
860 }
861