xref: /titanic_52/usr/src/uts/common/io/e1000g/e1000g_rx.c (revision 54925bf60766fbb4f1f2d7c843721406a7b7a3fb)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2007 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms of the CDDLv1.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * **********************************************************************
30  *									*
31  * Module Name:								*
32  *   e1000g_rx.c							*
33  *									*
34  * Abstract:								*
35  *   This file contains some routines that take care of Receive		*
36  *   interrupt and also for the received packets it sends up to		*
37  *   upper layer.							*
38  *   It tries to do a zero copy if free buffers are available in	*
39  *   the pool.								*
40  *									*
41  * **********************************************************************
42  */
43 
44 #include "e1000g_sw.h"
45 #include "e1000g_debug.h"
46 
47 static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_ring_t *rx_ring);
48 #pragma	inline(e1000g_get_buf)
49 
50 /*
51  * e1000g_rxfree_func - the call-back function to reclaim rx buffer
52  *
53  * This function is called when an mp is freed by the user thru
54  * freeb call (Only for mp constructed through desballoc call)
55  * It returns back the freed buffer to the freelist
56  */
57 void
58 e1000g_rxfree_func(p_rx_sw_packet_t packet)
59 {
60 	struct e1000g *Adapter;
61 	e1000g_rx_ring_t *rx_ring;
62 
63 	rx_ring = (e1000g_rx_ring_t *)packet->rx_ring;
64 	Adapter = rx_ring->adapter;
65 
66 	/*
67 	 * Here the rx recycling processes different rx packets in different
68 	 * threads, so we protect it with RW_READER to ensure it won't block
69 	 * other rx recycling threads.
70 	 */
71 	rw_enter(&e1000g_rx_detach_lock, RW_READER);
72 
73 	if (packet->flag == E1000G_RX_SW_FREE) {
74 		rw_exit(&e1000g_rx_detach_lock);
75 		return;
76 	}
77 
78 	if (packet->flag == E1000G_RX_SW_STOP) {
79 		packet->flag = E1000G_RX_SW_FREE;
80 		rw_exit(&e1000g_rx_detach_lock);
81 
82 		rw_enter(&e1000g_rx_detach_lock, RW_WRITER);
83 		rx_ring->pending_count--;
84 		e1000g_mblks_pending--;
85 
86 		if (rx_ring->pending_count == 0) {
87 			while (rx_ring->pending_list != NULL) {
88 				packet = rx_ring->pending_list;
89 				rx_ring->pending_list =
90 				    rx_ring->pending_list->next;
91 
92 				ASSERT(packet->mp == NULL);
93 				e1000g_free_rx_sw_packet(packet);
94 			}
95 		}
96 		rw_exit(&e1000g_rx_detach_lock);
97 		return;
98 	}
99 
100 	if (packet->flag == E1000G_RX_SW_DETACH) {
101 		packet->flag = E1000G_RX_SW_FREE;
102 		rw_exit(&e1000g_rx_detach_lock);
103 
104 		ASSERT(packet->mp == NULL);
105 		e1000g_free_rx_sw_packet(packet);
106 
107 		/*
108 		 * Here the e1000g_mblks_pending may be modified by different
109 		 * rx recycling threads simultaneously, so we need to protect
110 		 * it with RW_WRITER.
111 		 */
112 		rw_enter(&e1000g_rx_detach_lock, RW_WRITER);
113 		e1000g_mblks_pending--;
114 		rw_exit(&e1000g_rx_detach_lock);
115 		return;
116 	}
117 
118 	packet->flag = E1000G_RX_SW_FREE;
119 
120 	if (packet->mp == NULL) {
121 		/*
122 		 * Allocate a mblk that binds to the data buffer
123 		 */
124 		packet->mp = desballoc((unsigned char *)
125 		    packet->rx_buf->address - E1000G_IPALIGNROOM,
126 		    packet->rx_buf->size + E1000G_IPALIGNROOM,
127 		    BPRI_MED, &packet->free_rtn);
128 
129 		if (packet->mp != NULL) {
130 			packet->mp->b_rptr += E1000G_IPALIGNROOM;
131 			packet->mp->b_wptr += E1000G_IPALIGNROOM;
132 		} else {
133 			E1000G_STAT(rx_ring->stat_esballoc_fail);
134 		}
135 	}
136 
137 	mutex_enter(&rx_ring->freelist_lock);
138 	QUEUE_PUSH_TAIL(&rx_ring->free_list, &packet->Link);
139 	rx_ring->avail_freepkt++;
140 	mutex_exit(&rx_ring->freelist_lock);
141 
142 	rw_exit(&e1000g_rx_detach_lock);
143 }
144 
145 /*
146  * e1000g_rx_setup - setup rx data structures
147  *
148  * This routine initializes all of the receive related
149  * structures. This includes the receive descriptors, the
150  * actual receive buffers, and the rx_sw_packet software
151  * structures.
152  */
153 void
154 e1000g_rx_setup(struct e1000g *Adapter)
155 {
156 	struct e1000_hw *hw;
157 	p_rx_sw_packet_t packet;
158 	struct e1000_rx_desc *descriptor;
159 	uint32_t buf_low;
160 	uint32_t buf_high;
161 	uint32_t reg_val;
162 	int i;
163 	int size;
164 	e1000g_rx_ring_t *rx_ring;
165 
166 	hw = &Adapter->shared;
167 	rx_ring = Adapter->rx_ring;
168 
169 	/*
170 	 * zero out all of the receive buffer descriptor memory
171 	 * assures any previous data or status is erased
172 	 */
173 	bzero(rx_ring->rbd_area,
174 	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);
175 
176 	if (!Adapter->rx_buffer_setup) {
177 		/* Init the list of "Receive Buffer" */
178 		QUEUE_INIT_LIST(&rx_ring->recv_list);
179 
180 		/* Init the list of "Free Receive Buffer" */
181 		QUEUE_INIT_LIST(&rx_ring->free_list);
182 
183 		/*
184 		 * Setup Receive list and the Free list. Note that
185 		 * the both were allocated in one packet area.
186 		 */
187 		packet = rx_ring->packet_area;
188 		descriptor = rx_ring->rbd_first;
189 
190 		for (i = 0; i < Adapter->rx_desc_num;
191 		    i++, packet = packet->next, descriptor++) {
192 			ASSERT(packet != NULL);
193 			ASSERT(descriptor != NULL);
194 			descriptor->buffer_addr =
195 			    packet->rx_buf->dma_address;
196 
197 			/* Add this rx_sw_packet to the receive list */
198 			QUEUE_PUSH_TAIL(&rx_ring->recv_list,
199 			    &packet->Link);
200 		}
201 
202 		for (i = 0; i < Adapter->rx_freelist_num;
203 		    i++, packet = packet->next) {
204 			ASSERT(packet != NULL);
205 			/* Add this rx_sw_packet to the free list */
206 			QUEUE_PUSH_TAIL(&rx_ring->free_list,
207 			    &packet->Link);
208 		}
209 		rx_ring->avail_freepkt = Adapter->rx_freelist_num;
210 
211 		Adapter->rx_buffer_setup = B_TRUE;
212 	} else {
213 		/* Setup the initial pointer to the first rx descriptor */
214 		packet = (p_rx_sw_packet_t)
215 		    QUEUE_GET_HEAD(&rx_ring->recv_list);
216 		descriptor = rx_ring->rbd_first;
217 
218 		for (i = 0; i < Adapter->rx_desc_num; i++) {
219 			ASSERT(packet != NULL);
220 			ASSERT(descriptor != NULL);
221 			descriptor->buffer_addr =
222 			    packet->rx_buf->dma_address;
223 
224 			/* Get next rx_sw_packet */
225 			packet = (p_rx_sw_packet_t)
226 			    QUEUE_GET_NEXT(&rx_ring->recv_list, &packet->Link);
227 			descriptor++;
228 		}
229 	}
230 
231 	/*
232 	 * Setup our descriptor pointers
233 	 */
234 	rx_ring->rbd_next = rx_ring->rbd_first;
235 
236 	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
237 	E1000_WRITE_REG(hw, E1000_RDLEN, size);
238 	size = E1000_READ_REG(hw, E1000_RDLEN);
239 
240 	/* To get lower order bits */
241 	buf_low = (uint32_t)rx_ring->rbd_dma_addr;
242 	/* To get the higher order bits */
243 	buf_high = (uint32_t)(rx_ring->rbd_dma_addr >> 32);
244 
245 	E1000_WRITE_REG(hw, E1000_RDBAH, buf_high);
246 	E1000_WRITE_REG(hw, E1000_RDBAL, buf_low);
247 
248 	/*
249 	 * Setup our HW Rx Head & Tail descriptor pointers
250 	 */
251 	E1000_WRITE_REG(hw, E1000_RDT,
252 	    (uint32_t)(rx_ring->rbd_last - rx_ring->rbd_first));
253 	E1000_WRITE_REG(hw, E1000_RDH, 0);
254 
255 	/*
256 	 * Setup the Receive Control Register (RCTL), and ENABLE the
257 	 * receiver. The initial configuration is to: Enable the receiver,
258 	 * accept broadcasts, discard bad packets (and long packets),
259 	 * disable VLAN filter checking, set the receive descriptor
260 	 * minimum threshold size to 1/2, and the receive buffer size to
261 	 * 2k.
262 	 */
263 	reg_val = E1000_RCTL_EN |	/* Enable Receive Unit */
264 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
265 	    E1000_RCTL_LPE |		/* Large Packet Enable bit */
266 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
267 	    E1000_RCTL_RDMTS_HALF |
268 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
269 
270 	if (Adapter->strip_crc)
271 		reg_val |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */
272 
273 	switch (hw->mac.max_frame_size) {
274 	case ETHERMAX:
275 		reg_val |= E1000_RCTL_SZ_2048;
276 		break;
277 	case FRAME_SIZE_UPTO_4K:
278 		reg_val |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
279 		break;
280 	case FRAME_SIZE_UPTO_8K:
281 		reg_val |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
282 		break;
283 	case FRAME_SIZE_UPTO_9K:
284 	case FRAME_SIZE_UPTO_16K:
285 		reg_val |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
286 		break;
287 	default:
288 		reg_val |= E1000_RCTL_SZ_2048;
289 		break;
290 	}
291 
292 	if (e1000_tbi_sbp_enabled_82543(hw))
293 		reg_val |= E1000_RCTL_SBP;
294 
295 	/*
296 	 * Enable early receives on supported devices, only takes effect when
297 	 * packet size is equal or larger than the specified value (in 8 byte
298 	 * units), e.g. using jumbo frames when setting to E1000_ERT_2048
299 	 */
300 	if ((hw->mac.type == e1000_82573) || (hw->mac.type == e1000_ich9lan))
301 		E1000_WRITE_REG(hw, E1000_ERT, E1000_ERT_2048);
302 
303 	E1000_WRITE_REG(hw, E1000_RCTL, reg_val);
304 
305 	reg_val =
306 	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
307 	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */
308 
309 	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
310 }
311 
312 /*
313  * e1000g_get_buf - get an rx sw packet from the free_list
314  */
315 static p_rx_sw_packet_t
316 e1000g_get_buf(e1000g_rx_ring_t *rx_ring)
317 {
318 	struct e1000g *Adapter;
319 	p_rx_sw_packet_t packet;
320 
321 	Adapter = rx_ring->adapter;
322 
323 	mutex_enter(&rx_ring->freelist_lock);
324 	packet = (p_rx_sw_packet_t)
325 	    QUEUE_POP_HEAD(&rx_ring->free_list);
326 	if (packet != NULL)
327 		rx_ring->avail_freepkt--;
328 	mutex_exit(&rx_ring->freelist_lock);
329 
330 	return (packet);
331 }
332 
333 /*
334  * e1000g_receive - main receive routine
335  *
336  * This routine will process packets received in an interrupt
337  */
338 mblk_t *
339 e1000g_receive(struct e1000g *Adapter)
340 {
341 	struct e1000_hw *hw;
342 	mblk_t *nmp;
343 	mblk_t *ret_mp;
344 	mblk_t *ret_nmp;
345 	struct e1000_rx_desc *current_desc;
346 	struct e1000_rx_desc *last_desc;
347 	p_rx_sw_packet_t packet;
348 	p_rx_sw_packet_t newpkt;
349 	USHORT length;
350 	uint32_t pkt_count;
351 	uint32_t desc_count;
352 	boolean_t accept_frame;
353 	boolean_t end_of_packet;
354 	boolean_t need_copy;
355 	e1000g_rx_ring_t *rx_ring;
356 	dma_buffer_t *rx_buf;
357 	uint16_t cksumflags;
358 
359 	ret_mp = NULL;
360 	ret_nmp = NULL;
361 	pkt_count = 0;
362 	desc_count = 0;
363 	cksumflags = 0;
364 
365 	hw = &Adapter->shared;
366 	rx_ring = Adapter->rx_ring;
367 
368 	/* Sync the Rx descriptor DMA buffers */
369 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
370 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
371 
372 	current_desc = rx_ring->rbd_next;
373 	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
374 		/*
375 		 * don't send anything up. just clear the RFD
376 		 */
377 		E1000G_DEBUG_STAT(rx_ring->stat_none);
378 		return (ret_mp);
379 	}
380 
381 	/*
382 	 * Loop through the receive descriptors starting at the last known
383 	 * descriptor owned by the hardware that begins a packet.
384 	 */
385 	while ((current_desc->status & E1000_RXD_STAT_DD) &&
386 	    (pkt_count < Adapter->rx_limit_onintr)) {
387 
388 		desc_count++;
389 		/*
390 		 * Now this can happen in Jumbo frame situation.
391 		 */
392 		if (current_desc->status & E1000_RXD_STAT_EOP) {
393 			/* packet has EOP set */
394 			end_of_packet = B_TRUE;
395 		} else {
396 			/*
397 			 * If this received buffer does not have the
398 			 * End-Of-Packet bit set, the received packet
399 			 * will consume multiple buffers. We won't send this
400 			 * packet upstack till we get all the related buffers.
401 			 */
402 			end_of_packet = B_FALSE;
403 		}
404 
405 		/*
406 		 * Get a pointer to the actual receive buffer
407 		 * The mp->b_rptr is mapped to The CurrentDescriptor
408 		 * Buffer Address.
409 		 */
410 		packet =
411 		    (p_rx_sw_packet_t)QUEUE_GET_HEAD(&rx_ring->recv_list);
412 		ASSERT(packet != NULL);
413 
414 		rx_buf = packet->rx_buf;
415 
416 		length = current_desc->length;
417 
418 #ifdef __sparc
419 		if (packet->dma_type == USE_DVMA)
420 			dvma_sync(rx_buf->dma_handle, 0,
421 			    DDI_DMA_SYNC_FORKERNEL);
422 		else
423 			(void) ddi_dma_sync(rx_buf->dma_handle,
424 			    E1000G_IPALIGNROOM, length,
425 			    DDI_DMA_SYNC_FORKERNEL);
426 #else
427 		(void) ddi_dma_sync(rx_buf->dma_handle,
428 		    E1000G_IPALIGNROOM, length,
429 		    DDI_DMA_SYNC_FORKERNEL);
430 #endif
431 
432 		accept_frame = (current_desc->errors == 0) ||
433 		    ((current_desc->errors &
434 		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);
435 
436 		if (hw->mac.type == e1000_82543) {
437 			unsigned char last_byte;
438 
439 			last_byte =
440 			    *((unsigned char *)rx_buf->address + length - 1);
441 
442 			if (TBI_ACCEPT(hw,
443 			    current_desc->status, current_desc->errors,
444 			    current_desc->length, last_byte)) {
445 
446 				e1000_tbi_adjust_stats(Adapter,
447 				    length, hw->mac.addr);
448 
449 				length--;
450 				accept_frame = B_TRUE;
451 			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
452 			    (current_desc->errors == E1000_RXD_ERR_CE)) {
453 				accept_frame = B_TRUE;
454 			}
455 		}
456 
457 		/*
458 		 * Indicate the packet to the NOS if it was good.
459 		 * Normally, hardware will discard bad packets for us.
460 		 * Check for the packet to be a valid Ethernet packet
461 		 */
462 		if (!accept_frame) {
463 			/*
464 			 * error in incoming packet, either the packet is not a
465 			 * ethernet size packet, or the packet has an error. In
466 			 * either case, the packet will simply be discarded.
467 			 */
468 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
469 			    "Process Receive Interrupts: Error in Packet\n");
470 
471 			E1000G_STAT(rx_ring->stat_error);
472 			/*
473 			 * Returning here as we are done here. There is
474 			 * no point in waiting for while loop to elapse
475 			 * and the things which were done. More efficient
476 			 * and less error prone...
477 			 */
478 			goto rx_drop;
479 		}
480 
481 		/*
482 		 * If the Ethernet CRC is not stripped by the hardware,
483 		 * we need to strip it before sending it up to the stack.
484 		 */
485 		if (end_of_packet && !Adapter->strip_crc) {
486 			if (length > CRC_LENGTH) {
487 				length -= CRC_LENGTH;
488 			} else {
489 				/*
490 				 * If the fragment is smaller than the CRC,
491 				 * drop this fragment, do the processing of
492 				 * the end of the packet.
493 				 */
494 				ASSERT(rx_ring->rx_mblk_tail != NULL);
495 				rx_ring->rx_mblk_tail->b_wptr -=
496 				    CRC_LENGTH - length;
497 				rx_ring->rx_mblk_len -=
498 				    CRC_LENGTH - length;
499 
500 				QUEUE_POP_HEAD(&rx_ring->recv_list);
501 
502 				goto rx_end_of_packet;
503 			}
504 		}
505 
506 		need_copy = B_TRUE;
507 
508 		if (length <= Adapter->rx_bcopy_thresh)
509 			goto rx_copy;
510 
511 		/*
512 		 * Get the pre-constructed mblk that was associated
513 		 * to the receive data buffer.
514 		 */
515 		if (packet->mp == NULL) {
516 			packet->mp = desballoc((unsigned char *)
517 			    rx_buf->address - E1000G_IPALIGNROOM,
518 			    length + E1000G_IPALIGNROOM,
519 			    BPRI_MED, &packet->free_rtn);
520 
521 			if (packet->mp != NULL) {
522 				packet->mp->b_rptr += E1000G_IPALIGNROOM;
523 				packet->mp->b_wptr += E1000G_IPALIGNROOM;
524 			} else {
525 				E1000G_STAT(rx_ring->stat_esballoc_fail);
526 			}
527 		}
528 
529 		if (packet->mp != NULL) {
530 			/*
531 			 * We have two sets of buffer pool. One associated with
532 			 * the Rxdescriptors and other a freelist buffer pool.
533 			 * Each time we get a good packet, Try to get a buffer
534 			 * from the freelist pool using e1000g_get_buf. If we
535 			 * get free buffer, then replace the descriptor buffer
536 			 * address with the free buffer we just got, and pass
537 			 * the pre-constructed mblk upstack. (note no copying)
538 			 *
539 			 * If we failed to get a free buffer, then try to
540 			 * allocate a new buffer(mp) and copy the recv buffer
541 			 * content to our newly allocated buffer(mp). Don't
542 			 * disturb the desriptor buffer address. (note copying)
543 			 */
544 			newpkt = e1000g_get_buf(rx_ring);
545 
546 			if (newpkt != NULL) {
547 				/*
548 				 * Get the mblk associated to the data,
549 				 * and strip it off the sw packet.
550 				 */
551 				nmp = packet->mp;
552 				packet->mp = NULL;
553 				packet->flag == E1000G_RX_SW_SENDUP;
554 
555 				/*
556 				 * Now replace old buffer with the new
557 				 * one we got from free list
558 				 * Both the RxSwPacket as well as the
559 				 * Receive Buffer Descriptor will now
560 				 * point to this new packet.
561 				 */
562 				packet = newpkt;
563 
564 				current_desc->buffer_addr =
565 				    newpkt->rx_buf->dma_address;
566 
567 				need_copy = B_FALSE;
568 			} else {
569 				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
570 			}
571 		}
572 
573 rx_copy:
574 		if (need_copy) {
575 			/*
576 			 * No buffers available on free list,
577 			 * bcopy the data from the buffer and
578 			 * keep the original buffer. Dont want to
579 			 * do this.. Yack but no other way
580 			 */
581 			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
582 			    BPRI_MED)) == NULL) {
583 				/*
584 				 * The system has no buffers available
585 				 * to send up the incoming packet, hence
586 				 * the packet will have to be processed
587 				 * when there're more buffers available.
588 				 */
589 				E1000G_STAT(rx_ring->stat_allocb_fail);
590 				goto rx_drop;
591 			}
592 			nmp->b_rptr += E1000G_IPALIGNROOM;
593 			nmp->b_wptr += E1000G_IPALIGNROOM;
594 			/*
595 			 * The free list did not have any buffers
596 			 * available, so, the received packet will
597 			 * have to be copied into a mp and the original
598 			 * buffer will have to be retained for future
599 			 * packet reception.
600 			 */
601 			bcopy(rx_buf->address, nmp->b_wptr, length);
602 		}
603 
604 		/*
605 		 * The rx_sw_packet MUST be popped off the
606 		 * RxSwPacketList before either a putnext or freemsg
607 		 * is done on the mp that has now been created by the
608 		 * desballoc. If not, it is possible that the free
609 		 * routine will get called from the interrupt context
610 		 * and try to put this packet on the free list
611 		 */
612 		(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_ring->recv_list);
613 
614 		ASSERT(nmp != NULL);
615 		nmp->b_wptr += length;
616 
617 		if (rx_ring->rx_mblk == NULL) {
618 			/*
619 			 *  TCP/UDP checksum offload and
620 			 *  IP checksum offload
621 			 */
622 			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
623 				/*
624 				 * Check TCP/UDP checksum
625 				 */
626 				if ((current_desc->status &
627 				    E1000_RXD_STAT_TCPCS) &&
628 				    !(current_desc->errors &
629 				    E1000_RXD_ERR_TCPE))
630 					cksumflags |= HCK_FULLCKSUM |
631 					    HCK_FULLCKSUM_OK;
632 				/*
633 				 * Check IP Checksum
634 				 */
635 				if ((current_desc->status &
636 				    E1000_RXD_STAT_IPCS) &&
637 				    !(current_desc->errors &
638 				    E1000_RXD_ERR_IPE))
639 					cksumflags |= HCK_IPV4_HDRCKSUM;
640 			}
641 		}
642 
643 		/*
644 		 * We need to maintain our packet chain in the global
645 		 * Adapter structure, for the Rx processing can end
646 		 * with a fragment that has no EOP set.
647 		 */
648 		if (rx_ring->rx_mblk == NULL) {
649 			/* Get the head of the message chain */
650 			rx_ring->rx_mblk = nmp;
651 			rx_ring->rx_mblk_tail = nmp;
652 			rx_ring->rx_mblk_len = length;
653 		} else {	/* Not the first packet */
654 			/* Continue adding buffers */
655 			rx_ring->rx_mblk_tail->b_cont = nmp;
656 			rx_ring->rx_mblk_tail = nmp;
657 			rx_ring->rx_mblk_len += length;
658 		}
659 		ASSERT(rx_ring->rx_mblk != NULL);
660 		ASSERT(rx_ring->rx_mblk_tail != NULL);
661 		ASSERT(rx_ring->rx_mblk_tail->b_cont == NULL);
662 
663 		/*
664 		 * Now this MP is ready to travel upwards but some more
665 		 * fragments are coming.
666 		 * We will send packet upwards as soon as we get EOP
667 		 * set on the packet.
668 		 */
669 		if (!end_of_packet) {
670 			/*
671 			 * continue to get the next descriptor,
672 			 * Tail would be advanced at the end
673 			 */
674 			goto rx_next_desc;
675 		}
676 
677 rx_end_of_packet:
678 		/*
679 		 * Found packet with EOP
680 		 * Process the last fragment.
681 		 */
682 		if (cksumflags != 0) {
683 			(void) hcksum_assoc(rx_ring->rx_mblk,
684 			    NULL, NULL, 0, 0, 0, 0, cksumflags, 0);
685 			cksumflags = 0;
686 		}
687 
688 		/*
689 		 * Count packets that span multi-descriptors
690 		 */
691 		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
692 		    (rx_ring->rx_mblk->b_cont != NULL));
693 
694 		/*
695 		 * Append to list to send upstream
696 		 */
697 		if (ret_mp == NULL) {
698 			ret_mp = ret_nmp = rx_ring->rx_mblk;
699 		} else {
700 			ret_nmp->b_next = rx_ring->rx_mblk;
701 			ret_nmp = rx_ring->rx_mblk;
702 		}
703 		ret_nmp->b_next = NULL;
704 
705 		rx_ring->rx_mblk = NULL;
706 		rx_ring->rx_mblk_tail = NULL;
707 		rx_ring->rx_mblk_len = 0;
708 
709 		pkt_count++;
710 
711 rx_next_desc:
712 		/*
713 		 * Zero out the receive descriptors status
714 		 */
715 		current_desc->status = 0;
716 
717 		if (current_desc == rx_ring->rbd_last)
718 			rx_ring->rbd_next = rx_ring->rbd_first;
719 		else
720 			rx_ring->rbd_next++;
721 
722 		last_desc = current_desc;
723 		current_desc = rx_ring->rbd_next;
724 
725 		/*
726 		 * Put the buffer that we just indicated back
727 		 * at the end of our list
728 		 */
729 		QUEUE_PUSH_TAIL(&rx_ring->recv_list,
730 		    &packet->Link);
731 	}	/* while loop */
732 
733 	if (pkt_count >= Adapter->rx_limit_onintr)
734 		E1000G_STAT(rx_ring->stat_exceed_pkt);
735 
736 	/* Sync the Rx descriptor DMA buffers */
737 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
738 	    0, 0, DDI_DMA_SYNC_FORDEV);
739 
740 	/*
741 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
742 	 */
743 	E1000_WRITE_REG(hw, E1000_RDT,
744 	    (uint32_t)(last_desc - rx_ring->rbd_first));
745 
746 	return (ret_mp);
747 
748 rx_drop:
749 	/*
750 	 * Zero out the receive descriptors status
751 	 */
752 	current_desc->status = 0;
753 
754 	/* Sync the Rx descriptor DMA buffers */
755 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
756 	    0, 0, DDI_DMA_SYNC_FORDEV);
757 
758 	if (current_desc == rx_ring->rbd_last)
759 		rx_ring->rbd_next = rx_ring->rbd_first;
760 	else
761 		rx_ring->rbd_next++;
762 
763 	last_desc = current_desc;
764 
765 	(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_ring->recv_list);
766 
767 	QUEUE_PUSH_TAIL(&rx_ring->recv_list, &packet->Link);
768 	/*
769 	 * Reclaim all old buffers already allocated during
770 	 * Jumbo receives.....for incomplete reception
771 	 */
772 	if (rx_ring->rx_mblk != NULL) {
773 		freemsg(rx_ring->rx_mblk);
774 		rx_ring->rx_mblk = NULL;
775 		rx_ring->rx_mblk_tail = NULL;
776 		rx_ring->rx_mblk_len = 0;
777 	}
778 	/*
779 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
780 	 */
781 	E1000_WRITE_REG(hw, E1000_RDT,
782 	    (uint32_t)(last_desc - rx_ring->rbd_first));
783 
784 	return (ret_mp);
785 }
786