xref: /titanic_51/usr/src/uts/common/io/e1000g/e1000g_rx.c (revision 1c9de0c9325f9f5d3540e19a4ad3691e6d50c0f8)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2008 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms of the CDDLv1.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * **********************************************************************
30  *									*
31  * Module Name:								*
32  *   e1000g_rx.c							*
33  *									*
34  * Abstract:								*
35  *   This file contains some routines that take care of Receive		*
36  *   interrupt and also for the received packets it sends up to		*
37  *   upper layer.							*
38  *   It tries to do a zero copy if free buffers are available in	*
39  *   the pool.								*
40  *									*
41  * **********************************************************************
42  */
43 
44 #include "e1000g_sw.h"
45 #include "e1000g_debug.h"
46 
47 static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_ring_t *rx_ring);
48 #pragma	inline(e1000g_get_buf)
49 static void e1000g_priv_devi_list_clean();
50 
51 /*
52  * e1000g_rxfree_func - the call-back function to reclaim rx buffer
53  *
54  * This function is called when an mp is freed by the user thru
55  * freeb call (Only for mp constructed through desballoc call)
56  * It returns back the freed buffer to the freelist
57  */
58 void
59 e1000g_rxfree_func(p_rx_sw_packet_t packet)
60 {
61 	struct e1000g *Adapter;
62 	e1000g_rx_ring_t *rx_ring;
63 
64 	rx_ring = (e1000g_rx_ring_t *)packet->rx_ring;
65 	Adapter = rx_ring->adapter;
66 
67 	/*
68 	 * Here the rx recycling processes different rx packets in different
69 	 * threads, so we protect it with RW_READER to ensure it won't block
70 	 * other rx recycling threads.
71 	 */
72 	rw_enter(&e1000g_rx_detach_lock, RW_READER);
73 
74 	if (packet->flag == E1000G_RX_SW_FREE) {
75 		rw_exit(&e1000g_rx_detach_lock);
76 		return;
77 	}
78 
79 	if (packet->flag == E1000G_RX_SW_STOP) {
80 		packet->flag = E1000G_RX_SW_FREE;
81 		rw_exit(&e1000g_rx_detach_lock);
82 
83 		rw_enter(&e1000g_rx_detach_lock, RW_WRITER);
84 		rx_ring->pending_count--;
85 		e1000g_mblks_pending--;
86 
87 		if (rx_ring->pending_count == 0) {
88 			while (rx_ring->pending_list != NULL) {
89 				packet = rx_ring->pending_list;
90 				rx_ring->pending_list =
91 				    rx_ring->pending_list->next;
92 
93 				ASSERT(packet->mp == NULL);
94 				e1000g_free_rx_sw_packet(packet);
95 			}
96 		}
97 
98 		/*
99 		 * If e1000g_force_detach is enabled, we need to clean up
100 		 * the idle priv_dip entries in the private dip list while
101 		 * e1000g_mblks_pending is zero.
102 		 */
103 		if (e1000g_force_detach && (e1000g_mblks_pending == 0))
104 			e1000g_priv_devi_list_clean();
105 		rw_exit(&e1000g_rx_detach_lock);
106 		return;
107 	}
108 
109 	if (packet->flag == E1000G_RX_SW_DETACH) {
110 		packet->flag = E1000G_RX_SW_FREE;
111 		rw_exit(&e1000g_rx_detach_lock);
112 
113 		ASSERT(packet->mp == NULL);
114 		e1000g_free_rx_sw_packet(packet);
115 
116 		/*
117 		 * Here the e1000g_mblks_pending may be modified by different
118 		 * rx recycling threads simultaneously, so we need to protect
119 		 * it with RW_WRITER.
120 		 */
121 		rw_enter(&e1000g_rx_detach_lock, RW_WRITER);
122 		e1000g_mblks_pending--;
123 
124 		/*
125 		 * If e1000g_force_detach is enabled, we need to clean up
126 		 * the idle priv_dip entries in the private dip list while
127 		 * e1000g_mblks_pending is zero.
128 		 */
129 		if (e1000g_force_detach && (e1000g_mblks_pending == 0))
130 			e1000g_priv_devi_list_clean();
131 		rw_exit(&e1000g_rx_detach_lock);
132 		return;
133 	}
134 
135 	packet->flag = E1000G_RX_SW_FREE;
136 
137 	if (packet->mp == NULL) {
138 		/*
139 		 * Allocate a mblk that binds to the data buffer
140 		 */
141 		packet->mp = desballoc((unsigned char *)
142 		    packet->rx_buf->address - E1000G_IPALIGNROOM,
143 		    packet->rx_buf->size + E1000G_IPALIGNROOM,
144 		    BPRI_MED, &packet->free_rtn);
145 
146 		if (packet->mp != NULL) {
147 			packet->mp->b_rptr += E1000G_IPALIGNROOM;
148 			packet->mp->b_wptr += E1000G_IPALIGNROOM;
149 		} else {
150 			E1000G_STAT(rx_ring->stat_esballoc_fail);
151 		}
152 	}
153 
154 	mutex_enter(&rx_ring->freelist_lock);
155 	QUEUE_PUSH_TAIL(&rx_ring->free_list, &packet->Link);
156 	rx_ring->avail_freepkt++;
157 	mutex_exit(&rx_ring->freelist_lock);
158 
159 	rw_exit(&e1000g_rx_detach_lock);
160 }
161 
162 /*
163  * e1000g_priv_devi_list_clean - clean up e1000g_private_devi_list
164  *
165  * We will walk the e1000g_private_devi_list to free the entry marked
166  * with the E1000G_PRIV_DEVI_DETACH flag.
167  */
168 static void
169 e1000g_priv_devi_list_clean()
170 {
171 	private_devi_list_t *devi_node, *devi_del;
172 
173 	if (e1000g_private_devi_list == NULL)
174 		return;
175 
176 	devi_node = e1000g_private_devi_list;
177 	while ((devi_node != NULL) &&
178 	    (devi_node->flag == E1000G_PRIV_DEVI_DETACH)) {
179 		e1000g_private_devi_list = devi_node->next;
180 		kmem_free(devi_node->priv_dip,
181 		    sizeof (struct dev_info));
182 		kmem_free(devi_node,
183 		    sizeof (private_devi_list_t));
184 		devi_node = e1000g_private_devi_list;
185 	}
186 	if (e1000g_private_devi_list == NULL)
187 		return;
188 	while (devi_node->next != NULL) {
189 		if (devi_node->next->flag == E1000G_PRIV_DEVI_DETACH) {
190 			devi_del = devi_node->next;
191 			devi_node->next = devi_del->next;
192 			kmem_free(devi_del->priv_dip,
193 			    sizeof (struct dev_info));
194 			kmem_free(devi_del,
195 			    sizeof (private_devi_list_t));
196 		} else {
197 			devi_node = devi_node->next;
198 		}
199 	}
200 }
201 
202 /*
203  * e1000g_rx_setup - setup rx data structures
204  *
205  * This routine initializes all of the receive related
206  * structures. This includes the receive descriptors, the
207  * actual receive buffers, and the rx_sw_packet software
208  * structures.
209  */
210 void
211 e1000g_rx_setup(struct e1000g *Adapter)
212 {
213 	struct e1000_hw *hw;
214 	p_rx_sw_packet_t packet;
215 	struct e1000_rx_desc *descriptor;
216 	uint32_t buf_low;
217 	uint32_t buf_high;
218 	uint32_t reg_val;
219 	int i;
220 	int size;
221 	e1000g_rx_ring_t *rx_ring;
222 
223 	hw = &Adapter->shared;
224 	rx_ring = Adapter->rx_ring;
225 
226 	/*
227 	 * zero out all of the receive buffer descriptor memory
228 	 * assures any previous data or status is erased
229 	 */
230 	bzero(rx_ring->rbd_area,
231 	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);
232 
233 	if (!Adapter->rx_buffer_setup) {
234 		/* Init the list of "Receive Buffer" */
235 		QUEUE_INIT_LIST(&rx_ring->recv_list);
236 
237 		/* Init the list of "Free Receive Buffer" */
238 		QUEUE_INIT_LIST(&rx_ring->free_list);
239 
240 		/*
241 		 * Setup Receive list and the Free list. Note that
242 		 * the both were allocated in one packet area.
243 		 */
244 		packet = rx_ring->packet_area;
245 		descriptor = rx_ring->rbd_first;
246 
247 		for (i = 0; i < Adapter->rx_desc_num;
248 		    i++, packet = packet->next, descriptor++) {
249 			ASSERT(packet != NULL);
250 			ASSERT(descriptor != NULL);
251 			descriptor->buffer_addr =
252 			    packet->rx_buf->dma_address;
253 
254 			/* Add this rx_sw_packet to the receive list */
255 			QUEUE_PUSH_TAIL(&rx_ring->recv_list,
256 			    &packet->Link);
257 		}
258 
259 		for (i = 0; i < Adapter->rx_freelist_num;
260 		    i++, packet = packet->next) {
261 			ASSERT(packet != NULL);
262 			/* Add this rx_sw_packet to the free list */
263 			QUEUE_PUSH_TAIL(&rx_ring->free_list,
264 			    &packet->Link);
265 		}
266 		rx_ring->avail_freepkt = Adapter->rx_freelist_num;
267 
268 		Adapter->rx_buffer_setup = B_TRUE;
269 	} else {
270 		/* Setup the initial pointer to the first rx descriptor */
271 		packet = (p_rx_sw_packet_t)
272 		    QUEUE_GET_HEAD(&rx_ring->recv_list);
273 		descriptor = rx_ring->rbd_first;
274 
275 		for (i = 0; i < Adapter->rx_desc_num; i++) {
276 			ASSERT(packet != NULL);
277 			ASSERT(descriptor != NULL);
278 			descriptor->buffer_addr =
279 			    packet->rx_buf->dma_address;
280 
281 			/* Get next rx_sw_packet */
282 			packet = (p_rx_sw_packet_t)
283 			    QUEUE_GET_NEXT(&rx_ring->recv_list, &packet->Link);
284 			descriptor++;
285 		}
286 	}
287 
288 	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
289 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
290 	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
291 	if (hw->mac.type >= e1000_82540) {
292 		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
293 		    Adapter->rx_intr_abs_delay);
294 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
295 		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
296 	}
297 
298 	/*
299 	 * Setup our descriptor pointers
300 	 */
301 	rx_ring->rbd_next = rx_ring->rbd_first;
302 
303 	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
304 	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
305 	size = E1000_READ_REG(hw, E1000_RDLEN(0));
306 
307 	/* To get lower order bits */
308 	buf_low = (uint32_t)rx_ring->rbd_dma_addr;
309 	/* To get the higher order bits */
310 	buf_high = (uint32_t)(rx_ring->rbd_dma_addr >> 32);
311 
312 	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
313 	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);
314 
315 	/*
316 	 * Setup our HW Rx Head & Tail descriptor pointers
317 	 */
318 	E1000_WRITE_REG(hw, E1000_RDT(0),
319 	    (uint32_t)(rx_ring->rbd_last - rx_ring->rbd_first));
320 	E1000_WRITE_REG(hw, E1000_RDH(0), 0);
321 
322 	/*
323 	 * Setup the Receive Control Register (RCTL), and ENABLE the
324 	 * receiver. The initial configuration is to: Enable the receiver,
325 	 * accept broadcasts, discard bad packets (and long packets),
326 	 * disable VLAN filter checking, set the receive descriptor
327 	 * minimum threshold size to 1/2, and the receive buffer size to
328 	 * 2k.
329 	 */
330 	reg_val = E1000_RCTL_EN |	/* Enable Receive Unit */
331 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
332 	    E1000_RCTL_LPE |		/* Large Packet Enable bit */
333 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
334 	    E1000_RCTL_RDMTS_HALF |
335 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
336 
337 	if (Adapter->strip_crc)
338 		reg_val |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */
339 
340 	if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
341 	    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
342 		reg_val |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
343 	else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
344 	    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
345 		reg_val |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
346 	else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
347 	    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
348 		reg_val |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
349 	else
350 		reg_val |= E1000_RCTL_SZ_2048;
351 
352 	if (e1000_tbi_sbp_enabled_82543(hw))
353 		reg_val |= E1000_RCTL_SBP;
354 
355 	/*
356 	 * Enable early receives on supported devices, only takes effect when
357 	 * packet size is equal or larger than the specified value (in 8 byte
358 	 * units), e.g. using jumbo frames when setting to E1000_ERT_2048
359 	 */
360 	if ((hw->mac.type == e1000_82573) || (hw->mac.type == e1000_ich9lan))
361 		E1000_WRITE_REG(hw, E1000_ERT, E1000_ERT_2048);
362 
363 	E1000_WRITE_REG(hw, E1000_RCTL, reg_val);
364 
365 	reg_val =
366 	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
367 	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */
368 
369 	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
370 
371 	/*
372 	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
373 	 * processing of received IPV6 extension headers
374 	 */
375 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
376 		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
377 		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
378 		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
379 		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
380 	}
381 }
382 
383 /*
384  * e1000g_get_buf - get an rx sw packet from the free_list
385  */
386 static p_rx_sw_packet_t
387 e1000g_get_buf(e1000g_rx_ring_t *rx_ring)
388 {
389 	struct e1000g *Adapter;
390 	p_rx_sw_packet_t packet;
391 
392 	Adapter = rx_ring->adapter;
393 
394 	mutex_enter(&rx_ring->freelist_lock);
395 	packet = (p_rx_sw_packet_t)
396 	    QUEUE_POP_HEAD(&rx_ring->free_list);
397 	if (packet != NULL)
398 		rx_ring->avail_freepkt--;
399 	mutex_exit(&rx_ring->freelist_lock);
400 
401 	return (packet);
402 }
403 
404 /*
405  * e1000g_receive - main receive routine
406  *
407  * This routine will process packets received in an interrupt
408  */
409 mblk_t *
410 e1000g_receive(struct e1000g *Adapter)
411 {
412 	struct e1000_hw *hw;
413 	mblk_t *nmp;
414 	mblk_t *ret_mp;
415 	mblk_t *ret_nmp;
416 	struct e1000_rx_desc *current_desc;
417 	struct e1000_rx_desc *last_desc;
418 	p_rx_sw_packet_t packet;
419 	p_rx_sw_packet_t newpkt;
420 	USHORT length;
421 	uint32_t pkt_count;
422 	uint32_t desc_count;
423 	boolean_t accept_frame;
424 	boolean_t end_of_packet;
425 	boolean_t need_copy;
426 	e1000g_rx_ring_t *rx_ring;
427 	dma_buffer_t *rx_buf;
428 	uint16_t cksumflags;
429 
430 	ret_mp = NULL;
431 	ret_nmp = NULL;
432 	pkt_count = 0;
433 	desc_count = 0;
434 	cksumflags = 0;
435 
436 	hw = &Adapter->shared;
437 	rx_ring = Adapter->rx_ring;
438 
439 	/* Sync the Rx descriptor DMA buffers */
440 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
441 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
442 
443 	if (e1000g_check_dma_handle(rx_ring->rbd_dma_handle) != DDI_FM_OK) {
444 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
445 		Adapter->chip_state = E1000G_ERROR;
446 	}
447 
448 	current_desc = rx_ring->rbd_next;
449 	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
450 		/*
451 		 * don't send anything up. just clear the RFD
452 		 */
453 		E1000G_DEBUG_STAT(rx_ring->stat_none);
454 		return (ret_mp);
455 	}
456 
457 	/*
458 	 * Loop through the receive descriptors starting at the last known
459 	 * descriptor owned by the hardware that begins a packet.
460 	 */
461 	while ((current_desc->status & E1000_RXD_STAT_DD) &&
462 	    (pkt_count < Adapter->rx_limit_onintr)) {
463 
464 		desc_count++;
465 		/*
466 		 * Now this can happen in Jumbo frame situation.
467 		 */
468 		if (current_desc->status & E1000_RXD_STAT_EOP) {
469 			/* packet has EOP set */
470 			end_of_packet = B_TRUE;
471 		} else {
472 			/*
473 			 * If this received buffer does not have the
474 			 * End-Of-Packet bit set, the received packet
475 			 * will consume multiple buffers. We won't send this
476 			 * packet upstack till we get all the related buffers.
477 			 */
478 			end_of_packet = B_FALSE;
479 		}
480 
481 		/*
482 		 * Get a pointer to the actual receive buffer
483 		 * The mp->b_rptr is mapped to The CurrentDescriptor
484 		 * Buffer Address.
485 		 */
486 		packet =
487 		    (p_rx_sw_packet_t)QUEUE_GET_HEAD(&rx_ring->recv_list);
488 		ASSERT(packet != NULL);
489 
490 		rx_buf = packet->rx_buf;
491 
492 		length = current_desc->length;
493 
494 #ifdef __sparc
495 		if (packet->dma_type == USE_DVMA)
496 			dvma_sync(rx_buf->dma_handle, 0,
497 			    DDI_DMA_SYNC_FORKERNEL);
498 		else
499 			(void) ddi_dma_sync(rx_buf->dma_handle,
500 			    E1000G_IPALIGNROOM, length,
501 			    DDI_DMA_SYNC_FORKERNEL);
502 #else
503 		(void) ddi_dma_sync(rx_buf->dma_handle,
504 		    E1000G_IPALIGNROOM, length,
505 		    DDI_DMA_SYNC_FORKERNEL);
506 #endif
507 
508 		if (e1000g_check_dma_handle(
509 		    rx_buf->dma_handle) != DDI_FM_OK) {
510 			ddi_fm_service_impact(Adapter->dip,
511 			    DDI_SERVICE_DEGRADED);
512 			Adapter->chip_state = E1000G_ERROR;
513 		}
514 
515 		accept_frame = (current_desc->errors == 0) ||
516 		    ((current_desc->errors &
517 		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);
518 
519 		if (hw->mac.type == e1000_82543) {
520 			unsigned char last_byte;
521 
522 			last_byte =
523 			    *((unsigned char *)rx_buf->address + length - 1);
524 
525 			if (TBI_ACCEPT(hw,
526 			    current_desc->status, current_desc->errors,
527 			    current_desc->length, last_byte,
528 			    Adapter->min_frame_size, Adapter->max_frame_size)) {
529 
530 				e1000_tbi_adjust_stats(Adapter,
531 				    length, hw->mac.addr);
532 
533 				length--;
534 				accept_frame = B_TRUE;
535 			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
536 			    (current_desc->errors == E1000_RXD_ERR_CE)) {
537 				accept_frame = B_TRUE;
538 			}
539 		}
540 
541 		/*
542 		 * Indicate the packet to the NOS if it was good.
543 		 * Normally, hardware will discard bad packets for us.
544 		 * Check for the packet to be a valid Ethernet packet
545 		 */
546 		if (!accept_frame) {
547 			/*
548 			 * error in incoming packet, either the packet is not a
549 			 * ethernet size packet, or the packet has an error. In
550 			 * either case, the packet will simply be discarded.
551 			 */
552 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
553 			    "Process Receive Interrupts: Error in Packet\n");
554 
555 			E1000G_STAT(rx_ring->stat_error);
556 			/*
557 			 * Returning here as we are done here. There is
558 			 * no point in waiting for while loop to elapse
559 			 * and the things which were done. More efficient
560 			 * and less error prone...
561 			 */
562 			goto rx_drop;
563 		}
564 
565 		/*
566 		 * If the Ethernet CRC is not stripped by the hardware,
567 		 * we need to strip it before sending it up to the stack.
568 		 */
569 		if (end_of_packet && !Adapter->strip_crc) {
570 			if (length > ETHERFCSL) {
571 				length -= ETHERFCSL;
572 			} else {
573 				/*
574 				 * If the fragment is smaller than the CRC,
575 				 * drop this fragment, do the processing of
576 				 * the end of the packet.
577 				 */
578 				ASSERT(rx_ring->rx_mblk_tail != NULL);
579 				rx_ring->rx_mblk_tail->b_wptr -=
580 				    ETHERFCSL - length;
581 				rx_ring->rx_mblk_len -=
582 				    ETHERFCSL - length;
583 
584 				QUEUE_POP_HEAD(&rx_ring->recv_list);
585 
586 				goto rx_end_of_packet;
587 			}
588 		}
589 
590 		need_copy = B_TRUE;
591 
592 		if (length <= Adapter->rx_bcopy_thresh)
593 			goto rx_copy;
594 
595 		/*
596 		 * Get the pre-constructed mblk that was associated
597 		 * to the receive data buffer.
598 		 */
599 		if (packet->mp == NULL) {
600 			packet->mp = desballoc((unsigned char *)
601 			    rx_buf->address - E1000G_IPALIGNROOM,
602 			    length + E1000G_IPALIGNROOM,
603 			    BPRI_MED, &packet->free_rtn);
604 
605 			if (packet->mp != NULL) {
606 				packet->mp->b_rptr += E1000G_IPALIGNROOM;
607 				packet->mp->b_wptr += E1000G_IPALIGNROOM;
608 			} else {
609 				E1000G_STAT(rx_ring->stat_esballoc_fail);
610 			}
611 		}
612 
613 		if (packet->mp != NULL) {
614 			/*
615 			 * We have two sets of buffer pool. One associated with
616 			 * the Rxdescriptors and other a freelist buffer pool.
617 			 * Each time we get a good packet, Try to get a buffer
618 			 * from the freelist pool using e1000g_get_buf. If we
619 			 * get free buffer, then replace the descriptor buffer
620 			 * address with the free buffer we just got, and pass
621 			 * the pre-constructed mblk upstack. (note no copying)
622 			 *
623 			 * If we failed to get a free buffer, then try to
624 			 * allocate a new buffer(mp) and copy the recv buffer
625 			 * content to our newly allocated buffer(mp). Don't
626 			 * disturb the desriptor buffer address. (note copying)
627 			 */
628 			newpkt = e1000g_get_buf(rx_ring);
629 
630 			if (newpkt != NULL) {
631 				/*
632 				 * Get the mblk associated to the data,
633 				 * and strip it off the sw packet.
634 				 */
635 				nmp = packet->mp;
636 				packet->mp = NULL;
637 				packet->flag = E1000G_RX_SW_SENDUP;
638 
639 				/*
640 				 * Now replace old buffer with the new
641 				 * one we got from free list
642 				 * Both the RxSwPacket as well as the
643 				 * Receive Buffer Descriptor will now
644 				 * point to this new packet.
645 				 */
646 				packet = newpkt;
647 
648 				current_desc->buffer_addr =
649 				    newpkt->rx_buf->dma_address;
650 
651 				need_copy = B_FALSE;
652 			} else {
653 				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
654 			}
655 		}
656 
657 rx_copy:
658 		if (need_copy) {
659 			/*
660 			 * No buffers available on free list,
661 			 * bcopy the data from the buffer and
662 			 * keep the original buffer. Dont want to
663 			 * do this.. Yack but no other way
664 			 */
665 			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
666 			    BPRI_MED)) == NULL) {
667 				/*
668 				 * The system has no buffers available
669 				 * to send up the incoming packet, hence
670 				 * the packet will have to be processed
671 				 * when there're more buffers available.
672 				 */
673 				E1000G_STAT(rx_ring->stat_allocb_fail);
674 				goto rx_drop;
675 			}
676 			nmp->b_rptr += E1000G_IPALIGNROOM;
677 			nmp->b_wptr += E1000G_IPALIGNROOM;
678 			/*
679 			 * The free list did not have any buffers
680 			 * available, so, the received packet will
681 			 * have to be copied into a mp and the original
682 			 * buffer will have to be retained for future
683 			 * packet reception.
684 			 */
685 			bcopy(rx_buf->address, nmp->b_wptr, length);
686 		}
687 
688 		/*
689 		 * The rx_sw_packet MUST be popped off the
690 		 * RxSwPacketList before either a putnext or freemsg
691 		 * is done on the mp that has now been created by the
692 		 * desballoc. If not, it is possible that the free
693 		 * routine will get called from the interrupt context
694 		 * and try to put this packet on the free list
695 		 */
696 		(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_ring->recv_list);
697 
698 		ASSERT(nmp != NULL);
699 		nmp->b_wptr += length;
700 
701 		if (rx_ring->rx_mblk == NULL) {
702 			/*
703 			 *  TCP/UDP checksum offload and
704 			 *  IP checksum offload
705 			 */
706 			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
707 				/*
708 				 * Check TCP/UDP checksum
709 				 */
710 				if ((current_desc->status &
711 				    E1000_RXD_STAT_TCPCS) &&
712 				    !(current_desc->errors &
713 				    E1000_RXD_ERR_TCPE))
714 					cksumflags |= HCK_FULLCKSUM |
715 					    HCK_FULLCKSUM_OK;
716 				/*
717 				 * Check IP Checksum
718 				 */
719 				if ((current_desc->status &
720 				    E1000_RXD_STAT_IPCS) &&
721 				    !(current_desc->errors &
722 				    E1000_RXD_ERR_IPE))
723 					cksumflags |= HCK_IPV4_HDRCKSUM;
724 			}
725 		}
726 
727 		/*
728 		 * We need to maintain our packet chain in the global
729 		 * Adapter structure, for the Rx processing can end
730 		 * with a fragment that has no EOP set.
731 		 */
732 		if (rx_ring->rx_mblk == NULL) {
733 			/* Get the head of the message chain */
734 			rx_ring->rx_mblk = nmp;
735 			rx_ring->rx_mblk_tail = nmp;
736 			rx_ring->rx_mblk_len = length;
737 		} else {	/* Not the first packet */
738 			/* Continue adding buffers */
739 			rx_ring->rx_mblk_tail->b_cont = nmp;
740 			rx_ring->rx_mblk_tail = nmp;
741 			rx_ring->rx_mblk_len += length;
742 		}
743 		ASSERT(rx_ring->rx_mblk != NULL);
744 		ASSERT(rx_ring->rx_mblk_tail != NULL);
745 		ASSERT(rx_ring->rx_mblk_tail->b_cont == NULL);
746 
747 		/*
748 		 * Now this MP is ready to travel upwards but some more
749 		 * fragments are coming.
750 		 * We will send packet upwards as soon as we get EOP
751 		 * set on the packet.
752 		 */
753 		if (!end_of_packet) {
754 			/*
755 			 * continue to get the next descriptor,
756 			 * Tail would be advanced at the end
757 			 */
758 			goto rx_next_desc;
759 		}
760 
761 rx_end_of_packet:
762 		/*
763 		 * Found packet with EOP
764 		 * Process the last fragment.
765 		 */
766 		if (cksumflags != 0) {
767 			(void) hcksum_assoc(rx_ring->rx_mblk,
768 			    NULL, NULL, 0, 0, 0, 0, cksumflags, 0);
769 			cksumflags = 0;
770 		}
771 
772 		/*
773 		 * Count packets that span multi-descriptors
774 		 */
775 		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
776 		    (rx_ring->rx_mblk->b_cont != NULL));
777 
778 		/*
779 		 * Append to list to send upstream
780 		 */
781 		if (ret_mp == NULL) {
782 			ret_mp = ret_nmp = rx_ring->rx_mblk;
783 		} else {
784 			ret_nmp->b_next = rx_ring->rx_mblk;
785 			ret_nmp = rx_ring->rx_mblk;
786 		}
787 		ret_nmp->b_next = NULL;
788 
789 		rx_ring->rx_mblk = NULL;
790 		rx_ring->rx_mblk_tail = NULL;
791 		rx_ring->rx_mblk_len = 0;
792 
793 		pkt_count++;
794 
795 rx_next_desc:
796 		/*
797 		 * Zero out the receive descriptors status
798 		 */
799 		current_desc->status = 0;
800 
801 		if (current_desc == rx_ring->rbd_last)
802 			rx_ring->rbd_next = rx_ring->rbd_first;
803 		else
804 			rx_ring->rbd_next++;
805 
806 		last_desc = current_desc;
807 		current_desc = rx_ring->rbd_next;
808 
809 		/*
810 		 * Put the buffer that we just indicated back
811 		 * at the end of our list
812 		 */
813 		QUEUE_PUSH_TAIL(&rx_ring->recv_list,
814 		    &packet->Link);
815 	}	/* while loop */
816 
817 	/* Sync the Rx descriptor DMA buffers */
818 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
819 	    0, 0, DDI_DMA_SYNC_FORDEV);
820 
821 	/*
822 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
823 	 */
824 	E1000_WRITE_REG(hw, E1000_RDT(0),
825 	    (uint32_t)(last_desc - rx_ring->rbd_first));
826 
827 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
828 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
829 		Adapter->chip_state = E1000G_ERROR;
830 	}
831 
832 	Adapter->rx_pkt_cnt = pkt_count;
833 
834 	return (ret_mp);
835 
836 rx_drop:
837 	/*
838 	 * Zero out the receive descriptors status
839 	 */
840 	current_desc->status = 0;
841 
842 	/* Sync the Rx descriptor DMA buffers */
843 	(void) ddi_dma_sync(rx_ring->rbd_dma_handle,
844 	    0, 0, DDI_DMA_SYNC_FORDEV);
845 
846 	if (current_desc == rx_ring->rbd_last)
847 		rx_ring->rbd_next = rx_ring->rbd_first;
848 	else
849 		rx_ring->rbd_next++;
850 
851 	last_desc = current_desc;
852 
853 	(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_ring->recv_list);
854 
855 	QUEUE_PUSH_TAIL(&rx_ring->recv_list, &packet->Link);
856 	/*
857 	 * Reclaim all old buffers already allocated during
858 	 * Jumbo receives.....for incomplete reception
859 	 */
860 	if (rx_ring->rx_mblk != NULL) {
861 		freemsg(rx_ring->rx_mblk);
862 		rx_ring->rx_mblk = NULL;
863 		rx_ring->rx_mblk_tail = NULL;
864 		rx_ring->rx_mblk_len = 0;
865 	}
866 	/*
867 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
868 	 */
869 	E1000_WRITE_REG(hw, E1000_RDT(0),
870 	    (uint32_t)(last_desc - rx_ring->rbd_first));
871 
872 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
873 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
874 		Adapter->chip_state = E1000G_ERROR;
875 	}
876 
877 	return (ret_mp);
878 }
879