xref: /titanic_50/usr/src/uts/common/io/e1000g/e1000g_rx.c (revision 42cc51e07cdbcad3b9aca8d9d991fc09b251feb7)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
27  * Copyright 2016 Joyent, Inc.
28  */
29 
30 /*
31  * **********************************************************************
32  *									*
33  * Module Name:								*
34  *   e1000g_rx.c							*
35  *									*
36  * Abstract:								*
37  *   This file contains some routines that take care of Receive		*
38  *   interrupt and also for the received packets it sends up to		*
39  *   upper layer.							*
40  *   It tries to do a zero copy if free buffers are available in	*
41  *   the pool.								*
42  *									*
43  * **********************************************************************
44  */
45 
46 #include "e1000g_sw.h"
47 #include "e1000g_debug.h"
48 
49 static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_data_t *rx_data);
50 #pragma	inline(e1000g_get_buf)
51 
52 /*
53  * e1000g_rxfree_func - the call-back function to reclaim rx buffer
54  *
55  * This function is called when an mp is freed by the user thru
56  * freeb call (Only for mp constructed through desballoc call)
57  * It returns back the freed buffer to the freelist
58  */
59 void
e1000g_rxfree_func(p_rx_sw_packet_t packet)60 e1000g_rxfree_func(p_rx_sw_packet_t packet)
61 {
62 	e1000g_rx_data_t *rx_data;
63 	private_devi_list_t *devi_node;
64 	struct e1000g *Adapter;
65 	uint32_t ring_cnt;
66 	uint32_t ref_cnt;
67 	unsigned char *address;
68 
69 	if (packet->ref_cnt == 0) {
70 		/*
71 		 * This case only happens when rx buffers are being freed
72 		 * in e1000g_stop() and freemsg() is called.
73 		 */
74 		return;
75 	}
76 
77 	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;
78 
79 	if (packet->mp == NULL) {
80 		/*
81 		 * Allocate a mblk that binds to the data buffer
82 		 */
83 		address = (unsigned char *)packet->rx_buf->address;
84 		if (address != NULL) {
85 			packet->mp = desballoc((unsigned char *)
86 			    address, packet->rx_buf->size,
87 			    BPRI_MED, &packet->free_rtn);
88 		}
89 	}
90 
91 	/*
92 	 * Enqueue the recycled packets in a recycle queue. When freelist
93 	 * dries up, move the entire chain of packets from recycle queue
94 	 * to freelist. This helps in avoiding per packet mutex contention
95 	 * around freelist.
96 	 */
97 	mutex_enter(&rx_data->recycle_lock);
98 	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
99 	rx_data->recycle_freepkt++;
100 	mutex_exit(&rx_data->recycle_lock);
101 
102 	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
103 	if (ref_cnt == 0) {
104 		mutex_enter(&e1000g_rx_detach_lock);
105 		e1000g_free_rx_sw_packet(packet, B_FALSE);
106 
107 		atomic_dec_32(&rx_data->pending_count);
108 		atomic_dec_32(&e1000g_mblks_pending);
109 
110 		if ((rx_data->pending_count == 0) &&
111 		    (rx_data->flag & E1000G_RX_STOPPED)) {
112 			devi_node = rx_data->priv_devi_node;
113 
114 			if (devi_node != NULL) {
115 				ring_cnt = atomic_dec_32_nv(
116 				    &devi_node->pending_rx_count);
117 				if ((ring_cnt == 0) &&
118 				    (devi_node->flag &
119 				    E1000G_PRIV_DEVI_DETACH)) {
120 					e1000g_free_priv_devi_node(
121 					    devi_node);
122 				}
123 			} else {
124 				Adapter = rx_data->rx_ring->adapter;
125 				atomic_dec_32(
126 				    &Adapter->pending_rx_count);
127 			}
128 
129 			e1000g_free_rx_pending_buffers(rx_data);
130 			e1000g_free_rx_data(rx_data);
131 		}
132 		mutex_exit(&e1000g_rx_detach_lock);
133 	}
134 }
135 
136 /*
137  * e1000g_rx_setup - setup rx data structures
138  *
139  * This routine initializes all of the receive related
140  * structures. This includes the receive descriptors, the
141  * actual receive buffers, and the rx_sw_packet software
142  * structures.
143  */
144 void
e1000g_rx_setup(struct e1000g * Adapter)145 e1000g_rx_setup(struct e1000g *Adapter)
146 {
147 	struct e1000_hw *hw;
148 	p_rx_sw_packet_t packet;
149 	struct e1000_rx_desc *descriptor;
150 	uint32_t buf_low;
151 	uint32_t buf_high;
152 	uint32_t reg_val;
153 	uint32_t rctl;
154 	uint32_t rxdctl;
155 	uint32_t ert;
156 	uint16_t phy_data;
157 	int i;
158 	int size;
159 	e1000g_rx_data_t *rx_data;
160 
161 	hw = &Adapter->shared;
162 	rx_data = Adapter->rx_ring->rx_data;
163 
164 	/*
165 	 * zero out all of the receive buffer descriptor memory
166 	 * assures any previous data or status is erased
167 	 */
168 	bzero(rx_data->rbd_area,
169 	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);
170 
171 	if (!Adapter->rx_buffer_setup) {
172 		/* Init the list of "Receive Buffer" */
173 		QUEUE_INIT_LIST(&rx_data->recv_list);
174 
175 		/* Init the list of "Free Receive Buffer" */
176 		QUEUE_INIT_LIST(&rx_data->free_list);
177 
178 		/* Init the list of "Free Receive Buffer" */
179 		QUEUE_INIT_LIST(&rx_data->recycle_list);
180 		/*
181 		 * Setup Receive list and the Free list. Note that
182 		 * the both were allocated in one packet area.
183 		 */
184 		packet = rx_data->packet_area;
185 		descriptor = rx_data->rbd_first;
186 
187 		for (i = 0; i < Adapter->rx_desc_num;
188 		    i++, packet = packet->next, descriptor++) {
189 			ASSERT(packet != NULL);
190 			ASSERT(descriptor != NULL);
191 			descriptor->buffer_addr =
192 			    packet->rx_buf->dma_address;
193 
194 			/* Add this rx_sw_packet to the receive list */
195 			QUEUE_PUSH_TAIL(&rx_data->recv_list,
196 			    &packet->Link);
197 		}
198 
199 		for (i = 0; i < Adapter->rx_freelist_num;
200 		    i++, packet = packet->next) {
201 			ASSERT(packet != NULL);
202 			/* Add this rx_sw_packet to the free list */
203 			QUEUE_PUSH_TAIL(&rx_data->free_list,
204 			    &packet->Link);
205 		}
206 		rx_data->avail_freepkt = Adapter->rx_freelist_num;
207 		rx_data->recycle_freepkt = 0;
208 
209 		Adapter->rx_buffer_setup = B_TRUE;
210 	} else {
211 		/* Setup the initial pointer to the first rx descriptor */
212 		packet = (p_rx_sw_packet_t)
213 		    QUEUE_GET_HEAD(&rx_data->recv_list);
214 		descriptor = rx_data->rbd_first;
215 
216 		for (i = 0; i < Adapter->rx_desc_num; i++) {
217 			ASSERT(packet != NULL);
218 			ASSERT(descriptor != NULL);
219 			descriptor->buffer_addr =
220 			    packet->rx_buf->dma_address;
221 
222 			/* Get next rx_sw_packet */
223 			packet = (p_rx_sw_packet_t)
224 			    QUEUE_GET_NEXT(&rx_data->recv_list, &packet->Link);
225 			descriptor++;
226 		}
227 	}
228 
229 	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
230 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
231 	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
232 	if (hw->mac.type >= e1000_82540) {
233 		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
234 		    Adapter->rx_intr_abs_delay);
235 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
236 		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
237 	}
238 
239 	/*
240 	 * Setup our descriptor pointers
241 	 */
242 	rx_data->rbd_next = rx_data->rbd_first;
243 
244 	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
245 	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
246 	size = E1000_READ_REG(hw, E1000_RDLEN(0));
247 
248 	/* To get lower order bits */
249 	buf_low = (uint32_t)rx_data->rbd_dma_addr;
250 	/* To get the higher order bits */
251 	buf_high = (uint32_t)(rx_data->rbd_dma_addr >> 32);
252 
253 	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
254 	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);
255 
256 	/*
257 	 * Setup our HW Rx Head & Tail descriptor pointers
258 	 */
259 	E1000_WRITE_REG(hw, E1000_RDT(0),
260 	    (uint32_t)(rx_data->rbd_last - rx_data->rbd_first));
261 	E1000_WRITE_REG(hw, E1000_RDH(0), 0);
262 
263 	/*
264 	 * Setup the Receive Control Register (RCTL), and ENABLE the
265 	 * receiver. The initial configuration is to: Enable the receiver,
266 	 * accept broadcasts, discard bad packets (and long packets),
267 	 * disable VLAN filter checking, set the receive descriptor
268 	 * minimum threshold size to 1/2, and the receive buffer size to
269 	 * 2k.
270 	 */
271 	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
272 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
273 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
274 	    E1000_RCTL_RDMTS_HALF |
275 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
276 
277 	if (Adapter->default_mtu > ETHERMTU)
278 		rctl |= E1000_RCTL_LPE;  /* Large Packet Enable bit */
279 
280 	if (Adapter->strip_crc)
281 		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */
282 
283 	if (Adapter->mem_workaround_82546 &&
284 	    ((hw->mac.type == e1000_82545) ||
285 	    (hw->mac.type == e1000_82546) ||
286 	    (hw->mac.type == e1000_82546_rev_3))) {
287 		rctl |= E1000_RCTL_SZ_2048;
288 	} else {
289 		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
290 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
291 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
292 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
293 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
294 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
295 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
296 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
297 			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
298 		else
299 			rctl |= E1000_RCTL_SZ_2048;
300 	}
301 
302 	if (e1000_tbi_sbp_enabled_82543(hw))
303 		rctl |= E1000_RCTL_SBP;
304 
305 	/*
306 	 * Enable Early Receive Threshold (ERT) on supported devices.
307 	 * Only takes effect when packet size is equal or larger than the
308 	 * specified value (in 8 byte units), e.g. using jumbo frames.
309 	 */
310 	if ((hw->mac.type == e1000_82573) ||
311 	    (hw->mac.type == e1000_82574) ||
312 	    (hw->mac.type == e1000_ich9lan) ||
313 	    (hw->mac.type == e1000_ich10lan)) {
314 
315 		ert = E1000_ERT_2048;
316 
317 		/*
318 		 * Special modification when ERT and
319 		 * jumbo frames are enabled
320 		 */
321 		if (Adapter->default_mtu > ETHERMTU) {
322 			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
323 			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
324 			ert |= (1 << 13);
325 		}
326 
327 		E1000_WRITE_REG(hw, E1000_ERT, ert);
328 	}
329 
330 	/* Workaround errata on 82577/8 adapters with large frames */
331 	if ((hw->mac.type == e1000_pchlan) &&
332 	    (Adapter->default_mtu > ETHERMTU)) {
333 
334 		(void) e1000_read_phy_reg(hw, PHY_REG(770, 26), &phy_data);
335 		phy_data &= 0xfff8;
336 		phy_data |= (1 << 2);
337 		(void) e1000_write_phy_reg(hw, PHY_REG(770, 26), phy_data);
338 
339 		if (hw->phy.type == e1000_phy_82577) {
340 			(void) e1000_read_phy_reg(hw, 22, &phy_data);
341 			phy_data &= 0x0fff;
342 			phy_data |= (1 << 14);
343 			(void) e1000_write_phy_reg(hw, 0x10, 0x2823);
344 			(void) e1000_write_phy_reg(hw, 0x11, 0x0003);
345 			(void) e1000_write_phy_reg(hw, 22, phy_data);
346 		}
347 	}
348 
349 	/* Workaround errata on 82579 adapters with large frames */
350 	if (hw->mac.type == e1000_pch2lan) {
351 		boolean_t enable_jumbo = (Adapter->default_mtu > ETHERMTU ?
352 		    B_TRUE : B_FALSE);
353 
354 		if (e1000_lv_jumbo_workaround_ich8lan(hw, enable_jumbo) != 0)
355 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
356 			    "failed to enable jumbo frame workaround mode\n");
357 	}
358 
359 	reg_val =
360 	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
361 	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */
362 
363 	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
364 
365 	/*
366 	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
367 	 * processing of received IPV6 extension headers
368 	 */
369 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
370 		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
371 		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
372 		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
373 		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
374 	}
375 
376 	/* Write to enable the receive unit */
377 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
378 }
379 
380 /*
381  * e1000g_get_buf - get an rx sw packet from the free_list
382  */
383 static p_rx_sw_packet_t
e1000g_get_buf(e1000g_rx_data_t * rx_data)384 e1000g_get_buf(e1000g_rx_data_t *rx_data)
385 {
386 	p_rx_sw_packet_t packet;
387 	struct e1000g *Adapter;
388 
389 	Adapter = rx_data->rx_ring->adapter;
390 
391 	mutex_enter(&rx_data->freelist_lock);
392 	packet = (p_rx_sw_packet_t)
393 	    QUEUE_POP_HEAD(&rx_data->free_list);
394 	if (packet != NULL) {
395 		rx_data->avail_freepkt--;
396 		goto end;
397 	}
398 
399 	/*
400 	 * If the freelist has no packets, check the recycle list
401 	 * to see if there are any available descriptor there.
402 	 */
403 	mutex_enter(&rx_data->recycle_lock);
404 	QUEUE_SWITCH(&rx_data->free_list, &rx_data->recycle_list);
405 	rx_data->avail_freepkt = rx_data->recycle_freepkt;
406 	rx_data->recycle_freepkt = 0;
407 	mutex_exit(&rx_data->recycle_lock);
408 	packet = (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->free_list);
409 	if (packet != NULL) {
410 		rx_data->avail_freepkt--;
411 		goto end;
412 	}
413 
414 	if (Adapter->rx_freelist_num < Adapter->rx_freelist_limit) {
415 		(void) e1000g_increase_rx_packets(rx_data);
416 		packet = (p_rx_sw_packet_t)
417 		    QUEUE_POP_HEAD(&rx_data->free_list);
418 		if (packet != NULL) {
419 			rx_data->avail_freepkt--;
420 		}
421 	}
422 
423 end:
424 	mutex_exit(&rx_data->freelist_lock);
425 	return (packet);
426 }
427 
428 /*
429  * e1000g_receive - main receive routine
430  *
431  * This routine will process packets received in an interrupt
432  */
433 mblk_t *
e1000g_receive(e1000g_rx_ring_t * rx_ring,mblk_t ** tail,uint_t sz)434 e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
435 {
436 	struct e1000_hw *hw;
437 	mblk_t *nmp;
438 	mblk_t *ret_mp;
439 	mblk_t *ret_nmp;
440 	struct e1000_rx_desc *current_desc;
441 	struct e1000_rx_desc *last_desc;
442 	p_rx_sw_packet_t packet;
443 	p_rx_sw_packet_t newpkt;
444 	uint16_t length;
445 	uint32_t pkt_count;
446 	uint32_t desc_count;
447 	boolean_t accept_frame;
448 	boolean_t end_of_packet;
449 	boolean_t need_copy;
450 	struct e1000g *Adapter;
451 	dma_buffer_t *rx_buf;
452 	uint16_t cksumflags;
453 	uint_t chain_sz = 0;
454 	e1000g_rx_data_t *rx_data;
455 	uint32_t max_size;
456 	uint32_t min_size;
457 
458 	ret_mp = NULL;
459 	ret_nmp = NULL;
460 	pkt_count = 0;
461 	desc_count = 0;
462 	cksumflags = 0;
463 
464 	Adapter = rx_ring->adapter;
465 	rx_data = rx_ring->rx_data;
466 	hw = &Adapter->shared;
467 
468 	/* Sync the Rx descriptor DMA buffers */
469 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
470 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
471 
472 	if (e1000g_check_dma_handle(rx_data->rbd_dma_handle) != DDI_FM_OK) {
473 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
474 		Adapter->e1000g_state |= E1000G_ERROR;
475 		return (NULL);
476 	}
477 
478 	current_desc = rx_data->rbd_next;
479 	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
480 		/*
481 		 * don't send anything up. just clear the RFD
482 		 */
483 		E1000G_DEBUG_STAT(rx_ring->stat_none);
484 		return (NULL);
485 	}
486 
487 	max_size = Adapter->max_frame_size - ETHERFCSL - VLAN_TAGSZ;
488 	min_size = ETHERMIN;
489 
490 	/*
491 	 * Loop through the receive descriptors starting at the last known
492 	 * descriptor owned by the hardware that begins a packet.
493 	 */
494 	while ((current_desc->status & E1000_RXD_STAT_DD) &&
495 	    (pkt_count < Adapter->rx_limit_onintr) &&
496 	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {
497 
498 		desc_count++;
499 		/*
500 		 * Now this can happen in Jumbo frame situation.
501 		 */
502 		if (current_desc->status & E1000_RXD_STAT_EOP) {
503 			/* packet has EOP set */
504 			end_of_packet = B_TRUE;
505 		} else {
506 			/*
507 			 * If this received buffer does not have the
508 			 * End-Of-Packet bit set, the received packet
509 			 * will consume multiple buffers. We won't send this
510 			 * packet upstack till we get all the related buffers.
511 			 */
512 			end_of_packet = B_FALSE;
513 		}
514 
515 		/*
516 		 * Get a pointer to the actual receive buffer
517 		 * The mp->b_rptr is mapped to The CurrentDescriptor
518 		 * Buffer Address.
519 		 */
520 		packet =
521 		    (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
522 		ASSERT(packet != NULL);
523 
524 		rx_buf = packet->rx_buf;
525 
526 		length = current_desc->length;
527 
528 #ifdef __sparc
529 		if (packet->dma_type == USE_DVMA)
530 			dvma_sync(rx_buf->dma_handle, 0,
531 			    DDI_DMA_SYNC_FORKERNEL);
532 		else
533 			(void) ddi_dma_sync(rx_buf->dma_handle,
534 			    E1000G_IPALIGNROOM, length,
535 			    DDI_DMA_SYNC_FORKERNEL);
536 #else
537 		(void) ddi_dma_sync(rx_buf->dma_handle,
538 		    E1000G_IPALIGNROOM, length,
539 		    DDI_DMA_SYNC_FORKERNEL);
540 #endif
541 
542 		if (e1000g_check_dma_handle(
543 		    rx_buf->dma_handle) != DDI_FM_OK) {
544 			ddi_fm_service_impact(Adapter->dip,
545 			    DDI_SERVICE_DEGRADED);
546 			Adapter->e1000g_state |= E1000G_ERROR;
547 
548 			goto rx_drop;
549 		}
550 
551 		/*
552 		 * workaround for redmine #3100. After a switch reset packet
553 		 * queue and descriptor dma addresses got out of sync. Detect
554 		 * this and flag the error. Let the watchdog timer do the reset
555 		 */
556 		if (current_desc->buffer_addr != rx_buf->dma_address) {
557 			e1000g_log(Adapter, CE_WARN, "receive dma descriptors "
558 			    "got out of sync, resetting adapter");
559 			Adapter->e1000g_state |= E1000G_ERROR;
560 		}
561 		accept_frame = (current_desc->errors == 0) ||
562 		    ((current_desc->errors &
563 		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);
564 
565 		if (hw->mac.type == e1000_82543) {
566 			unsigned char last_byte;
567 
568 			last_byte =
569 			    *((unsigned char *)rx_buf->address + length - 1);
570 
571 			if (TBI_ACCEPT(hw,
572 			    current_desc->status, current_desc->errors,
573 			    current_desc->length, last_byte,
574 			    Adapter->min_frame_size, Adapter->max_frame_size)) {
575 
576 				e1000_tbi_adjust_stats(Adapter,
577 				    length, hw->mac.addr);
578 
579 				length--;
580 				accept_frame = B_TRUE;
581 			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
582 			    (current_desc->errors == E1000_RXD_ERR_CE)) {
583 				accept_frame = B_TRUE;
584 			}
585 		}
586 
587 		/*
588 		 * Indicate the packet to the NOS if it was good.
589 		 * Normally, hardware will discard bad packets for us.
590 		 * Check for the packet to be a valid Ethernet packet
591 		 */
592 		if (!accept_frame) {
593 			/*
594 			 * error in incoming packet, either the packet is not a
595 			 * ethernet size packet, or the packet has an error. In
596 			 * either case, the packet will simply be discarded.
597 			 */
598 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
599 			    "Process Receive Interrupts: Error in Packet\n");
600 
601 			E1000G_STAT(rx_ring->stat_error);
602 			/*
603 			 * Returning here as we are done here. There is
604 			 * no point in waiting for while loop to elapse
605 			 * and the things which were done. More efficient
606 			 * and less error prone...
607 			 */
608 			goto rx_drop;
609 		}
610 
611 		/*
612 		 * If the Ethernet CRC is not stripped by the hardware,
613 		 * we need to strip it before sending it up to the stack.
614 		 */
615 		if (end_of_packet && !Adapter->strip_crc) {
616 			if (length > ETHERFCSL) {
617 				length -= ETHERFCSL;
618 			} else {
619 				/*
620 				 * If the fragment is smaller than the CRC,
621 				 * drop this fragment, do the processing of
622 				 * the end of the packet.
623 				 */
624 				if (rx_data->rx_mblk_tail == NULL) {
625 					E1000G_STAT(rx_ring->stat_crc_only_pkt);
626 					goto rx_next_desc;
627 				}
628 
629 				rx_data->rx_mblk_tail->b_wptr -=
630 				    ETHERFCSL - length;
631 				rx_data->rx_mblk_len -=
632 				    ETHERFCSL - length;
633 				goto rx_end_of_packet;
634 			}
635 		}
636 
637 		need_copy = B_TRUE;
638 
639 		if (length <= Adapter->rx_bcopy_thresh)
640 			goto rx_copy;
641 
642 		/*
643 		 * Get the pre-constructed mblk that was associated
644 		 * to the receive data buffer.
645 		 */
646 		if (packet->mp == NULL) {
647 			packet->mp = desballoc((unsigned char *)
648 			    rx_buf->address, length,
649 			    BPRI_MED, &packet->free_rtn);
650 		}
651 
652 		if (packet->mp != NULL) {
653 			/*
654 			 * We have two sets of buffer pool. One associated with
655 			 * the Rxdescriptors and other a freelist buffer pool.
656 			 * Each time we get a good packet, Try to get a buffer
657 			 * from the freelist pool using e1000g_get_buf. If we
658 			 * get free buffer, then replace the descriptor buffer
659 			 * address with the free buffer we just got, and pass
660 			 * the pre-constructed mblk upstack. (note no copying)
661 			 *
662 			 * If we failed to get a free buffer, then try to
663 			 * allocate a new buffer(mp) and copy the recv buffer
664 			 * content to our newly allocated buffer(mp). Don't
665 			 * disturb the desriptor buffer address. (note copying)
666 			 */
667 			newpkt = e1000g_get_buf(rx_data);
668 
669 			if (newpkt != NULL) {
670 				/*
671 				 * Get the mblk associated to the data,
672 				 * and strip it off the sw packet.
673 				 */
674 				nmp = packet->mp;
675 				packet->mp = NULL;
676 				atomic_inc_32(&packet->ref_cnt);
677 
678 				/*
679 				 * Now replace old buffer with the new
680 				 * one we got from free list
681 				 * Both the RxSwPacket as well as the
682 				 * Receive Buffer Descriptor will now
683 				 * point to this new packet.
684 				 */
685 				packet = newpkt;
686 
687 				current_desc->buffer_addr =
688 				    newpkt->rx_buf->dma_address;
689 
690 				need_copy = B_FALSE;
691 			} else {
692 				/* EMPTY */
693 				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
694 			}
695 		}
696 
697 rx_copy:
698 		if (need_copy) {
699 			/*
700 			 * No buffers available on free list,
701 			 * bcopy the data from the buffer and
702 			 * keep the original buffer. Dont want to
703 			 * do this.. Yack but no other way
704 			 */
705 			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
706 			    BPRI_MED)) == NULL) {
707 				/*
708 				 * The system has no buffers available
709 				 * to send up the incoming packet, hence
710 				 * the packet will have to be processed
711 				 * when there're more buffers available.
712 				 */
713 				E1000G_STAT(rx_ring->stat_allocb_fail);
714 				goto rx_drop;
715 			}
716 			nmp->b_rptr += E1000G_IPALIGNROOM;
717 			nmp->b_wptr += E1000G_IPALIGNROOM;
718 			/*
719 			 * The free list did not have any buffers
720 			 * available, so, the received packet will
721 			 * have to be copied into a mp and the original
722 			 * buffer will have to be retained for future
723 			 * packet reception.
724 			 */
725 			bcopy(rx_buf->address, nmp->b_wptr, length);
726 		}
727 
728 		ASSERT(nmp != NULL);
729 		nmp->b_wptr += length;
730 
731 		if (rx_data->rx_mblk == NULL) {
732 			/*
733 			 *  TCP/UDP checksum offload and
734 			 *  IP checksum offload
735 			 */
736 			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
737 				/*
738 				 * Check TCP/UDP checksum
739 				 */
740 				if ((current_desc->status &
741 				    E1000_RXD_STAT_TCPCS) &&
742 				    !(current_desc->errors &
743 				    E1000_RXD_ERR_TCPE))
744 					cksumflags |= HCK_FULLCKSUM_OK;
745 				/*
746 				 * Check IP Checksum
747 				 */
748 				if ((current_desc->status &
749 				    E1000_RXD_STAT_IPCS) &&
750 				    !(current_desc->errors &
751 				    E1000_RXD_ERR_IPE))
752 					cksumflags |= HCK_IPV4_HDRCKSUM_OK;
753 			}
754 		}
755 
756 		/*
757 		 * We need to maintain our packet chain in the global
758 		 * Adapter structure, for the Rx processing can end
759 		 * with a fragment that has no EOP set.
760 		 */
761 		if (rx_data->rx_mblk == NULL) {
762 			/* Get the head of the message chain */
763 			rx_data->rx_mblk = nmp;
764 			rx_data->rx_mblk_tail = nmp;
765 			rx_data->rx_mblk_len = length;
766 		} else {	/* Not the first packet */
767 			/* Continue adding buffers */
768 			rx_data->rx_mblk_tail->b_cont = nmp;
769 			rx_data->rx_mblk_tail = nmp;
770 			rx_data->rx_mblk_len += length;
771 		}
772 		ASSERT(rx_data->rx_mblk != NULL);
773 		ASSERT(rx_data->rx_mblk_tail != NULL);
774 		ASSERT(rx_data->rx_mblk_tail->b_cont == NULL);
775 
776 		/*
777 		 * Now this MP is ready to travel upwards but some more
778 		 * fragments are coming.
779 		 * We will send packet upwards as soon as we get EOP
780 		 * set on the packet.
781 		 */
782 		if (!end_of_packet) {
783 			/*
784 			 * continue to get the next descriptor,
785 			 * Tail would be advanced at the end
786 			 */
787 			goto rx_next_desc;
788 		}
789 
790 rx_end_of_packet:
791 		if (E1000G_IS_VLAN_PACKET(rx_data->rx_mblk->b_rptr))
792 			max_size = Adapter->max_frame_size - ETHERFCSL;
793 
794 		if ((rx_data->rx_mblk_len > max_size) ||
795 		    (rx_data->rx_mblk_len < min_size)) {
796 			E1000G_STAT(rx_ring->stat_size_error);
797 			goto rx_drop;
798 		}
799 
800 		/*
801 		 * Found packet with EOP
802 		 * Process the last fragment.
803 		 */
804 		if (cksumflags != 0) {
805 			mac_hcksum_set(rx_data->rx_mblk,
806 			    0, 0, 0, 0, cksumflags);
807 			cksumflags = 0;
808 		}
809 
810 		/*
811 		 * Count packets that span multi-descriptors
812 		 */
813 		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
814 		    (rx_data->rx_mblk->b_cont != NULL));
815 
816 		/*
817 		 * Append to list to send upstream
818 		 */
819 		if (ret_mp == NULL) {
820 			ret_mp = ret_nmp = rx_data->rx_mblk;
821 		} else {
822 			ret_nmp->b_next = rx_data->rx_mblk;
823 			ret_nmp = rx_data->rx_mblk;
824 		}
825 		ret_nmp->b_next = NULL;
826 		*tail = ret_nmp;
827 		chain_sz += length;
828 
829 		rx_data->rx_mblk = NULL;
830 		rx_data->rx_mblk_tail = NULL;
831 		rx_data->rx_mblk_len = 0;
832 
833 		pkt_count++;
834 
835 rx_next_desc:
836 		/*
837 		 * Zero out the receive descriptors status
838 		 */
839 		current_desc->status = 0;
840 
841 		if (current_desc == rx_data->rbd_last)
842 			rx_data->rbd_next = rx_data->rbd_first;
843 		else
844 			rx_data->rbd_next++;
845 
846 		last_desc = current_desc;
847 		current_desc = rx_data->rbd_next;
848 
849 		/*
850 		 * Put the buffer that we just indicated back
851 		 * at the end of our list
852 		 */
853 		QUEUE_PUSH_TAIL(&rx_data->recv_list,
854 		    &packet->Link);
855 	}	/* while loop */
856 
857 	/* Sync the Rx descriptor DMA buffers */
858 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
859 	    0, 0, DDI_DMA_SYNC_FORDEV);
860 
861 	/*
862 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
863 	 */
864 	E1000_WRITE_REG(hw, E1000_RDT(0),
865 	    (uint32_t)(last_desc - rx_data->rbd_first));
866 
867 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
868 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
869 		Adapter->e1000g_state |= E1000G_ERROR;
870 	}
871 
872 	Adapter->rx_pkt_cnt = pkt_count;
873 
874 	return (ret_mp);
875 
876 rx_drop:
877 	/*
878 	 * Zero out the receive descriptors status
879 	 */
880 	current_desc->status = 0;
881 
882 	/* Sync the Rx descriptor DMA buffers */
883 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
884 	    0, 0, DDI_DMA_SYNC_FORDEV);
885 
886 	if (current_desc == rx_data->rbd_last)
887 		rx_data->rbd_next = rx_data->rbd_first;
888 	else
889 		rx_data->rbd_next++;
890 
891 	last_desc = current_desc;
892 
893 	QUEUE_PUSH_TAIL(&rx_data->recv_list, &packet->Link);
894 	/*
895 	 * Reclaim all old buffers already allocated during
896 	 * Jumbo receives.....for incomplete reception
897 	 */
898 	if (rx_data->rx_mblk != NULL) {
899 		freemsg(rx_data->rx_mblk);
900 		rx_data->rx_mblk = NULL;
901 		rx_data->rx_mblk_tail = NULL;
902 		rx_data->rx_mblk_len = 0;
903 	}
904 	/*
905 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
906 	 */
907 	E1000_WRITE_REG(hw, E1000_RDT(0),
908 	    (uint32_t)(last_desc - rx_data->rbd_first));
909 
910 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
911 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
912 		Adapter->e1000g_state |= E1000G_ERROR;
913 	}
914 
915 	return (ret_mp);
916 }
917 
918 /*
919  * This is part of a workaround for the I219, see e1000g_flush_desc_rings() for
920  * more information.
921  *
922  * Flush all descriptors in the rx ring and disable it.
923  */
924 void
e1000g_flush_rx_ring(struct e1000g * Adapter)925 e1000g_flush_rx_ring(struct e1000g *Adapter)
926 {
927 	struct e1000_hw	*hw = &Adapter->shared;
928 	uint32_t rctl, rxdctl;
929 
930 	rctl = E1000_READ_REG(hw, E1000_RCTL);
931 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
932 	E1000_WRITE_FLUSH(hw);
933 	usec_delay(150);
934 
935 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
936 	/* Zero the lower 14 bits (prefetch and host thresholds). */
937 	rxdctl &= 0xffffc000;
938 	/*
939 	 * Update thresholds: prefetch threshold to 31, host threshold to 1
940 	 * and make sure the granularity is "descriptors" and not "cache lines"
941 	 */
942 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
943 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
944 
945 	/* Momentarily enable the RX ring for the changes to take effect */
946 	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
947 	E1000_WRITE_FLUSH(hw);
948 	usec_delay(150);
949 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
950 
951 }
952