xref: /illumos-gate/usr/src/uts/common/io/ixgbe/ixgbe_rx.c (revision fcdb3229a31dd4ff700c69238814e326aad49098)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
24  */
25 
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Copyright 2017 Joyent, Inc.
29  */
30 
31 #include "ixgbe_sw.h"
32 
33 /* function prototypes */
34 static mblk_t *ixgbe_rx_bind(ixgbe_rx_data_t *, uint32_t, uint32_t);
35 static mblk_t *ixgbe_rx_copy(ixgbe_rx_data_t *, uint32_t, uint32_t);
36 static void ixgbe_rx_assoc_hcksum(mblk_t *, uint32_t);
37 static mblk_t *ixgbe_lro_bind(ixgbe_rx_data_t *, uint32_t, uint32_t, uint32_t);
38 static mblk_t *ixgbe_lro_copy(ixgbe_rx_data_t *, uint32_t, uint32_t, uint32_t);
39 static int ixgbe_lro_get_start(ixgbe_rx_data_t *, uint32_t);
40 static uint32_t ixgbe_lro_get_first(ixgbe_rx_data_t *, uint32_t);
41 
42 /*
43  * ixgbe_rx_recycle - The callback function to reclaim an rx buffer.
44  *
45  * This function is called when an mblk is freed by the user through a
46  * freeb() call (only for mblks constructed through desballoc()).
47  * It returns the freed buffer to the free list.
48  */
49 void
50 ixgbe_rx_recycle(caddr_t arg)
51 {
52 	ixgbe_t *ixgbe;
53 	ixgbe_rx_ring_t *rx_ring;
54 	ixgbe_rx_data_t	*rx_data;
55 	rx_control_block_t *recycle_rcb;
56 	uint32_t free_index;
57 	uint32_t ref_cnt;
58 
59 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
60 	rx_data = recycle_rcb->rx_data;
61 	rx_ring = rx_data->rx_ring;
62 	ixgbe = rx_ring->ixgbe;
63 
64 	if (recycle_rcb->ref_cnt == 0) {
65 		/*
66 		 * This case only happens when rx buffers are being freed
67 		 * in ixgbe_stop() and freemsg() is called.
68 		 */
69 		return;
70 	}
71 
72 	ASSERT(recycle_rcb->mp == NULL);
73 
74 	/*
75 	 * Using the recycled data buffer to generate a new mblk
76 	 */
77 	recycle_rcb->mp = desballoc((unsigned char *)
78 	    recycle_rcb->rx_buf.address,
79 	    recycle_rcb->rx_buf.size,
80 	    0, &recycle_rcb->free_rtn);
81 
82 	/*
83 	 * Put the recycled rx control block into free list
84 	 */
85 	mutex_enter(&rx_data->recycle_lock);
86 
87 	free_index = rx_data->rcb_tail;
88 	ASSERT(rx_data->free_list[free_index] == NULL);
89 
90 	rx_data->free_list[free_index] = recycle_rcb;
91 	rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
92 
93 	mutex_exit(&rx_data->recycle_lock);
94 
95 	/*
96 	 * The atomic operation on the number of the available rx control
97 	 * blocks in the free list is used to make recycling mutually
98 	 * exclusive with receiving.
99 	 */
100 	atomic_inc_32(&rx_data->rcb_free);
101 	ASSERT(rx_data->rcb_free <= rx_data->free_list_size);
102 
103 	/*
104 	 * Consider the case where the interface is unplumbed while some
105 	 * buffers are still held by the upper layer. When such a buffer
106 	 * is returned, we need to free it.
107 	 */
108 	ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt);
109 	if (ref_cnt == 0) {
110 		if (recycle_rcb->mp != NULL) {
111 			freemsg(recycle_rcb->mp);
112 			recycle_rcb->mp = NULL;
113 		}
114 
115 		ixgbe_free_dma_buffer(&recycle_rcb->rx_buf);
116 
117 		mutex_enter(&ixgbe->rx_pending_lock);
118 		atomic_dec_32(&rx_data->rcb_pending);
119 		atomic_dec_32(&ixgbe->rcb_pending);
120 
121 		/*
122 		 * When no buffers belonging to this rx_data are held by
123 		 * the upper layer, the rx_data can be freed.
124 		 */
125 		if ((rx_data->flag & IXGBE_RX_STOPPED) &&
126 		    (rx_data->rcb_pending == 0))
127 			ixgbe_free_rx_ring_data(rx_data);
128 
129 		mutex_exit(&ixgbe->rx_pending_lock);
130 	}
131 }
132 
133 /*
134  * ixgbe_rx_copy - Use copy to process the received packet.
135  *
136  * This function will use bcopy to process the packet
137  * and send the copied packet upstream.
138  */
139 static mblk_t *
140 ixgbe_rx_copy(ixgbe_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
141 {
142 	ixgbe_t *ixgbe;
143 	rx_control_block_t *current_rcb;
144 	mblk_t *mp;
145 
146 	ixgbe = rx_data->rx_ring->ixgbe;
147 	current_rcb = rx_data->work_list[index];
148 
149 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
150 
151 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
152 	    DDI_FM_OK) {
153 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
154 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
155 		return (NULL);
156 	}
157 
158 	/*
159 	 * Allocate buffer to receive this packet
160 	 */
161 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
162 	if (mp == NULL) {
163 		ixgbe_log(ixgbe, "ixgbe_rx_copy: allocate buffer failed");
164 		return (NULL);
165 	}
166 
167 	/*
168 	 * Copy the data received into the new cluster
169 	 */
170 	mp->b_rptr += IPHDR_ALIGN_ROOM;
171 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
172 	mp->b_wptr = mp->b_rptr + pkt_len;
173 
174 	return (mp);
175 }
176 
177 /*
178  * ixgbe_rx_bind - Use existing DMA buffer to build mblk for receiving.
179  *
180  * This function will use a pre-bound DMA buffer to receive the packet
181  * and build an mblk that will be sent upstream.
182  */
183 static mblk_t *
184 ixgbe_rx_bind(ixgbe_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
185 {
186 	rx_control_block_t *current_rcb;
187 	rx_control_block_t *free_rcb;
188 	uint32_t free_index;
189 	mblk_t *mp;
190 	ixgbe_t	*ixgbe = rx_data->rx_ring->ixgbe;
191 
192 	/*
193 	 * If the free list is empty, we cannot proceed to send
194 	 * the current DMA buffer upstream. We'll have to return
195 	 * and use bcopy to process the packet.
196 	 */
197 	if (ixgbe_atomic_reserve(&rx_data->rcb_free, 1) < 0)
198 		return (NULL);
199 
200 	current_rcb = rx_data->work_list[index];
201 	/*
202 	 * If the mp of the rx control block is NULL, try to do
203 	 * desballoc again.
204 	 */
205 	if (current_rcb->mp == NULL) {
206 		current_rcb->mp = desballoc((unsigned char *)
207 		    current_rcb->rx_buf.address,
208 		    current_rcb->rx_buf.size,
209 		    0, &current_rcb->free_rtn);
210 		/*
211 		 * If building an mblk from the current DMA buffer
212 		 * fails, we have to return and use bcopy to
213 		 * process the packet.
214 		 */
215 		if (current_rcb->mp == NULL) {
216 			atomic_inc_32(&rx_data->rcb_free);
217 			return (NULL);
218 		}
219 	}
220 	/*
221 	 * Sync up the data received
222 	 */
223 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
224 
225 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
226 	    DDI_FM_OK) {
227 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
228 		atomic_inc_32(&rx_data->rcb_free);
229 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
230 		return (NULL);
231 	}
232 
233 	mp = current_rcb->mp;
234 	current_rcb->mp = NULL;
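	/*
	 * Take an extra reference on the rcb: its data buffer is now loaned
	 * upstream and will be returned via ixgbe_rx_recycle() when the
	 * mblk is freed.
	 */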
235 	atomic_inc_32(&current_rcb->ref_cnt);
236 
237 	mp->b_wptr = mp->b_rptr + pkt_len;
238 	mp->b_next = mp->b_cont = NULL;
239 
240 	/*
241 	 * Strip off one free rx control block from the free list
242 	 */
243 	free_index = rx_data->rcb_head;
244 	free_rcb = rx_data->free_list[free_index];
245 	ASSERT(free_rcb != NULL);
246 	rx_data->free_list[free_index] = NULL;
247 	rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
248 
249 	/*
250 	 * Put the rx control block to the work list
251 	 */
252 	rx_data->work_list[index] = free_rcb;
253 
254 	return (mp);
255 }
256 
257 /*
258  * ixgbe_lro_bind - Use existing DMA buffer to build LRO mblk for receiving.
259  *
260  * This function will use pre-bound DMA buffers to receive the packet
261  * and build an LRO mblk that will be sent upstream.
262  */
263 static mblk_t *
264 ixgbe_lro_bind(ixgbe_rx_data_t *rx_data, uint32_t lro_start,
265     uint32_t lro_num, uint32_t pkt_len)
266 {
267 	rx_control_block_t *current_rcb;
268 	union ixgbe_adv_rx_desc *current_rbd;
269 	rx_control_block_t *free_rcb;
270 	uint32_t free_index;
271 	int lro_next;
272 	uint32_t last_pkt_len;
273 	uint32_t i;
274 	mblk_t *mp;
275 	mblk_t *mblk_head;
276 	mblk_t **mblk_tail;
277 	ixgbe_t	*ixgbe = rx_data->rx_ring->ixgbe;
278 
279 	/*
280 	 * If the free list is empty, we cannot proceed to send
281 	 * the current DMA buffer upstream. We'll have to return
282 	 * and use bcopy to process the packet.
283 	 */
284 	if (ixgbe_atomic_reserve(&rx_data->rcb_free, lro_num) < 0)
285 		return (NULL);
286 	current_rcb = rx_data->work_list[lro_start];
287 
288 	/*
289 	 * If any of the rx control blocks cannot support the
290 	 * LRO bind operation, we'll have to return and use
291 	 * bcopy to process the LRO packet.
292 	 */
293 	for (i = lro_num; i > 0; i--) {
294 		/*
295 		 * Sync up the data received
296 		 */
297 		DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
298 
299 		if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
300 		    DDI_FM_OK) {
301 			ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
302 			atomic_add_32(&rx_data->rcb_free, lro_num);
303 			atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
304 			return (NULL);
305 		}
306 
307 		/*
308 		 * If the mp of the rx control block is NULL, try to do
309 		 * desballoc again.
310 		 */
311 		if (current_rcb->mp == NULL) {
312 			current_rcb->mp = desballoc((unsigned char *)
313 			    current_rcb->rx_buf.address,
314 			    current_rcb->rx_buf.size,
315 			    0, &current_rcb->free_rtn);
316 			/*
317 			 * If building an mblk from the current DMA buffer
318 			 * fails, we have to return and use bcopy to
319 			 * process the packet.
320 			 */
321 			if (current_rcb->mp == NULL) {
322 				atomic_add_32(&rx_data->rcb_free, lro_num);
323 				return (NULL);
324 			}
325 		}
326 		if (current_rcb->lro_next != -1)
327 			lro_next = current_rcb->lro_next;
328 		current_rcb = rx_data->work_list[lro_next];
329 	}
330 
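	/*
	 * Walk the rx control blocks linked by lro_next and chain their
	 * mblks together through b_cont: every buffer but the last holds
	 * rx_buf_size bytes, the last holds the remaining last_pkt_len.
	 */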
331 	mblk_head = NULL;
332 	mblk_tail = &mblk_head;
333 	lro_next = lro_start;
334 	last_pkt_len = pkt_len - ixgbe->rx_buf_size * (lro_num - 1);
335 	current_rcb = rx_data->work_list[lro_next];
336 	current_rbd = &rx_data->rbd_ring[lro_next];
337 	while (lro_num--) {
338 		mp = current_rcb->mp;
339 		current_rcb->mp = NULL;
340 		atomic_inc_32(&current_rcb->ref_cnt);
341 		if (lro_num != 0)
342 			mp->b_wptr = mp->b_rptr + ixgbe->rx_buf_size;
343 		else
344 			mp->b_wptr = mp->b_rptr + last_pkt_len;
345 		mp->b_next = mp->b_cont = NULL;
346 		*mblk_tail = mp;
347 		mblk_tail = &mp->b_cont;
348 
349 		/*
350 		 * Strip off one free rx control block from the free list
351 		 */
352 		free_index = rx_data->rcb_head;
353 		free_rcb = rx_data->free_list[free_index];
354 		ASSERT(free_rcb != NULL);
355 		rx_data->free_list[free_index] = NULL;
356 		rx_data->rcb_head = NEXT_INDEX(free_index, 1,
357 		    rx_data->free_list_size);
358 
359 		/*
360 		 * Put the rx control block to the work list
361 		 */
362 		rx_data->work_list[lro_next] = free_rcb;
363 		lro_next = current_rcb->lro_next;
364 		current_rcb->lro_next = -1;
365 		current_rcb->lro_prev = -1;
366 		current_rcb->lro_pkt = B_FALSE;
367 		current_rbd->read.pkt_addr = free_rcb->rx_buf.dma_address;
368 		current_rbd->read.hdr_addr = 0;
369 		if (lro_next == -1)
370 			break;
371 		current_rcb = rx_data->work_list[lro_next];
372 		current_rbd = &rx_data->rbd_ring[lro_next];
373 	}
374 	return (mblk_head);
375 }
376 
377 /*
378  * ixgbe_lro_copy - Use copy to process the received LRO packet.
379  *
380  * This function will use bcopy to process the LRO packet
381  * and send the copied packet upstream.
382  */
383 static mblk_t *
384 ixgbe_lro_copy(ixgbe_rx_data_t *rx_data, uint32_t lro_start,
385     uint32_t lro_num, uint32_t pkt_len)
386 {
387 	ixgbe_t *ixgbe;
388 	rx_control_block_t *current_rcb;
389 	union ixgbe_adv_rx_desc *current_rbd;
390 	mblk_t *mp;
391 	uint32_t last_pkt_len;
392 	int lro_next;
393 	uint32_t i;
394 
395 	ixgbe = rx_data->rx_ring->ixgbe;
396 
397 	/*
398 	 * Allocate buffer to receive this LRO packet
399 	 */
400 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
401 	if (mp == NULL) {
402 		ixgbe_log(ixgbe, "LRO copy MP alloc failed");
403 		return (NULL);
404 	}
405 
406 	current_rcb = rx_data->work_list[lro_start];
407 
408 	/*
409 	 * Sync up the LRO packet data received
410 	 */
411 	for (i = lro_num; i > 0; i--) {
412 		DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
413 
414 		if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
415 		    DDI_FM_OK) {
416 			ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
417 			atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
			freemsg(mp);
418 			return (NULL);
419 		}
420 		if (current_rcb->lro_next != -1)
421 			lro_next = current_rcb->lro_next;
422 		current_rcb = rx_data->work_list[lro_next];
423 	}
424 	lro_next = lro_start;
425 	current_rcb = rx_data->work_list[lro_next];
426 	current_rbd = &rx_data->rbd_ring[lro_next];
427 	last_pkt_len = pkt_len - ixgbe->rx_buf_size * (lro_num - 1);
428 
429 	/*
430 	 * Copy the data received into the new cluster
431 	 */
432 	mp->b_rptr += IPHDR_ALIGN_ROOM;
433 	mp->b_wptr += IPHDR_ALIGN_ROOM;
434 	while (lro_num--) {
435 		if (lro_num != 0) {
436 			bcopy(current_rcb->rx_buf.address, mp->b_wptr,
437 			    ixgbe->rx_buf_size);
438 			mp->b_wptr += ixgbe->rx_buf_size;
439 		} else {
440 			bcopy(current_rcb->rx_buf.address, mp->b_wptr,
441 			    last_pkt_len);
442 			mp->b_wptr += last_pkt_len;
443 		}
444 		lro_next = current_rcb->lro_next;
445 		current_rcb->lro_next = -1;
446 		current_rcb->lro_prev = -1;
447 		current_rcb->lro_pkt = B_FALSE;
448 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
449 		current_rbd->read.hdr_addr = 0;
450 		if (lro_next == -1)
451 			break;
452 		current_rcb = rx_data->work_list[lro_next];
453 		current_rbd = &rx_data->rbd_ring[lro_next];
454 	}
455 
456 	return (mp);
457 }
458 
459 /*
460  * ixgbe_lro_get_start - get the start rcb index in one LRO packet
461  */
462 static int
463 ixgbe_lro_get_start(ixgbe_rx_data_t *rx_data, uint32_t rx_next)
464 {
465 	int lro_prev;
466 	int lro_start;
467 	uint32_t lro_num = 1;
468 	rx_control_block_t *prev_rcb;
469 	rx_control_block_t *current_rcb = rx_data->work_list[rx_next];
470 	lro_prev = current_rcb->lro_prev;
471 
472 	while (lro_prev != -1) {
473 		lro_num++;
474 		prev_rcb = rx_data->work_list[lro_prev];
475 		lro_start = lro_prev;
476 		lro_prev = prev_rcb->lro_prev;
477 	}
478 	rx_data->lro_num = lro_num;
479 	return (lro_start);
480 }
481 
482 /*
483  * ixgbe_lro_get_first - get the first LRO rcb index
484  */
485 static uint32_t
486 ixgbe_lro_get_first(ixgbe_rx_data_t *rx_data, uint32_t rx_next)
487 {
488 	rx_control_block_t *current_rcb;
489 	uint32_t lro_first;
490 	lro_first = rx_data->lro_first;
491 	current_rcb = rx_data->work_list[lro_first];
492 	while ((!current_rcb->lro_pkt) && (lro_first != rx_next)) {
493 		lro_first = NEXT_INDEX(lro_first, 1, rx_data->ring_size);
494 		current_rcb = rx_data->work_list[lro_first];
495 	}
496 	rx_data->lro_first = lro_first;
497 	return (lro_first);
498 }
499 
500 /*
501  * ixgbe_rx_assoc_hcksum - Check the rx hardware checksum status and associate
502  * the hcksum flags.
503  */
504 static void
505 ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
506 {
507 	uint32_t hcksum_flags = 0;
508 
509 	/*
510 	 * Check TCP/UDP checksum
511 	 */
512 	if ((status_error & IXGBE_RXD_STAT_L4CS) &&
513 	    !(status_error & IXGBE_RXDADV_ERR_TCPE))
514 		hcksum_flags |= HCK_FULLCKSUM_OK;
515 
516 	/*
517 	 * Check IP Checksum
518 	 */
519 	if ((status_error & IXGBE_RXD_STAT_IPCS) &&
520 	    !(status_error & IXGBE_RXDADV_ERR_IPE))
521 		hcksum_flags |= HCK_IPV4_HDRCKSUM_OK;
522 
523 	if (hcksum_flags != 0) {
524 		mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags);
525 	}
526 }
527 
528 /*
529  * ixgbe_ring_rx - Receive the data of one ring.
530  *
531  * This function goes through the h/w descriptors in the specified rx
532  * ring, receiving the data if the descriptor status shows it is ready.
533  * It returns a chain of mblks containing the received data, to be
534  * passed up to mac_rx().
535  */
536 mblk_t *
537 ixgbe_ring_rx(ixgbe_rx_ring_t *rx_ring, int poll_bytes)
538 {
539 	union ixgbe_adv_rx_desc *current_rbd;
540 	rx_control_block_t *current_rcb;
541 	mblk_t *mp;
542 	mblk_t *mblk_head;
543 	mblk_t **mblk_tail;
544 	uint32_t rx_next;
545 	uint32_t rx_tail;
546 	uint32_t pkt_len;
547 	uint32_t status_error;
548 	uint32_t pkt_num;
549 	uint32_t rsc_cnt;
550 	uint32_t lro_first;
551 	uint32_t lro_start;
552 	uint32_t lro_next;
553 	boolean_t lro_eop;
554 	uint32_t received_bytes;
555 	ixgbe_t *ixgbe = rx_ring->ixgbe;
556 	ixgbe_rx_data_t *rx_data;
557 
558 	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
559 	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
560 	    (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
561 	    !(ixgbe->ixgbe_state & IXGBE_STARTED))
562 		return (NULL);
563 
564 	rx_data = rx_ring->rx_data;
565 	lro_eop = B_FALSE;
566 	mblk_head = NULL;
567 	mblk_tail = &mblk_head;
568 
569 	/*
570 	 * Sync the receive descriptors before accepting the packets
571 	 */
572 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL);
573 
574 	if (ixgbe_check_dma_handle(rx_data->rbd_area.dma_handle) != DDI_FM_OK) {
575 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
576 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
577 		return (NULL);
578 	}
579 
580 	/*
581 	 * Get the start point of rx bd ring which should be examined
582 	 * during this cycle.
583 	 */
584 	rx_next = rx_data->rbd_next;
585 	current_rbd = &rx_data->rbd_ring[rx_next];
586 	received_bytes = 0;
587 	pkt_num = 0;
588 	status_error = current_rbd->wb.upper.status_error;
589 	while (status_error & IXGBE_RXD_STAT_DD) {
590 		/*
591 		 * If the adapter has found errors, but the error is a
592 		 * hardware checksum error, do not discard the packet;
593 		 * let the upper layer compute the checksum.
594 		 * Otherwise, discard the packet.
595 		 */
596 		if ((status_error & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
597 		    ((!ixgbe->lro_enable) &&
598 		    (!(status_error & IXGBE_RXD_STAT_EOP)))) {
599 			rx_ring->stat_frame_error++;
600 			goto rx_discard;
601 		}
602 
603 		if ((status_error & IXGBE_RXDADV_ERR_TCPE) ||
604 		    (status_error & IXGBE_RXDADV_ERR_IPE))
605 			rx_ring->stat_cksum_error++;
606 
607 		if (ixgbe->lro_enable) {
608 			rsc_cnt = (current_rbd->wb.lower.lo_dword.data &
609 			    IXGBE_RXDADV_RSCCNT_MASK) >>
610 			    IXGBE_RXDADV_RSCCNT_SHIFT;
611 			if (rsc_cnt != 0) {
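				/*
				 * A non-zero RSC count means hardware is
				 * coalescing (LRO) and this packet may span
				 * several descriptors linked via NEXTP.
				 */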
612 				if (status_error & IXGBE_RXD_STAT_EOP) {
613 					pkt_len = current_rbd->wb.upper.length;
614 					if (rx_data->work_list[rx_next]->
615 					    lro_prev != -1) {
616 						lro_start =
617 						    ixgbe_lro_get_start(rx_data,
618 						    rx_next);
619 						ixgbe->lro_pkt_count++;
620 						pkt_len +=
621 						    (rx_data->lro_num - 1) *
622 						    ixgbe->rx_buf_size;
623 						lro_eop = B_TRUE;
624 					}
625 				} else {
626 					lro_next = (status_error &
627 					    IXGBE_RXDADV_NEXTP_MASK) >>
628 					    IXGBE_RXDADV_NEXTP_SHIFT;
629 					rx_data->work_list[lro_next]->lro_prev
630 					    = rx_next;
631 					rx_data->work_list[rx_next]->lro_next =
632 					    lro_next;
633 					rx_data->work_list[rx_next]->lro_pkt =
634 					    B_TRUE;
635 					goto rx_discard;
636 				}
637 
638 			} else {
639 				pkt_len = current_rbd->wb.upper.length;
640 			}
641 		} else {
642 			pkt_len = current_rbd->wb.upper.length;
643 		}
644 
645 
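		/*
		 * In polling mode, stop before exceeding the byte budget
		 * requested by the caller for this call.
		 */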
646 		if ((poll_bytes != IXGBE_POLL_NULL) &&
647 		    ((received_bytes + pkt_len) > poll_bytes))
648 			break;
649 
650 		received_bytes += pkt_len;
651 		mp = NULL;
652 
653 		/*
654 		 * For packets with length more than the copy threshold,
655 		 * we'll first try to use the existing DMA buffer to build
656 		 * an mblk and send the mblk upstream.
657 		 *
658 		 * If the first method fails, or the packet length is less
659 		 * than the copy threshold, we'll allocate a new mblk and
660 		 * copy the packet data to the new mblk.
661 		 */
662 		if (lro_eop) {
663 			mp = ixgbe_lro_bind(rx_data, lro_start,
664 			    rx_data->lro_num, pkt_len);
665 			if (mp == NULL)
666 				mp = ixgbe_lro_copy(rx_data, lro_start,
667 				    rx_data->lro_num, pkt_len);
668 			lro_eop = B_FALSE;
669 			rx_data->lro_num = 0;
670 
671 		} else {
672 			if (pkt_len > ixgbe->rx_copy_thresh)
673 				mp = ixgbe_rx_bind(rx_data, rx_next, pkt_len);
674 
675 			if (mp == NULL)
676 				mp = ixgbe_rx_copy(rx_data, rx_next, pkt_len);
677 		}
678 		if (mp != NULL) {
679 			/*
680 			 * Check h/w checksum offload status
681 			 */
682 			if (ixgbe->rx_hcksum_enable)
683 				ixgbe_rx_assoc_hcksum(mp, status_error);
684 
685 			*mblk_tail = mp;
686 			mblk_tail = &mp->b_next;
687 		}
688 
689 rx_discard:
690 		/*
691 		 * Reset rx descriptor read bits
692 		 */
693 		current_rcb = rx_data->work_list[rx_next];
694 		if (ixgbe->lro_enable) {
695 			if (!current_rcb->lro_pkt) {
696 				current_rbd->read.pkt_addr =
697 				    current_rcb->rx_buf.dma_address;
698 				current_rbd->read.hdr_addr = 0;
699 			}
700 		} else {
701 			current_rbd->read.pkt_addr =
702 			    current_rcb->rx_buf.dma_address;
703 			current_rbd->read.hdr_addr = 0;
704 		}
705 
706 		rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size);
707 
708 		/*
709 		 * The receive function is in interrupt context, so
710 		 * rx_limit_per_intr is used here to avoid spending too long
711 		 * receiving per interrupt.
712 		 */
713 		if (++pkt_num > ixgbe->rx_limit_per_intr) {
714 			rx_ring->stat_exceed_pkt++;
715 			break;
716 		}
717 
718 		current_rbd = &rx_data->rbd_ring[rx_next];
719 		status_error = current_rbd->wb.upper.status_error;
720 	}
721 
722 	rx_ring->stat_rbytes += received_bytes;
723 	rx_ring->stat_ipackets += pkt_num;
724 
725 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV);
726 
727 	rx_data->rbd_next = rx_next;
728 
729 	/*
730 	 * Update the h/w tail accordingly
731 	 */
732 	if (ixgbe->lro_enable) {
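		/*
		 * Descriptors that are part of an incomplete LRO chain
		 * cannot be returned to hardware yet, so stop the tail
		 * just before the first outstanding LRO descriptor.
		 */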
733 		lro_first = ixgbe_lro_get_first(rx_data, rx_next);
734 		rx_tail = PREV_INDEX(lro_first, 1, rx_data->ring_size);
735 	} else
736 		rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size);
737 
738 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->hw_index), rx_tail);
739 
740 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
741 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
742 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
743 	}
744 
745 	return (mblk_head);
746 }
747 
748 mblk_t *
749 ixgbe_ring_rx_poll(void *arg, int n_bytes)
750 {
751 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)arg;
752 	mblk_t *mp = NULL;
753 
754 	ASSERT(n_bytes >= 0);
755 
756 	if (n_bytes == 0)
757 		return (NULL);
758 
759 	mutex_enter(&rx_ring->rx_lock);
760 	mp = ixgbe_ring_rx(rx_ring, n_bytes);
761 	mutex_exit(&rx_ring->rx_lock);
762 
763 	return (mp);
764 }
765