/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "igb_sw.h"

/* function prototypes */
static mblk_t *igb_rx_bind(igb_rx_data_t *, uint32_t, uint32_t);
static mblk_t *igb_rx_copy(igb_rx_data_t *, uint32_t, uint32_t);
static void igb_rx_assoc_hcksum(mblk_t *, uint32_t);

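/*
 * On non-DEBUG builds, hint the compiler to inline the checksum
 * association routine, which runs for every received packet.
 */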
#ifndef IGB_DEBUG
#pragma inline(igb_rx_assoc_hcksum)
#endif


/*
 * igb_rx_recycle - the call-back function to reclaim an rx buffer
 *
 * This function is called when an mp is freed by the user through a
 * freeb() call (only for mblks constructed through desballoc()).
 * It returns the freed buffer to the free list.
 */
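/*
 * For reference, a minimal sketch (not in this file; the setup location
 * is assumed) of how the free routine is wired up when an rcb's DMA
 * buffer is first wrapped by desballoc(9F), so that freeb(9F) on the
 * resulting mblk invokes igb_rx_recycle() with the rcb as its argument:
 *
 *	rcb->free_rtn.free_func = igb_rx_recycle;
 *	rcb->free_rtn.free_arg = (caddr_t)rcb;
 *	rcb->mp = desballoc((unsigned char *)rcb->rx_buf.address,
 *	    rcb->rx_buf.size, 0, &rcb->free_rtn);
 */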
void
igb_rx_recycle(caddr_t arg)
{
	igb_t *igb;
	igb_rx_ring_t *rx_ring;
	igb_rx_data_t	*rx_data;
	rx_control_block_t *recycle_rcb;
	uint32_t free_index;
	uint32_t ref_cnt;

	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
	rx_data = recycle_rcb->rx_data;
	rx_ring = rx_data->rx_ring;
	igb = rx_ring->igb;

	if (recycle_rcb->ref_cnt == 0) {
		/*
		 * This case only happens when rx buffers are being freed
		 * in igb_stop() and freemsg() is called.
		 */
		return;
	}

	ASSERT(recycle_rcb->mp == NULL);

	/*
	 * Using the recycled data buffer to generate a new mblk
	 */
	recycle_rcb->mp = desballoc((unsigned char *)
	    recycle_rcb->rx_buf.address,
	    recycle_rcb->rx_buf.size,
	    0, &recycle_rcb->free_rtn);

	/*
	 * Put the recycled rx control block onto the free list.  The free
	 * list is a ring: rcb_tail is the producer index updated here, and
	 * rcb_head is the consumer index used by igb_rx_bind().  NEXT_INDEX()
	 * advances an index by one, wrapping at free_list_size.
	 */
	mutex_enter(&rx_data->recycle_lock);

	free_index = rx_data->rcb_tail;
	ASSERT(rx_data->free_list[free_index] == NULL);

	rx_data->free_list[free_index] = recycle_rcb;
	rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size);

	mutex_exit(&rx_data->recycle_lock);

	/*
	 * The atomic operation on the count of available rx control blocks
	 * in the free list keeps recycling mutually exclusive with receiving;
	 * this increment pairs with the igb_atomic_reserve() decrement in
	 * igb_rx_bind().
	 */
	atomic_inc_32(&rx_data->rcb_free);
	ASSERT(rx_data->rcb_free <= rx_data->free_list_size);

	/*
	 * Consider the case where the interface has been unplumbed while
	 * some buffers are still held by the upper layer.  When such a
	 * buffer is finally returned, we need to free it.
	 */
	ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt);
	if (ref_cnt == 0) {
		if (recycle_rcb->mp != NULL) {
			freemsg(recycle_rcb->mp);
			recycle_rcb->mp = NULL;
		}

		igb_free_dma_buffer(&recycle_rcb->rx_buf);

		mutex_enter(&igb->rx_pending_lock);
		atomic_dec_32(&rx_data->rcb_pending);
		atomic_dec_32(&igb->rcb_pending);

		/*
		 * Once no buffers belonging to this rx_data are held by
		 * the upper layer, the rx_data can be freed.
		 */
		if ((rx_data->flag & IGB_RX_STOPPED) &&
		    (rx_data->rcb_pending == 0))
			igb_free_rx_ring_data(rx_data);

		mutex_exit(&igb->rx_pending_lock);
	}
}

/*
 * igb_rx_copy - Use copy to process the received packet
 *
 * This function uses bcopy() to copy the received packet into a newly
 * allocated mblk and sends the copied packet upstream.
 */
static mblk_t *
igb_rx_copy(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
{
	rx_control_block_t *current_rcb;
	mblk_t *mp;
	igb_t *igb = rx_data->rx_ring->igb;

	current_rcb = rx_data->work_list[index];

	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (NULL);
	}

	/*
	 * Allocate buffer to receive this packet
	 */
	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
	if (mp == NULL) {
		igb_log(igb, IGB_LOG_INFO,
		    "igb_rx_copy: allocate buffer failed");
		return (NULL);
	}

	/*
	 * Advance b_rptr by IPHDR_ALIGN_ROOM so that the IP header that
	 * follows the Ethernet header is better aligned for the upper
	 * layers, then copy the received data into the new mblk.
	 */
	mp->b_rptr += IPHDR_ALIGN_ROOM;
	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
	mp->b_wptr = mp->b_rptr + pkt_len;

	return (mp);
}

/*
 * igb_rx_bind - Use an existing DMA buffer to build an mblk for receiving
 *
 * This function uses the pre-bound DMA buffer to receive the packet and
 * builds an mblk that will be sent upstream, avoiding a data copy.
 */
static mblk_t *
igb_rx_bind(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
{
	rx_control_block_t *current_rcb;
	rx_control_block_t *free_rcb;
	uint32_t free_index;
	mblk_t *mp;
	igb_t *igb = rx_data->rx_ring->igb;

	/*
	 * If the free list is empty, we cannot proceed to send
	 * the current DMA buffer upstream. We'll have to return
	 * and use bcopy to process the packet.
	 */
	if (igb_atomic_reserve(&rx_data->rcb_free, 1) < 0)
		return (NULL);

	current_rcb = rx_data->work_list[index];
	/*
	 * If the mp of the rx control block is NULL, try to do
	 * desballoc again.
	 */
	if (current_rcb->mp == NULL) {
		current_rcb->mp = desballoc((unsigned char *)
		    current_rcb->rx_buf.address,
		    current_rcb->rx_buf.size,
		    0, &current_rcb->free_rtn);
		/*
		 * If building an mblk from the current DMA buffer fails,
		 * we have to return and use bcopy to process the packet.
		 */
		if (current_rcb->mp == NULL) {
			atomic_inc_32(&rx_data->rcb_free);
			return (NULL);
		}
	}
	/*
	 * Sync up the data received
	 */
	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		atomic_inc_32(&rx_data->rcb_free);
		return (NULL);
	}

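	/*
	 * The DMA buffer is now loaned upstream: the extra reference
	 * taken on ref_cnt below marks it as outstanding, and
	 * igb_rx_recycle() drops that reference (returning the rcb to
	 * the free list) once the stack frees the mblk.
	 */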
	mp = current_rcb->mp;
	current_rcb->mp = NULL;
	atomic_inc_32(&current_rcb->ref_cnt);

	mp->b_wptr = mp->b_rptr + pkt_len;
	mp->b_next = mp->b_cont = NULL;

	/*
	 * Strip off one free rx control block from the free list
	 */
	free_index = rx_data->rcb_head;
	free_rcb = rx_data->free_list[free_index];
	ASSERT(free_rcb != NULL);
	rx_data->free_list[free_index] = NULL;
	rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size);

	/*
	 * Put the rx control block onto the work list
	 */
	rx_data->work_list[index] = free_rcb;

	return (mp);
}

/*
 * igb_rx_assoc_hcksum
 *
 * Check the rx hardware checksum status and associate the hcksum flags
 * with the mblk.
 */
static void
igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
{
	uint32_t hcksum_flags = 0;

	/* Ignore Checksum Indication */
	if (status_error & E1000_RXD_STAT_IXSM)
		return;

	/*
	 * Check TCP/UDP checksum
	 */
	if (((status_error & E1000_RXD_STAT_TCPCS) ||
	    (status_error & E1000_RXD_STAT_UDPCS)) &&
	    !(status_error & E1000_RXDEXT_STATERR_TCPE))
		hcksum_flags |= HCK_FULLCKSUM_OK;

	/*
	 * Check IP Checksum
	 */
	if ((status_error & E1000_RXD_STAT_IPCS) &&
	    !(status_error & E1000_RXDEXT_STATERR_IPE))
		hcksum_flags |= HCK_IPV4_HDRCKSUM_OK;

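	/*
	 * The *_OK flags assert that hardware already verified the
	 * checksums, so the start/stuff/end/value arguments of
	 * mac_hcksum_set(9F) are not needed and are passed as zero.
	 */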
	if (hcksum_flags != 0) {
		mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags);
	}
}

mblk_t *
igb_rx_ring_poll(void *arg, int bytes)
{
	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg;
	mblk_t *mp = NULL;

	ASSERT(bytes >= 0);

	if ((bytes == 0) || (rx_ring->igb->igb_state & IGB_SUSPENDED) ||
	    !(rx_ring->igb->igb_state & IGB_STARTED))
		return (NULL);

	mutex_enter(&rx_ring->rx_lock);
	mp = igb_rx(rx_ring, bytes);
	mutex_exit(&rx_ring->rx_lock);

	return (mp);
}

/*
 * igb_rx - Receive the data of one ring
 *
 * This function goes through the h/w descriptors of one specified rx
 * ring, and receives the data when a descriptor's status shows that the
 * data is ready.  It returns a chain of mblks containing the received
 * data, to be passed up to mac_rx().
 */
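/*
 * igb_rx() is reached from two paths: igb_rx_ring_poll() above, which
 * passes the byte budget supplied by mac, and (presumably, given the
 * IGB_NO_POLL check below and the per-interrupt packet limit) the
 * interrupt handler, which imposes no byte budget.
 */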
mblk_t *
igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes)
{
	union e1000_adv_rx_desc *current_rbd;
	rx_control_block_t *current_rcb;
	mblk_t *mp;
	mblk_t *mblk_head;
	mblk_t **mblk_tail;
	uint32_t rx_next;
	uint32_t rx_tail;
	uint32_t pkt_len;
	uint32_t status_error;
	uint32_t pkt_num;
	uint32_t total_bytes;
	igb_t *igb = rx_ring->igb;
	igb_rx_data_t *rx_data = rx_ring->rx_data;

	mblk_head = NULL;
	mblk_tail = &mblk_head;

	if (igb->igb_state & IGB_ERROR)
		return (NULL);

	/*
	 * Sync the receive descriptors before
	 * accepting the packets
	 */
	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    rx_data->rbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (NULL);
	}

	/*
	 * Get the starting point of the rx descriptor ring to be examined
	 * during this cycle.
	 */
	rx_next = rx_data->rbd_next;

	current_rbd = &rx_data->rbd_ring[rx_next];
	pkt_num = 0;
	total_bytes = 0;
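	/*
	 * Hardware sets E1000_RXD_STAT_DD (descriptor done) in the
	 * write-back status once it has stored a packet, so descriptors
	 * are consumed in ring order until one that is not yet done is
	 * found.
	 */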
	status_error = current_rbd->wb.upper.status_error;
	while (status_error & E1000_RXD_STAT_DD) {
		/*
		 * If hardware has found errors, but the only error is a
		 * hardware checksum error, do not discard the packet and
		 * let the upper layer compute the checksum; otherwise
		 * discard the packet.  Packets without EOP set (i.e. not
		 * fully contained in one descriptor) are discarded as well.
		 */
		if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
		    !(status_error & E1000_RXD_STAT_EOP)) {
			IGB_DEBUG_STAT(rx_ring->stat_frame_error);
			goto rx_discard;
		}

		IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
		    (status_error & E1000_RXDEXT_STATERR_TCPE) ||
		    (status_error & E1000_RXDEXT_STATERR_IPE));

		pkt_len = current_rbd->wb.upper.length;

		if ((poll_bytes != IGB_NO_POLL) &&
		    ((pkt_len + total_bytes) > poll_bytes))
			break;

		IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt);
		total_bytes += pkt_len;

		mp = NULL;
		/*
		 * For packets with length above the copy threshold, we'll
		 * first try to use the existing DMA buffer to build an
		 * mblk and send the mblk upstream.
		 *
		 * If the first method fails, or the packet length does not
		 * exceed the copy threshold, we'll allocate a new mblk and
		 * copy the packet data into it.
		 */
		if (pkt_len > igb->rx_copy_thresh)
			mp = igb_rx_bind(rx_data, rx_next, pkt_len);

		if (mp == NULL)
			mp = igb_rx_copy(rx_data, rx_next, pkt_len);

		if (mp != NULL) {
			/*
			 * Check h/w checksum offload status
			 */
			if (igb->rx_hcksum_enable)
				igb_rx_assoc_hcksum(mp, status_error);

			*mblk_tail = mp;
			mblk_tail = &mp->b_next;
		}

		/* Update per-ring rx statistics */
		rx_ring->rx_pkts++;
		rx_ring->rx_bytes += pkt_len;

rx_discard:
		/*
		 * Re-arm the descriptor for reuse by hardware: point it at
		 * the DMA buffer of the (possibly replaced) rx control block
		 * and clear the header address.
		 */
		current_rcb = rx_data->work_list[rx_next];
		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
		current_rbd->read.hdr_addr = 0;

		rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size);

		/*
		 * The receive function is in interrupt context, so
		 * rx_limit_per_intr is used to avoid spending too long
		 * receiving packets in a single interrupt.
		 */
		if (++pkt_num > igb->rx_limit_per_intr) {
			IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt);
			break;
		}

		current_rbd = &rx_data->rbd_ring[rx_next];
		status_error = current_rbd->wb.upper.status_error;
	}

	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV);

	rx_data->rbd_next = rx_next;

	/*
	 * Update the h/w tail (RDT) accordingly.  The tail is set one
	 * descriptor behind rbd_next because hardware stops fetching
	 * descriptors when its head reaches the tail.
	 */
	rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size);

	E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
	}

	return (mblk_head);
}
461