xref: /illumos-gate/usr/src/uts/common/io/igb/igb_rx.c (revision d8a7fe16f62711cdc5c4267da8b34ff24a6b668c)
1 /*
2  * CDDL HEADER START
3  *
4  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at:
10  *	http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When using or redistributing this file, you may do so under the
15  * License only. No other modification of this header is permitted.
16  *
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 
24 /*
25  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms of the CDDL.
27  */
28 
29 #include "igb_sw.h"
30 
31 /* function prototypes */
32 static mblk_t *igb_rx_bind(igb_rx_ring_t *, uint32_t, uint32_t);
33 static mblk_t *igb_rx_copy(igb_rx_ring_t *, uint32_t, uint32_t);
34 static void igb_rx_assoc_hcksum(mblk_t *, uint32_t);
35 
36 #ifndef IGB_DEBUG
37 #pragma inline(igb_rx_assoc_hcksum)
38 #endif
39 
40 
41 /*
42  * igb_rx_recycle - the call-back function to reclaim rx buffer
43  *
44  * This function is called when an mp is freed by the user thru
45  * freeb call (Only for mp constructed through desballoc call).
46  * It returns back the freed buffer to the free list.
47  */
48 void
49 igb_rx_recycle(caddr_t arg)
50 {
51 	igb_rx_ring_t *rx_ring;
52 	rx_control_block_t *recycle_rcb;
53 	uint32_t free_index;
54 
55 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
56 	rx_ring = recycle_rcb->rx_ring;
57 
58 	if (recycle_rcb->state == RCB_FREE)
59 		return;
60 
61 	recycle_rcb->state = RCB_FREE;
62 
63 	ASSERT(recycle_rcb->mp == NULL);
64 
65 	/*
66 	 * Using the recycled data buffer to generate a new mblk
67 	 */
68 	recycle_rcb->mp = desballoc((unsigned char *)
69 	    recycle_rcb->rx_buf.address,
70 	    recycle_rcb->rx_buf.size,
71 	    0, &recycle_rcb->free_rtn);
72 
73 	/*
74 	 * Put the recycled rx control block into free list
75 	 */
76 	mutex_enter(&rx_ring->recycle_lock);
77 
78 	free_index = rx_ring->rcb_tail;
79 	ASSERT(rx_ring->free_list[free_index] == NULL);
80 
81 	rx_ring->free_list[free_index] = recycle_rcb;
82 	rx_ring->rcb_tail = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);
83 
84 	mutex_exit(&rx_ring->recycle_lock);
85 
86 	/*
87 	 * The atomic operation on the number of the available rx control
88 	 * blocks in the free list is used to make the recycling mutual
89 	 * exclusive with the receiving.
90 	 */
91 	atomic_inc_32(&rx_ring->rcb_free);
92 	ASSERT(rx_ring->rcb_free <= rx_ring->free_list_size);
93 }
94 
95 /*
96  * igb_rx_copy - Use copy to process the received packet
97  *
98  * This function will use bcopy to process the packet
99  * and send the copied packet upstream
100  */
101 static mblk_t *
102 igb_rx_copy(igb_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
103 {
104 	rx_control_block_t *current_rcb;
105 	mblk_t *mp;
106 	igb_t *igb = rx_ring->igb;
107 
108 	current_rcb = rx_ring->work_list[index];
109 
110 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
111 
112 	if (igb_check_dma_handle(
113 	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
114 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
115 		atomic_or_32(&igb->igb_state, IGB_ERROR);
116 		return (NULL);
117 	}
118 
119 	/*
120 	 * Allocate buffer to receive this packet
121 	 */
122 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
123 	if (mp == NULL) {
124 		igb_log(rx_ring->igb, "igb_rx_copy: allocate buffer failed");
125 		return (NULL);
126 	}
127 
128 	/*
129 	 * Copy the data received into the new cluster
130 	 */
131 	mp->b_rptr += IPHDR_ALIGN_ROOM;
132 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
133 	mp->b_wptr = mp->b_rptr + pkt_len;
134 
135 	return (mp);
136 }
137 
138 /*
139  * igb_rx_bind - Use existing DMA buffer to build mblk for receiving
140  *
141  * This function will use pre-bound DMA buffer to receive the packet
142  * and build mblk that will be sent upstream.
143  */
144 static mblk_t *
145 igb_rx_bind(igb_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
146 {
147 	rx_control_block_t *current_rcb;
148 	rx_control_block_t *free_rcb;
149 	uint32_t free_index;
150 	mblk_t *mp;
151 	igb_t *igb = rx_ring->igb;
152 
153 	/*
154 	 * If the free list is empty, we cannot proceed to send
155 	 * the current DMA buffer upstream. We'll have to return
156 	 * and use bcopy to process the packet.
157 	 */
158 	if (igb_atomic_reserve(&rx_ring->rcb_free, 1) < 0)
159 		return (NULL);
160 
161 	current_rcb = rx_ring->work_list[index];
162 	/*
163 	 * If the mp of the rx control block is NULL, try to do
164 	 * desballoc again.
165 	 */
166 	if (current_rcb->mp == NULL) {
167 		current_rcb->mp = desballoc((unsigned char *)
168 		    current_rcb->rx_buf.address,
169 		    current_rcb->rx_buf.size,
170 		    0, &current_rcb->free_rtn);
171 		/*
172 		 * If it is failed to built a mblk using the current
173 		 * DMA buffer, we have to return and use bcopy to
174 		 * process the packet.
175 		 */
176 		if (current_rcb->mp == NULL) {
177 			atomic_inc_32(&rx_ring->rcb_free);
178 			return (NULL);
179 		}
180 	}
181 	/*
182 	 * Sync up the data received
183 	 */
184 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
185 
186 	if (igb_check_dma_handle(
187 	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
188 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
189 		atomic_or_32(&igb->igb_state, IGB_ERROR);
190 		atomic_inc_32(&rx_ring->rcb_free);
191 		return (NULL);
192 	}
193 
194 	mp = current_rcb->mp;
195 	current_rcb->mp = NULL;
196 	current_rcb->state = RCB_SENDUP;
197 
198 	mp->b_wptr = mp->b_rptr + pkt_len;
199 	mp->b_next = mp->b_cont = NULL;
200 
201 	/*
202 	 * Strip off one free rx control block from the free list
203 	 */
204 	free_index = rx_ring->rcb_head;
205 	free_rcb = rx_ring->free_list[free_index];
206 	ASSERT(free_rcb != NULL);
207 	rx_ring->free_list[free_index] = NULL;
208 	rx_ring->rcb_head = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);
209 
210 	/*
211 	 * Put the rx control block to the work list
212 	 */
213 	rx_ring->work_list[index] = free_rcb;
214 
215 	return (mp);
216 }
217 
218 /*
219  * igb_rx_assoc_hcksum
220  *
221  * Check the rx hardware checksum status and associate the hcksum flags
222  */
223 static void
224 igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
225 {
226 	uint32_t hcksum_flags = 0;
227 
228 	/* Ignore Checksum Indication */
229 	if (status_error & E1000_RXD_STAT_IXSM)
230 		return;
231 
232 	/*
233 	 * Check TCP/UDP checksum
234 	 */
235 	if (((status_error & E1000_RXD_STAT_TCPCS) ||
236 	    (status_error & E1000_RXD_STAT_UDPCS)) &&
237 	    !(status_error & E1000_RXDEXT_STATERR_TCPE))
238 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
239 
240 	/*
241 	 * Check IP Checksum
242 	 */
243 	if ((status_error & E1000_RXD_STAT_IPCS) &&
244 	    !(status_error & E1000_RXDEXT_STATERR_IPE))
245 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
246 
247 	if (hcksum_flags != 0) {
248 		(void) hcksum_assoc(mp,
249 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
250 	}
251 }
252 
253 mblk_t *
254 igb_rx_ring_poll(void *arg, int bytes)
255 {
256 	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg;
257 	mblk_t *mp = NULL;
258 
259 	ASSERT(bytes >= 0);
260 
261 	if ((bytes == 0) || (rx_ring->igb->igb_state & IGB_SUSPENDED) ||
262 	    !(rx_ring->igb->igb_state & IGB_STARTED))
263 		return (NULL);
264 
265 	mutex_enter(&rx_ring->rx_lock);
266 	mp = igb_rx(rx_ring, bytes);
267 	mutex_exit(&rx_ring->rx_lock);
268 
269 	return (mp);
270 }
271 
272 /*
273  * igb_rx - Receive the data of one ring
274  *
275  * This function goes throught h/w descriptor in one specified rx ring,
276  * receives the data if the descriptor status shows the data is ready.
277  * It returns a chain of mblks containing the received data, to be
278  * passed up to mac_rx().
279  */
280 mblk_t *
281 igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes)
282 {
283 	union e1000_adv_rx_desc *current_rbd;
284 	rx_control_block_t *current_rcb;
285 	mblk_t *mp;
286 	mblk_t *mblk_head;
287 	mblk_t **mblk_tail;
288 	uint32_t rx_next;
289 	uint32_t rx_tail;
290 	uint32_t pkt_len;
291 	uint32_t status_error;
292 	uint32_t pkt_num;
293 	uint32_t total_bytes;
294 	igb_t *igb = rx_ring->igb;
295 
296 	mblk_head = NULL;
297 	mblk_tail = &mblk_head;
298 
299 	if (igb->igb_state & IGB_ERROR)
300 		return (NULL);
301 
302 	/*
303 	 * Sync the receive descriptors before
304 	 * accepting the packets
305 	 */
306 	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORKERNEL);
307 
308 	if (igb_check_dma_handle(
309 	    rx_ring->rbd_area.dma_handle) != DDI_FM_OK) {
310 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
311 		atomic_or_32(&igb->igb_state, IGB_ERROR);
312 		return (NULL);
313 	}
314 
315 	/*
316 	 * Get the start point of rx bd ring which should be examined
317 	 * during this cycle.
318 	 */
319 	rx_next = rx_ring->rbd_next;
320 
321 	current_rbd = &rx_ring->rbd_ring[rx_next];
322 	pkt_num = 0;
323 	total_bytes = 0;
324 	status_error = current_rbd->wb.upper.status_error;
325 	while (status_error & E1000_RXD_STAT_DD) {
326 		/*
327 		 * If hardware has found the errors, but the error
328 		 * is hardware checksum error, here does not discard the
329 		 * packet, and let upper layer compute the checksum;
330 		 * Otherwise discard the packet.
331 		 */
332 		if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
333 		    !(status_error & E1000_RXD_STAT_EOP)) {
334 			IGB_DEBUG_STAT(rx_ring->stat_frame_error);
335 			goto rx_discard;
336 		}
337 
338 		IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
339 		    (status_error & E1000_RXDEXT_STATERR_TCPE) ||
340 		    (status_error & E1000_RXDEXT_STATERR_IPE));
341 
342 		pkt_len = current_rbd->wb.upper.length;
343 
344 		if ((poll_bytes != IGB_NO_POLL) &&
345 		    ((pkt_len + total_bytes) > poll_bytes))
346 			break;
347 
348 		IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt);
349 		total_bytes += pkt_len;
350 
351 		mp = NULL;
352 		/*
353 		 * For packets with length more than the copy threshold,
354 		 * we'll firstly try to use the existed DMA buffer to built
355 		 * a mblk and send the mblk upstream.
356 		 *
357 		 * If the first method fails, or the packet length is less
358 		 * than the copy threshold, we'll allocate a new mblk and
359 		 * copy the packet data to the mblk.
360 		 */
361 		if (pkt_len > rx_ring->copy_thresh)
362 			mp = igb_rx_bind(rx_ring, rx_next, pkt_len);
363 
364 		if (mp == NULL)
365 			mp = igb_rx_copy(rx_ring, rx_next, pkt_len);
366 
367 		if (mp != NULL) {
368 			/*
369 			 * Check h/w checksum offload status
370 			 */
371 			if (igb->rx_hcksum_enable)
372 				igb_rx_assoc_hcksum(mp, status_error);
373 
374 			*mblk_tail = mp;
375 			mblk_tail = &mp->b_next;
376 		}
377 
378 rx_discard:
379 		/*
380 		 * Reset rx descriptor read bits
381 		 */
382 		current_rcb = rx_ring->work_list[rx_next];
383 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
384 		current_rbd->read.hdr_addr = 0;
385 
386 		rx_next = NEXT_INDEX(rx_next, 1, rx_ring->ring_size);
387 
388 		/*
389 		 * The receive function is in interrupt context, so here
390 		 * limit_per_intr is used to avoid doing receiving too long
391 		 * per interrupt.
392 		 */
393 		if (++pkt_num > rx_ring->limit_per_intr) {
394 			IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt);
395 			break;
396 		}
397 
398 		current_rbd = &rx_ring->rbd_ring[rx_next];
399 		status_error = current_rbd->wb.upper.status_error;
400 	}
401 
402 	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORDEV);
403 
404 	rx_ring->rbd_next = rx_next;
405 
406 	/*
407 	 * Update the h/w tail accordingly
408 	 */
409 	rx_tail = PREV_INDEX(rx_next, 1, rx_ring->ring_size);
410 
411 	E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail);
412 
413 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
414 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
415 		atomic_or_32(&igb->igb_state, IGB_ERROR);
416 	}
417 
418 	return (mblk_head);
419 }
420