xref: /titanic_52/usr/src/uts/common/io/igb/igb_rx.c (revision 2dea4eed7ad1c66ae4770263aa2911815a8b86eb)
1 /*
2  * CDDL HEADER START
3  *
4  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at:
10  *	http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When using or redistributing this file, you may do so under the
15  * License only. No other modification of this header is permitted.
16  *
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 
24 /*
25  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms of the CDDL.
27  */
28 
29 #include "igb_sw.h"
30 
31 /* function prototypes */
32 static mblk_t *igb_rx_bind(igb_rx_data_t *, uint32_t, uint32_t);
33 static mblk_t *igb_rx_copy(igb_rx_data_t *, uint32_t, uint32_t);
34 static void igb_rx_assoc_hcksum(mblk_t *, uint32_t);
35 
36 #ifndef IGB_DEBUG
37 #pragma inline(igb_rx_assoc_hcksum)
38 #endif
39 
40 
41 /*
42  * igb_rx_recycle - the call-back function to reclaim rx buffer
43  *
44  * This function is called when an mp is freed by the user thru
45  * freeb call (Only for mp constructed through desballoc call).
46  * It returns back the freed buffer to the free list.
47  */
48 void
49 igb_rx_recycle(caddr_t arg)
50 {
51 	igb_t *igb;
52 	igb_rx_ring_t *rx_ring;
53 	igb_rx_data_t	*rx_data;
54 	rx_control_block_t *recycle_rcb;
55 	uint32_t free_index;
56 	uint32_t ref_cnt;
57 
58 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
59 	rx_data = recycle_rcb->rx_data;
60 	rx_ring = rx_data->rx_ring;
61 	igb = rx_ring->igb;
62 
63 	if (recycle_rcb->ref_cnt == 0) {
64 		/*
65 		 * This case only happens when rx buffers are being freed
66 		 * in igb_stop() and freemsg() is called.
67 		 */
68 		return;
69 	}
70 
71 	ASSERT(recycle_rcb->mp == NULL);
72 
73 	/*
74 	 * Using the recycled data buffer to generate a new mblk
75 	 */
76 	recycle_rcb->mp = desballoc((unsigned char *)
77 	    recycle_rcb->rx_buf.address,
78 	    recycle_rcb->rx_buf.size,
79 	    0, &recycle_rcb->free_rtn);
80 
81 	/*
82 	 * Put the recycled rx control block into free list
83 	 */
84 	mutex_enter(&rx_data->recycle_lock);
85 
86 	free_index = rx_data->rcb_tail;
87 	ASSERT(rx_data->free_list[free_index] == NULL);
88 
89 	rx_data->free_list[free_index] = recycle_rcb;
90 	rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
91 
92 	mutex_exit(&rx_data->recycle_lock);
93 
94 	/*
95 	 * The atomic operation on the number of the available rx control
96 	 * blocks in the free list is used to make the recycling mutual
97 	 * exclusive with the receiving.
98 	 */
99 	atomic_inc_32(&rx_data->rcb_free);
100 	ASSERT(rx_data->rcb_free <= rx_data->free_list_size);
101 
102 	/*
103 	 * Considering the case that the interface is unplumbed
104 	 * and there are still some buffers held by the upper layer.
105 	 * When the buffer is returned back, we need to free it.
106 	 */
107 	ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt);
108 	if (ref_cnt == 0) {
109 		if (recycle_rcb->mp != NULL) {
110 			freemsg(recycle_rcb->mp);
111 			recycle_rcb->mp = NULL;
112 		}
113 
114 		igb_free_dma_buffer(&recycle_rcb->rx_buf);
115 
116 		mutex_enter(&igb->rx_pending_lock);
117 		atomic_dec_32(&rx_data->rcb_pending);
118 		atomic_dec_32(&igb->rcb_pending);
119 
120 		/*
121 		 * When there is not any buffer belonging to this rx_data
122 		 * held by the upper layer, the rx_data can be freed.
123 		 */
124 		if ((rx_data->flag & IGB_RX_STOPPED) &&
125 		    (rx_data->rcb_pending == 0))
126 			igb_free_rx_ring_data(rx_data);
127 
128 		mutex_exit(&igb->rx_pending_lock);
129 	}
130 }
131 
132 /*
133  * igb_rx_copy - Use copy to process the received packet
134  *
135  * This function will use bcopy to process the packet
136  * and send the copied packet upstream
137  */
138 static mblk_t *
139 igb_rx_copy(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
140 {
141 	rx_control_block_t *current_rcb;
142 	mblk_t *mp;
143 	igb_t *igb = rx_data->rx_ring->igb;
144 
145 	current_rcb = rx_data->work_list[index];
146 
147 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
148 
149 	if (igb_check_dma_handle(
150 	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
151 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
152 		atomic_or_32(&igb->igb_state, IGB_ERROR);
153 		return (NULL);
154 	}
155 
156 	/*
157 	 * Allocate buffer to receive this packet
158 	 */
159 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
160 	if (mp == NULL) {
161 		igb_log(igb, "igb_rx_copy: allocate buffer failed");
162 		return (NULL);
163 	}
164 
165 	/*
166 	 * Copy the data received into the new cluster
167 	 */
168 	mp->b_rptr += IPHDR_ALIGN_ROOM;
169 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
170 	mp->b_wptr = mp->b_rptr + pkt_len;
171 
172 	return (mp);
173 }
174 
175 /*
176  * igb_rx_bind - Use existing DMA buffer to build mblk for receiving
177  *
178  * This function will use pre-bound DMA buffer to receive the packet
179  * and build mblk that will be sent upstream.
180  */
181 static mblk_t *
182 igb_rx_bind(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
183 {
184 	rx_control_block_t *current_rcb;
185 	rx_control_block_t *free_rcb;
186 	uint32_t free_index;
187 	mblk_t *mp;
188 	igb_t *igb = rx_data->rx_ring->igb;
189 
190 	/*
191 	 * If the free list is empty, we cannot proceed to send
192 	 * the current DMA buffer upstream. We'll have to return
193 	 * and use bcopy to process the packet.
194 	 */
195 	if (igb_atomic_reserve(&rx_data->rcb_free, 1) < 0)
196 		return (NULL);
197 
198 	current_rcb = rx_data->work_list[index];
199 	/*
200 	 * If the mp of the rx control block is NULL, try to do
201 	 * desballoc again.
202 	 */
203 	if (current_rcb->mp == NULL) {
204 		current_rcb->mp = desballoc((unsigned char *)
205 		    current_rcb->rx_buf.address,
206 		    current_rcb->rx_buf.size,
207 		    0, &current_rcb->free_rtn);
208 		/*
209 		 * If it is failed to built a mblk using the current
210 		 * DMA buffer, we have to return and use bcopy to
211 		 * process the packet.
212 		 */
213 		if (current_rcb->mp == NULL) {
214 			atomic_inc_32(&rx_data->rcb_free);
215 			return (NULL);
216 		}
217 	}
218 	/*
219 	 * Sync up the data received
220 	 */
221 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
222 
223 	if (igb_check_dma_handle(
224 	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
225 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
226 		atomic_or_32(&igb->igb_state, IGB_ERROR);
227 		atomic_inc_32(&rx_data->rcb_free);
228 		return (NULL);
229 	}
230 
231 	mp = current_rcb->mp;
232 	current_rcb->mp = NULL;
233 	atomic_inc_32(&current_rcb->ref_cnt);
234 
235 	mp->b_wptr = mp->b_rptr + pkt_len;
236 	mp->b_next = mp->b_cont = NULL;
237 
238 	/*
239 	 * Strip off one free rx control block from the free list
240 	 */
241 	free_index = rx_data->rcb_head;
242 	free_rcb = rx_data->free_list[free_index];
243 	ASSERT(free_rcb != NULL);
244 	rx_data->free_list[free_index] = NULL;
245 	rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
246 
247 	/*
248 	 * Put the rx control block to the work list
249 	 */
250 	rx_data->work_list[index] = free_rcb;
251 
252 	return (mp);
253 }
254 
255 /*
256  * igb_rx_assoc_hcksum
257  *
258  * Check the rx hardware checksum status and associate the hcksum flags
259  */
260 static void
261 igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
262 {
263 	uint32_t hcksum_flags = 0;
264 
265 	/* Ignore Checksum Indication */
266 	if (status_error & E1000_RXD_STAT_IXSM)
267 		return;
268 
269 	/*
270 	 * Check TCP/UDP checksum
271 	 */
272 	if (((status_error & E1000_RXD_STAT_TCPCS) ||
273 	    (status_error & E1000_RXD_STAT_UDPCS)) &&
274 	    !(status_error & E1000_RXDEXT_STATERR_TCPE))
275 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
276 
277 	/*
278 	 * Check IP Checksum
279 	 */
280 	if ((status_error & E1000_RXD_STAT_IPCS) &&
281 	    !(status_error & E1000_RXDEXT_STATERR_IPE))
282 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
283 
284 	if (hcksum_flags != 0) {
285 		(void) hcksum_assoc(mp,
286 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
287 	}
288 }
289 
290 mblk_t *
291 igb_rx_ring_poll(void *arg, int bytes)
292 {
293 	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg;
294 	mblk_t *mp = NULL;
295 
296 	ASSERT(bytes >= 0);
297 
298 	if ((bytes == 0) || (rx_ring->igb->igb_state & IGB_SUSPENDED) ||
299 	    !(rx_ring->igb->igb_state & IGB_STARTED))
300 		return (NULL);
301 
302 	mutex_enter(&rx_ring->rx_lock);
303 	mp = igb_rx(rx_ring, bytes);
304 	mutex_exit(&rx_ring->rx_lock);
305 
306 	return (mp);
307 }
308 
309 /*
310  * igb_rx - Receive the data of one ring
311  *
312  * This function goes throught h/w descriptor in one specified rx ring,
313  * receives the data if the descriptor status shows the data is ready.
314  * It returns a chain of mblks containing the received data, to be
315  * passed up to mac_rx().
316  */
317 mblk_t *
318 igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes)
319 {
320 	union e1000_adv_rx_desc *current_rbd;
321 	rx_control_block_t *current_rcb;
322 	mblk_t *mp;
323 	mblk_t *mblk_head;
324 	mblk_t **mblk_tail;
325 	uint32_t rx_next;
326 	uint32_t rx_tail;
327 	uint32_t pkt_len;
328 	uint32_t status_error;
329 	uint32_t pkt_num;
330 	uint32_t total_bytes;
331 	igb_t *igb = rx_ring->igb;
332 	igb_rx_data_t *rx_data = rx_ring->rx_data;
333 
334 	mblk_head = NULL;
335 	mblk_tail = &mblk_head;
336 
337 	if (igb->igb_state & IGB_ERROR)
338 		return (NULL);
339 
340 	/*
341 	 * Sync the receive descriptors before
342 	 * accepting the packets
343 	 */
344 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL);
345 
346 	if (igb_check_dma_handle(
347 	    rx_data->rbd_area.dma_handle) != DDI_FM_OK) {
348 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
349 		atomic_or_32(&igb->igb_state, IGB_ERROR);
350 		return (NULL);
351 	}
352 
353 	/*
354 	 * Get the start point of rx bd ring which should be examined
355 	 * during this cycle.
356 	 */
357 	rx_next = rx_data->rbd_next;
358 
359 	current_rbd = &rx_data->rbd_ring[rx_next];
360 	pkt_num = 0;
361 	total_bytes = 0;
362 	status_error = current_rbd->wb.upper.status_error;
363 	while (status_error & E1000_RXD_STAT_DD) {
364 		/*
365 		 * If hardware has found the errors, but the error
366 		 * is hardware checksum error, here does not discard the
367 		 * packet, and let upper layer compute the checksum;
368 		 * Otherwise discard the packet.
369 		 */
370 		if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
371 		    !(status_error & E1000_RXD_STAT_EOP)) {
372 			IGB_DEBUG_STAT(rx_ring->stat_frame_error);
373 			goto rx_discard;
374 		}
375 
376 		IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
377 		    (status_error & E1000_RXDEXT_STATERR_TCPE) ||
378 		    (status_error & E1000_RXDEXT_STATERR_IPE));
379 
380 		pkt_len = current_rbd->wb.upper.length;
381 
382 		if ((poll_bytes != IGB_NO_POLL) &&
383 		    ((pkt_len + total_bytes) > poll_bytes))
384 			break;
385 
386 		IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt);
387 		total_bytes += pkt_len;
388 
389 		mp = NULL;
390 		/*
391 		 * For packets with length more than the copy threshold,
392 		 * we'll firstly try to use the existed DMA buffer to built
393 		 * a mblk and send the mblk upstream.
394 		 *
395 		 * If the first method fails, or the packet length is less
396 		 * than the copy threshold, we'll allocate a new mblk and
397 		 * copy the packet data to the mblk.
398 		 */
399 		if (pkt_len > igb->rx_copy_thresh)
400 			mp = igb_rx_bind(rx_data, rx_next, pkt_len);
401 
402 		if (mp == NULL)
403 			mp = igb_rx_copy(rx_data, rx_next, pkt_len);
404 
405 		if (mp != NULL) {
406 			/*
407 			 * Check h/w checksum offload status
408 			 */
409 			if (igb->rx_hcksum_enable)
410 				igb_rx_assoc_hcksum(mp, status_error);
411 
412 			*mblk_tail = mp;
413 			mblk_tail = &mp->b_next;
414 		}
415 
416 rx_discard:
417 		/*
418 		 * Reset rx descriptor read bits
419 		 */
420 		current_rcb = rx_data->work_list[rx_next];
421 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
422 		current_rbd->read.hdr_addr = 0;
423 
424 		rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size);
425 
426 		/*
427 		 * The receive function is in interrupt context, so here
428 		 * rx_limit_per_intr is used to avoid doing receiving too long
429 		 * per interrupt.
430 		 */
431 		if (++pkt_num > igb->rx_limit_per_intr) {
432 			IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt);
433 			break;
434 		}
435 
436 		current_rbd = &rx_data->rbd_ring[rx_next];
437 		status_error = current_rbd->wb.upper.status_error;
438 	}
439 
440 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV);
441 
442 	rx_data->rbd_next = rx_next;
443 
444 	/*
445 	 * Update the h/w tail accordingly
446 	 */
447 	rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size);
448 
449 	E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail);
450 
451 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
452 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
453 		atomic_or_32(&igb->igb_state, IGB_ERROR);
454 	}
455 
456 	return (mblk_head);
457 }
458