xref: /titanic_51/usr/src/uts/common/io/ixgbe/ixgbe_rx.c (revision 604635facc40339ec5edaeba7cfbf31b615cfbfe)
1 /*
2  * CDDL HEADER START
3  *
4  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #include "ixgbe_sw.h"
29 
30 /* function prototypes */
31 static mblk_t *ixgbe_rx_bind(ixgbe_rx_data_t *, uint32_t, uint32_t);
32 static mblk_t *ixgbe_rx_copy(ixgbe_rx_data_t *, uint32_t, uint32_t);
33 static void ixgbe_rx_assoc_hcksum(mblk_t *, uint32_t);
34 
35 #ifndef IXGBE_DEBUG
36 #pragma inline(ixgbe_rx_assoc_hcksum)
37 #endif
38 
39 /*
40  * ixgbe_rx_recycle - The call-back function to reclaim rx buffer.
41  *
42  * This function is called when an mp is freed by the user thru
43  * freeb call (Only for mp constructed through desballoc call).
44  * It returns back the freed buffer to the free list.
45  */
46 void
47 ixgbe_rx_recycle(caddr_t arg)
48 {
49 	ixgbe_t *ixgbe;
50 	ixgbe_rx_ring_t *rx_ring;
51 	ixgbe_rx_data_t	*rx_data;
52 	rx_control_block_t *recycle_rcb;
53 	uint32_t free_index;
54 	uint32_t ref_cnt;
55 
56 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
57 	rx_data = recycle_rcb->rx_data;
58 	rx_ring = rx_data->rx_ring;
59 	ixgbe = rx_ring->ixgbe;
60 
61 	if (recycle_rcb->ref_cnt == 0) {
62 		/*
63 		 * This case only happens when rx buffers are being freed
64 		 * in ixgbe_stop() and freemsg() is called.
65 		 */
66 		return;
67 	}
68 
69 	ASSERT(recycle_rcb->mp == NULL);
70 
71 	/*
72 	 * Using the recycled data buffer to generate a new mblk
73 	 */
74 	recycle_rcb->mp = desballoc((unsigned char *)
75 	    recycle_rcb->rx_buf.address,
76 	    recycle_rcb->rx_buf.size,
77 	    0, &recycle_rcb->free_rtn);
78 
79 	/*
80 	 * Put the recycled rx control block into free list
81 	 */
82 	mutex_enter(&rx_data->recycle_lock);
83 
84 	free_index = rx_data->rcb_tail;
85 	ASSERT(rx_data->free_list[free_index] == NULL);
86 
87 	rx_data->free_list[free_index] = recycle_rcb;
88 	rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
89 
90 	mutex_exit(&rx_data->recycle_lock);
91 
92 	/*
93 	 * The atomic operation on the number of the available rx control
94 	 * blocks in the free list is used to make the recycling mutual
95 	 * exclusive with the receiving.
96 	 */
97 	atomic_inc_32(&rx_data->rcb_free);
98 	ASSERT(rx_data->rcb_free <= rx_data->free_list_size);
99 
100 	/*
101 	 * Considering the case that the interface is unplumbed
102 	 * and there are still some buffers held by the upper layer.
103 	 * When the buffer is returned back, we need to free it.
104 	 */
105 	ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt);
106 	if (ref_cnt == 0) {
107 		if (recycle_rcb->mp != NULL) {
108 			freemsg(recycle_rcb->mp);
109 			recycle_rcb->mp = NULL;
110 		}
111 
112 		ixgbe_free_dma_buffer(&recycle_rcb->rx_buf);
113 
114 		mutex_enter(&ixgbe->rx_pending_lock);
115 		atomic_dec_32(&rx_data->rcb_pending);
116 		atomic_dec_32(&ixgbe->rcb_pending);
117 
118 		/*
119 		 * When there is not any buffer belonging to this rx_data
120 		 * held by the upper layer, the rx_data can be freed.
121 		 */
122 		if ((rx_data->flag & IXGBE_RX_STOPPED) &&
123 		    (rx_data->rcb_pending == 0))
124 			ixgbe_free_rx_ring_data(rx_data);
125 
126 		mutex_exit(&ixgbe->rx_pending_lock);
127 	}
128 }
129 
130 /*
131  * ixgbe_rx_copy - Use copy to process the received packet.
132  *
133  * This function will use bcopy to process the packet
134  * and send the copied packet upstream.
135  */
136 static mblk_t *
137 ixgbe_rx_copy(ixgbe_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
138 {
139 	ixgbe_t *ixgbe;
140 	rx_control_block_t *current_rcb;
141 	mblk_t *mp;
142 
143 	ixgbe = rx_data->rx_ring->ixgbe;
144 	current_rcb = rx_data->work_list[index];
145 
146 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
147 
148 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
149 	    DDI_FM_OK) {
150 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
151 	}
152 
153 	/*
154 	 * Allocate buffer to receive this packet
155 	 */
156 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
157 	if (mp == NULL) {
158 		ixgbe_log(ixgbe, "ixgbe_rx_copy: allocate buffer failed");
159 		return (NULL);
160 	}
161 
162 	/*
163 	 * Copy the data received into the new cluster
164 	 */
165 	mp->b_rptr += IPHDR_ALIGN_ROOM;
166 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
167 	mp->b_wptr = mp->b_rptr + pkt_len;
168 
169 	return (mp);
170 }
171 
172 /*
173  * ixgbe_rx_bind - Use existing DMA buffer to build mblk for receiving.
174  *
175  * This function will use pre-bound DMA buffer to receive the packet
176  * and build mblk that will be sent upstream.
177  */
178 static mblk_t *
179 ixgbe_rx_bind(ixgbe_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
180 {
181 	rx_control_block_t *current_rcb;
182 	rx_control_block_t *free_rcb;
183 	uint32_t free_index;
184 	mblk_t *mp;
185 	ixgbe_t	*ixgbe = rx_data->rx_ring->ixgbe;
186 
187 	/*
188 	 * If the free list is empty, we cannot proceed to send
189 	 * the current DMA buffer upstream. We'll have to return
190 	 * and use bcopy to process the packet.
191 	 */
192 	if (ixgbe_atomic_reserve(&rx_data->rcb_free, 1) < 0)
193 		return (NULL);
194 
195 	current_rcb = rx_data->work_list[index];
196 	/*
197 	 * If the mp of the rx control block is NULL, try to do
198 	 * desballoc again.
199 	 */
200 	if (current_rcb->mp == NULL) {
201 		current_rcb->mp = desballoc((unsigned char *)
202 		    current_rcb->rx_buf.address,
203 		    current_rcb->rx_buf.size,
204 		    0, &current_rcb->free_rtn);
205 		/*
206 		 * If it is failed to built a mblk using the current
207 		 * DMA buffer, we have to return and use bcopy to
208 		 * process the packet.
209 		 */
210 		if (current_rcb->mp == NULL) {
211 			atomic_inc_32(&rx_data->rcb_free);
212 			return (NULL);
213 		}
214 	}
215 	/*
216 	 * Sync up the data received
217 	 */
218 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
219 
220 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
221 	    DDI_FM_OK) {
222 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
223 	}
224 
225 	mp = current_rcb->mp;
226 	current_rcb->mp = NULL;
227 	atomic_inc_32(&current_rcb->ref_cnt);
228 
229 	mp->b_wptr = mp->b_rptr + pkt_len;
230 	mp->b_next = mp->b_cont = NULL;
231 
232 	/*
233 	 * Strip off one free rx control block from the free list
234 	 */
235 	free_index = rx_data->rcb_head;
236 	free_rcb = rx_data->free_list[free_index];
237 	ASSERT(free_rcb != NULL);
238 	rx_data->free_list[free_index] = NULL;
239 	rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
240 
241 	/*
242 	 * Put the rx control block to the work list
243 	 */
244 	rx_data->work_list[index] = free_rcb;
245 
246 	return (mp);
247 }
248 
249 /*
250  * ixgbe_rx_assoc_hcksum - Check the rx hardware checksum status and associate
251  * the hcksum flags.
252  */
253 static void
254 ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
255 {
256 	uint32_t hcksum_flags = 0;
257 
258 	/*
259 	 * Check TCP/UDP checksum
260 	 */
261 	if ((status_error & IXGBE_RXD_STAT_L4CS) &&
262 	    !(status_error & IXGBE_RXDADV_ERR_TCPE))
263 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
264 
265 	/*
266 	 * Check IP Checksum
267 	 */
268 	if ((status_error & IXGBE_RXD_STAT_IPCS) &&
269 	    !(status_error & IXGBE_RXDADV_ERR_IPE))
270 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
271 
272 	if (hcksum_flags != 0) {
273 		(void) hcksum_assoc(mp,
274 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
275 	}
276 }
277 
278 /*
279  * ixgbe_ring_rx - Receive the data of one ring.
280  *
281  * This function goes throught h/w descriptor in one specified rx ring,
282  * receives the data if the descriptor status shows the data is ready.
283  * It returns a chain of mblks containing the received data, to be
284  * passed up to mac_rx().
285  */
286 mblk_t *
287 ixgbe_ring_rx(ixgbe_rx_ring_t *rx_ring, int poll_bytes)
288 {
289 	union ixgbe_adv_rx_desc *current_rbd;
290 	rx_control_block_t *current_rcb;
291 	mblk_t *mp;
292 	mblk_t *mblk_head;
293 	mblk_t **mblk_tail;
294 	uint32_t rx_next;
295 	uint32_t rx_tail;
296 	uint32_t pkt_len;
297 	uint32_t status_error;
298 	uint32_t pkt_num;
299 	uint32_t received_bytes;
300 	ixgbe_t *ixgbe = rx_ring->ixgbe;
301 	ixgbe_rx_data_t *rx_data = rx_ring->rx_data;
302 
303 	mblk_head = NULL;
304 	mblk_tail = &mblk_head;
305 
306 	/*
307 	 * Sync the receive descriptors before accepting the packets
308 	 */
309 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL);
310 
311 	if (ixgbe_check_dma_handle(rx_data->rbd_area.dma_handle) != DDI_FM_OK) {
312 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
313 	}
314 
315 	/*
316 	 * Get the start point of rx bd ring which should be examined
317 	 * during this cycle.
318 	 */
319 	rx_next = rx_data->rbd_next;
320 
321 	current_rbd = &rx_data->rbd_ring[rx_next];
322 	received_bytes = 0;
323 	pkt_num = 0;
324 	status_error = current_rbd->wb.upper.status_error;
325 	while (status_error & IXGBE_RXD_STAT_DD) {
326 		/*
327 		 * If adapter has found errors, but the error
328 		 * is hardware checksum error, this does not discard the
329 		 * packet: let upper layer compute the checksum;
330 		 * Otherwise discard the packet.
331 		 */
332 		if ((status_error & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
333 		    !(status_error & IXGBE_RXD_STAT_EOP)) {
334 			IXGBE_DEBUG_STAT(rx_ring->stat_frame_error);
335 			goto rx_discard;
336 		}
337 
338 		IXGBE_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
339 		    (status_error & IXGBE_RXDADV_ERR_TCPE) ||
340 		    (status_error & IXGBE_RXDADV_ERR_IPE));
341 
342 		pkt_len = current_rbd->wb.upper.length;
343 
344 		if ((poll_bytes != IXGBE_POLL_NULL) &&
345 		    ((received_bytes + pkt_len) > poll_bytes))
346 			break;
347 
348 		received_bytes += pkt_len;
349 
350 		mp = NULL;
351 		/*
352 		 * For packets with length more than the copy threshold,
353 		 * we'll first try to use the existing DMA buffer to build
354 		 * an mblk and send the mblk upstream.
355 		 *
356 		 * If the first method fails, or the packet length is less
357 		 * than the copy threshold, we'll allocate a new mblk and
358 		 * copy the packet data to the new mblk.
359 		 */
360 		if (pkt_len > ixgbe->rx_copy_thresh)
361 			mp = ixgbe_rx_bind(rx_data, rx_next, pkt_len);
362 
363 		if (mp == NULL)
364 			mp = ixgbe_rx_copy(rx_data, rx_next, pkt_len);
365 
366 		if (mp != NULL) {
367 			/*
368 			 * Check h/w checksum offload status
369 			 */
370 			if (ixgbe->rx_hcksum_enable)
371 				ixgbe_rx_assoc_hcksum(mp, status_error);
372 
373 			*mblk_tail = mp;
374 			mblk_tail = &mp->b_next;
375 		}
376 
377 rx_discard:
378 		/*
379 		 * Reset rx descriptor read bits
380 		 */
381 		current_rcb = rx_data->work_list[rx_next];
382 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
383 		current_rbd->read.hdr_addr = 0;
384 
385 		rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size);
386 
387 		/*
388 		 * The receive function is in interrupt context, so here
389 		 * rx_limit_per_intr is used to avoid doing receiving too long
390 		 * per interrupt.
391 		 */
392 		if (++pkt_num > ixgbe->rx_limit_per_intr) {
393 			IXGBE_DEBUG_STAT(rx_ring->stat_exceed_pkt);
394 			break;
395 		}
396 
397 		current_rbd = &rx_data->rbd_ring[rx_next];
398 		status_error = current_rbd->wb.upper.status_error;
399 	}
400 
401 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV);
402 
403 	rx_data->rbd_next = rx_next;
404 
405 	/*
406 	 * Update the h/w tail accordingly
407 	 */
408 	rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size);
409 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->index), rx_tail);
410 
411 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
412 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
413 	}
414 
415 	return (mblk_head);
416 }
417 
418 mblk_t *
419 ixgbe_ring_rx_poll(void *arg, int n_bytes)
420 {
421 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)arg;
422 	mblk_t *mp = NULL;
423 
424 	ASSERT(n_bytes >= 0);
425 
426 	if (n_bytes == 0)
427 		return (mp);
428 
429 	mutex_enter(&rx_ring->rx_lock);
430 	mp = ixgbe_ring_rx(rx_ring, n_bytes);
431 	mutex_exit(&rx_ring->rx_lock);
432 
433 	return (mp);
434 }
435