xref: /illumos-gate/usr/src/uts/common/io/ixgbe/ixgbe_rx.c (revision 73cd555c10e70dac413ae4b40de8450a291750ac)
1 /*
2  * CDDL HEADER START
3  *
4  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #include "ixgbe_sw.h"
29 
30 /* function prototypes */
31 static mblk_t *ixgbe_rx_bind(ixgbe_rx_ring_t *, uint32_t, uint32_t);
32 static mblk_t *ixgbe_rx_copy(ixgbe_rx_ring_t *, uint32_t, uint32_t);
33 static void ixgbe_rx_assoc_hcksum(mblk_t *, uint32_t);
34 
35 #ifndef IXGBE_DEBUG
36 #pragma inline(ixgbe_rx_assoc_hcksum)
37 #endif
38 
39 /*
40  * ixgbe_rx_recycle - The call-back function to reclaim rx buffer.
41  *
42  * This function is called when an mp is freed by the user thru
43  * freeb call (Only for mp constructed through desballoc call).
44  * It returns back the freed buffer to the free list.
45  */
46 void
47 ixgbe_rx_recycle(caddr_t arg)
48 {
49 	ixgbe_rx_ring_t *rx_ring;
50 	rx_control_block_t *recycle_rcb;
51 	uint32_t free_index;
52 
53 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
54 	rx_ring = recycle_rcb->rx_ring;
55 
56 	if (recycle_rcb->state == RCB_FREE)
57 		return;
58 
59 	recycle_rcb->state = RCB_FREE;
60 
61 	ASSERT(recycle_rcb->mp == NULL);
62 
63 	/*
64 	 * Using the recycled data buffer to generate a new mblk
65 	 */
66 	recycle_rcb->mp = desballoc((unsigned char *)
67 	    recycle_rcb->rx_buf.address,
68 	    recycle_rcb->rx_buf.size,
69 	    0, &recycle_rcb->free_rtn);
70 
71 	/*
72 	 * Put the recycled rx control block into free list
73 	 */
74 	mutex_enter(&rx_ring->recycle_lock);
75 
76 	free_index = rx_ring->rcb_tail;
77 	ASSERT(rx_ring->free_list[free_index] == NULL);
78 
79 	rx_ring->free_list[free_index] = recycle_rcb;
80 	rx_ring->rcb_tail = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);
81 
82 	mutex_exit(&rx_ring->recycle_lock);
83 
84 	/*
85 	 * The atomic operation on the number of the available rx control
86 	 * blocks in the free list is used to make the recycling mutual
87 	 * exclusive with the receiving.
88 	 */
89 	atomic_inc_32(&rx_ring->rcb_free);
90 	ASSERT(rx_ring->rcb_free <= rx_ring->free_list_size);
91 }
92 
93 /*
94  * ixgbe_rx_copy - Use copy to process the received packet.
95  *
96  * This function will use bcopy to process the packet
97  * and send the copied packet upstream.
98  */
99 static mblk_t *
100 ixgbe_rx_copy(ixgbe_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
101 {
102 	rx_control_block_t *current_rcb;
103 	mblk_t *mp;
104 
105 	current_rcb = rx_ring->work_list[index];
106 
107 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
108 
109 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
110 	    DDI_FM_OK) {
111 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
112 		    DDI_SERVICE_DEGRADED);
113 	}
114 
115 	/*
116 	 * Allocate buffer to receive this packet
117 	 */
118 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
119 	if (mp == NULL) {
120 		ixgbe_log(rx_ring->ixgbe,
121 		    "ixgbe_rx_copy: allocate buffer failed");
122 		return (NULL);
123 	}
124 
125 	/*
126 	 * Copy the data received into the new cluster
127 	 */
128 	mp->b_rptr += IPHDR_ALIGN_ROOM;
129 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
130 	mp->b_wptr = mp->b_rptr + pkt_len;
131 
132 	return (mp);
133 }
134 
135 /*
136  * ixgbe_rx_bind - Use existing DMA buffer to build mblk for receiving.
137  *
138  * This function will use pre-bound DMA buffer to receive the packet
139  * and build mblk that will be sent upstream.
140  */
141 static mblk_t *
142 ixgbe_rx_bind(ixgbe_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
143 {
144 	rx_control_block_t *current_rcb;
145 	rx_control_block_t *free_rcb;
146 	uint32_t free_index;
147 	mblk_t *mp;
148 
149 	/*
150 	 * If the free list is empty, we cannot proceed to send
151 	 * the current DMA buffer upstream. We'll have to return
152 	 * and use bcopy to process the packet.
153 	 */
154 	if (ixgbe_atomic_reserve(&rx_ring->rcb_free, 1) < 0)
155 		return (NULL);
156 
157 	current_rcb = rx_ring->work_list[index];
158 	/*
159 	 * If the mp of the rx control block is NULL, try to do
160 	 * desballoc again.
161 	 */
162 	if (current_rcb->mp == NULL) {
163 		current_rcb->mp = desballoc((unsigned char *)
164 		    current_rcb->rx_buf.address,
165 		    current_rcb->rx_buf.size,
166 		    0, &current_rcb->free_rtn);
167 		/*
168 		 * If it is failed to built a mblk using the current
169 		 * DMA buffer, we have to return and use bcopy to
170 		 * process the packet.
171 		 */
172 		if (current_rcb->mp == NULL) {
173 			atomic_inc_32(&rx_ring->rcb_free);
174 			return (NULL);
175 		}
176 	}
177 	/*
178 	 * Sync up the data received
179 	 */
180 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
181 
182 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
183 	    DDI_FM_OK) {
184 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
185 		    DDI_SERVICE_DEGRADED);
186 	}
187 
188 	mp = current_rcb->mp;
189 	current_rcb->mp = NULL;
190 	current_rcb->state = RCB_SENDUP;
191 
192 	mp->b_wptr = mp->b_rptr + pkt_len;
193 	mp->b_next = mp->b_cont = NULL;
194 
195 	/*
196 	 * Strip off one free rx control block from the free list
197 	 */
198 	free_index = rx_ring->rcb_head;
199 	free_rcb = rx_ring->free_list[free_index];
200 	ASSERT(free_rcb != NULL);
201 	rx_ring->free_list[free_index] = NULL;
202 	rx_ring->rcb_head = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);
203 
204 	/*
205 	 * Put the rx control block to the work list
206 	 */
207 	rx_ring->work_list[index] = free_rcb;
208 
209 	return (mp);
210 }
211 
212 /*
213  * ixgbe_rx_assoc_hcksum - Check the rx hardware checksum status and associate
214  * the hcksum flags.
215  */
216 static void
217 ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
218 {
219 	uint32_t hcksum_flags = 0;
220 
221 	/*
222 	 * Check TCP/UDP checksum
223 	 */
224 	if ((status_error & IXGBE_RXD_STAT_L4CS) &&
225 	    !(status_error & IXGBE_RXDADV_ERR_TCPE))
226 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
227 
228 	/*
229 	 * Check IP Checksum
230 	 */
231 	if ((status_error & IXGBE_RXD_STAT_IPCS) &&
232 	    !(status_error & IXGBE_RXDADV_ERR_IPE))
233 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
234 
235 	if (hcksum_flags != 0) {
236 		(void) hcksum_assoc(mp,
237 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
238 	}
239 }
240 
241 /*
242  * ixgbe_ring_rx - Receive the data of one ring.
243  *
244  * This function goes throught h/w descriptor in one specified rx ring,
245  * receives the data if the descriptor status shows the data is ready.
246  * It returns a chain of mblks containing the received data, to be
247  * passed up to mac_rx().
248  */
249 mblk_t *
250 ixgbe_ring_rx(ixgbe_rx_ring_t *rx_ring, int poll_bytes)
251 {
252 	union ixgbe_adv_rx_desc *current_rbd;
253 	rx_control_block_t *current_rcb;
254 	mblk_t *mp;
255 	mblk_t *mblk_head;
256 	mblk_t **mblk_tail;
257 	uint32_t rx_next;
258 	uint32_t rx_tail;
259 	uint32_t pkt_len;
260 	uint32_t status_error;
261 	uint32_t pkt_num;
262 	uint32_t received_bytes;
263 	ixgbe_t *ixgbe = rx_ring->ixgbe;
264 
265 	mblk_head = NULL;
266 	mblk_tail = &mblk_head;
267 
268 	/*
269 	 * Sync the receive descriptors before accepting the packets
270 	 */
271 	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORKERNEL);
272 
273 	if (ixgbe_check_dma_handle(rx_ring->rbd_area.dma_handle) != DDI_FM_OK) {
274 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
275 		    DDI_SERVICE_DEGRADED);
276 	}
277 
278 	/*
279 	 * Get the start point of rx bd ring which should be examined
280 	 * during this cycle.
281 	 */
282 	rx_next = rx_ring->rbd_next;
283 
284 	current_rbd = &rx_ring->rbd_ring[rx_next];
285 	received_bytes = 0;
286 	pkt_num = 0;
287 	status_error = current_rbd->wb.upper.status_error;
288 	while (status_error & IXGBE_RXD_STAT_DD) {
289 		/*
290 		 * If adapter has found errors, but the error
291 		 * is hardware checksum error, this does not discard the
292 		 * packet: let upper layer compute the checksum;
293 		 * Otherwise discard the packet.
294 		 */
295 		if ((status_error & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
296 		    !(status_error & IXGBE_RXD_STAT_EOP)) {
297 			IXGBE_DEBUG_STAT(rx_ring->stat_frame_error);
298 			goto rx_discard;
299 		}
300 
301 		IXGBE_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
302 		    (status_error & IXGBE_RXDADV_ERR_TCPE) ||
303 		    (status_error & IXGBE_RXDADV_ERR_IPE));
304 
305 		pkt_len = current_rbd->wb.upper.length;
306 
307 		if ((poll_bytes != IXGBE_POLL_NULL) &&
308 		    ((received_bytes + pkt_len) > poll_bytes))
309 			break;
310 
311 		received_bytes += pkt_len;
312 
313 		mp = NULL;
314 		/*
315 		 * For packets with length more than the copy threshold,
316 		 * we'll first try to use the existing DMA buffer to build
317 		 * an mblk and send the mblk upstream.
318 		 *
319 		 * If the first method fails, or the packet length is less
320 		 * than the copy threshold, we'll allocate a new mblk and
321 		 * copy the packet data to the new mblk.
322 		 */
323 		if (pkt_len > rx_ring->copy_thresh)
324 			mp = ixgbe_rx_bind(rx_ring, rx_next, pkt_len);
325 
326 		if (mp == NULL)
327 			mp = ixgbe_rx_copy(rx_ring, rx_next, pkt_len);
328 
329 		if (mp != NULL) {
330 			/*
331 			 * Check h/w checksum offload status
332 			 */
333 			if (ixgbe->rx_hcksum_enable)
334 				ixgbe_rx_assoc_hcksum(mp, status_error);
335 
336 			*mblk_tail = mp;
337 			mblk_tail = &mp->b_next;
338 		}
339 
340 rx_discard:
341 		/*
342 		 * Reset rx descriptor read bits
343 		 */
344 		current_rcb = rx_ring->work_list[rx_next];
345 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
346 		current_rbd->read.hdr_addr = 0;
347 
348 		rx_next = NEXT_INDEX(rx_next, 1, rx_ring->ring_size);
349 
350 		/*
351 		 * The receive function is in interrupt context, so here
352 		 * limit_per_intr is used to avoid doing receiving too long
353 		 * per interrupt.
354 		 */
355 		if (++pkt_num > rx_ring->limit_per_intr) {
356 			IXGBE_DEBUG_STAT(rx_ring->stat_exceed_pkt);
357 			break;
358 		}
359 
360 		current_rbd = &rx_ring->rbd_ring[rx_next];
361 		status_error = current_rbd->wb.upper.status_error;
362 	}
363 
364 	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORDEV);
365 
366 	rx_ring->rbd_next = rx_next;
367 
368 	/*
369 	 * Update the h/w tail accordingly
370 	 */
371 	rx_tail = PREV_INDEX(rx_next, 1, rx_ring->ring_size);
372 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->index), rx_tail);
373 
374 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
375 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
376 		    DDI_SERVICE_DEGRADED);
377 	}
378 
379 	return (mblk_head);
380 }
381 
382 mblk_t *
383 ixgbe_ring_rx_poll(void *arg, int n_bytes)
384 {
385 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)arg;
386 	mblk_t *mp = NULL;
387 
388 	ASSERT(n_bytes >= 0);
389 
390 	if (n_bytes == 0)
391 		return (mp);
392 
393 	mutex_enter(&rx_ring->rx_lock);
394 	mp = ixgbe_ring_rx(rx_ring, n_bytes);
395 	mutex_exit(&rx_ring->rx_lock);
396 
397 	return (mp);
398 }
399