xref: /illumos-gate/usr/src/uts/common/io/ixgbe/ixgbe_rx.c (revision 13740cb230f19fcbf1a6468d1a6a0ba9a0a09c22)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright(c) 2007-2008 Intel Corporation. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #include "ixgbe_sw.h"
32 
33 /* function prototypes */
34 static mblk_t *ixgbe_rx_bind(ixgbe_rx_ring_t *, uint32_t, uint32_t);
35 static mblk_t *ixgbe_rx_copy(ixgbe_rx_ring_t *, uint32_t, uint32_t);
36 static void ixgbe_rx_assoc_hcksum(mblk_t *, uint32_t);
37 
38 #ifndef IXGBE_DEBUG
39 #pragma inline(ixgbe_rx_assoc_hcksum)
40 #endif
41 
42 /*
43  * ixgbe_rx_recycle - The call-back function to reclaim rx buffer.
44  *
45  * This function is called when an mp is freed by the user thru
46  * freeb call (Only for mp constructed through desballoc call).
47  * It returns back the freed buffer to the free list.
48  */
49 void
50 ixgbe_rx_recycle(caddr_t arg)
51 {
52 	ixgbe_rx_ring_t *rx_ring;
53 	rx_control_block_t *recycle_rcb;
54 	uint32_t free_index;
55 
56 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
57 	rx_ring = recycle_rcb->rx_ring;
58 
59 	if (recycle_rcb->state == RCB_FREE)
60 		return;
61 
62 	recycle_rcb->state = RCB_FREE;
63 
64 	ASSERT(recycle_rcb->mp == NULL);
65 
66 	/*
67 	 * Using the recycled data buffer to generate a new mblk
68 	 */
69 	recycle_rcb->mp = desballoc((unsigned char *)
70 	    (recycle_rcb->rx_buf.address - IPHDR_ALIGN_ROOM),
71 	    (recycle_rcb->rx_buf.size + IPHDR_ALIGN_ROOM),
72 	    0, &recycle_rcb->free_rtn);
73 	if (recycle_rcb->mp != NULL) {
74 		recycle_rcb->mp->b_rptr += IPHDR_ALIGN_ROOM;
75 		recycle_rcb->mp->b_wptr += IPHDR_ALIGN_ROOM;
76 	}
77 
78 	/*
79 	 * Put the recycled rx control block into free list
80 	 */
81 	mutex_enter(&rx_ring->recycle_lock);
82 
83 	free_index = rx_ring->rcb_tail;
84 	ASSERT(rx_ring->free_list[free_index] == NULL);
85 
86 	rx_ring->free_list[free_index] = recycle_rcb;
87 	rx_ring->rcb_tail = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);
88 
89 	mutex_exit(&rx_ring->recycle_lock);
90 
91 	/*
92 	 * The atomic operation on the number of the available rx control
93 	 * blocks in the free list is used to make the recycling mutual
94 	 * exclusive with the receiving.
95 	 */
96 	atomic_inc_32(&rx_ring->rcb_free);
97 	ASSERT(rx_ring->rcb_free <= rx_ring->free_list_size);
98 }
99 
100 /*
101  * ixgbe_rx_copy - Use copy to process the received packet.
102  *
103  * This function will use bcopy to process the packet
104  * and send the copied packet upstream.
105  */
106 static mblk_t *
107 ixgbe_rx_copy(ixgbe_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
108 {
109 	rx_control_block_t *current_rcb;
110 	mblk_t *mp;
111 
112 	current_rcb = rx_ring->work_list[index];
113 
114 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
115 
116 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
117 	    DDI_FM_OK) {
118 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
119 		    DDI_SERVICE_DEGRADED);
120 	}
121 
122 	/*
123 	 * Allocate buffer to receive this packet
124 	 */
125 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
126 	if (mp == NULL) {
127 		ixgbe_log(rx_ring->ixgbe,
128 		    "ixgbe_rx_copy: allocate buffer failed");
129 		return (NULL);
130 	}
131 
132 	/*
133 	 * Copy the data received into the new cluster
134 	 */
135 	mp->b_rptr += IPHDR_ALIGN_ROOM;
136 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
137 	mp->b_wptr = mp->b_rptr + pkt_len;
138 
139 	return (mp);
140 }
141 
142 /*
143  * ixgbe_rx_bind - Use existing DMA buffer to build mblk for receiving.
144  *
145  * This function will use pre-bound DMA buffer to receive the packet
146  * and build mblk that will be sent upstream.
147  */
148 static mblk_t *
149 ixgbe_rx_bind(ixgbe_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
150 {
151 	rx_control_block_t *current_rcb;
152 	rx_control_block_t *free_rcb;
153 	uint32_t free_index;
154 	mblk_t *mp;
155 
156 	/*
157 	 * If the free list is empty, we cannot proceed to send
158 	 * the current DMA buffer upstream. We'll have to return
159 	 * and use bcopy to process the packet.
160 	 */
161 	if (ixgbe_atomic_reserve(&rx_ring->rcb_free, 1) < 0)
162 		return (NULL);
163 
164 	current_rcb = rx_ring->work_list[index];
165 	/*
166 	 * If the mp of the rx control block is NULL, try to do
167 	 * desballoc again.
168 	 */
169 	if (current_rcb->mp == NULL) {
170 		current_rcb->mp = desballoc((unsigned char *)
171 		    (current_rcb->rx_buf.address - IPHDR_ALIGN_ROOM),
172 		    (current_rcb->rx_buf.size + IPHDR_ALIGN_ROOM),
173 		    0, &current_rcb->free_rtn);
174 		/*
175 		 * If it is failed to built a mblk using the current
176 		 * DMA buffer, we have to return and use bcopy to
177 		 * process the packet.
178 		 */
179 		if (current_rcb->mp != NULL) {
180 			current_rcb->mp->b_rptr += IPHDR_ALIGN_ROOM;
181 			current_rcb->mp->b_wptr += IPHDR_ALIGN_ROOM;
182 		} else {
183 			atomic_inc_32(&rx_ring->rcb_free);
184 			return (NULL);
185 		}
186 	}
187 	/*
188 	 * Sync up the data received
189 	 */
190 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
191 
192 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
193 	    DDI_FM_OK) {
194 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
195 		    DDI_SERVICE_DEGRADED);
196 	}
197 
198 	mp = current_rcb->mp;
199 	current_rcb->mp = NULL;
200 	current_rcb->state = RCB_SENDUP;
201 
202 	mp->b_wptr = mp->b_rptr + pkt_len;
203 	mp->b_next = mp->b_cont = NULL;
204 
205 	/*
206 	 * Strip off one free rx control block from the free list
207 	 */
208 	free_index = rx_ring->rcb_head;
209 	free_rcb = rx_ring->free_list[free_index];
210 	ASSERT(free_rcb != NULL);
211 	rx_ring->free_list[free_index] = NULL;
212 	rx_ring->rcb_head = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);
213 
214 	/*
215 	 * Put the rx control block to the work list
216 	 */
217 	rx_ring->work_list[index] = free_rcb;
218 
219 	return (mp);
220 }
221 
222 /*
223  * ixgbe_rx_assoc_hcksum - Check the rx hardware checksum status and associate
224  * the hcksum flags.
225  */
226 static void
227 ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
228 {
229 	uint32_t hcksum_flags = 0;
230 
231 	/*
232 	 * Check TCP/UDP checksum
233 	 */
234 	if ((status_error & IXGBE_RXD_STAT_L4CS) &&
235 	    !(status_error & IXGBE_RXDADV_ERR_TCPE))
236 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
237 
238 	/*
239 	 * Check IP Checksum
240 	 */
241 	if ((status_error & IXGBE_RXD_STAT_IPCS) &&
242 	    !(status_error & IXGBE_RXDADV_ERR_IPE))
243 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
244 
245 	if (hcksum_flags != 0) {
246 		(void) hcksum_assoc(mp,
247 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
248 	}
249 }
250 
251 /*
252  * ixgbe_ring_rx - Receive the data of one ring.
253  *
254  * This function goes throught h/w descriptor in one specified rx ring,
255  * receives the data if the descriptor status shows the data is ready.
256  * It returns a chain of mblks containing the received data, to be
257  * passed up to mac_rx().
258  */
259 mblk_t *
260 ixgbe_ring_rx(ixgbe_rx_ring_t *rx_ring, int poll_bytes)
261 {
262 	union ixgbe_adv_rx_desc *current_rbd;
263 	rx_control_block_t *current_rcb;
264 	mblk_t *mp;
265 	mblk_t *mblk_head;
266 	mblk_t **mblk_tail;
267 	uint32_t rx_next;
268 	uint32_t rx_tail;
269 	uint32_t pkt_len;
270 	uint32_t status_error;
271 	uint32_t pkt_num;
272 	uint32_t received_bytes;
273 	ixgbe_t *ixgbe = rx_ring->ixgbe;
274 
275 	mblk_head = NULL;
276 	mblk_tail = &mblk_head;
277 
278 	/*
279 	 * Sync the receive descriptors before accepting the packets
280 	 */
281 	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORKERNEL);
282 
283 	if (ixgbe_check_dma_handle(rx_ring->rbd_area.dma_handle) != DDI_FM_OK) {
284 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
285 		    DDI_SERVICE_DEGRADED);
286 	}
287 
288 	/*
289 	 * Get the start point of rx bd ring which should be examined
290 	 * during this cycle.
291 	 */
292 	rx_next = rx_ring->rbd_next;
293 
294 	current_rbd = &rx_ring->rbd_ring[rx_next];
295 	received_bytes = 0;
296 	pkt_num = 0;
297 	status_error = current_rbd->wb.upper.status_error;
298 	while (status_error & IXGBE_RXD_STAT_DD) {
299 		/*
300 		 * If adapter has found errors, but the error
301 		 * is hardware checksum error, this does not discard the
302 		 * packet: let upper layer compute the checksum;
303 		 * Otherwise discard the packet.
304 		 */
305 		if ((status_error & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
306 		    !(status_error & IXGBE_RXD_STAT_EOP)) {
307 			IXGBE_DEBUG_STAT(rx_ring->stat_frame_error);
308 			goto rx_discard;
309 		}
310 
311 		IXGBE_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
312 		    (status_error & IXGBE_RXDADV_ERR_TCPE) ||
313 		    (status_error & IXGBE_RXDADV_ERR_IPE));
314 
315 		pkt_len = current_rbd->wb.upper.length;
316 
317 		if ((poll_bytes != IXGBE_POLL_NULL) &&
318 		    ((received_bytes + pkt_len) > poll_bytes))
319 			break;
320 
321 		received_bytes += pkt_len;
322 
323 		mp = NULL;
324 		/*
325 		 * For packets with length more than the copy threshold,
326 		 * we'll first try to use the existing DMA buffer to build
327 		 * an mblk and send the mblk upstream.
328 		 *
329 		 * If the first method fails, or the packet length is less
330 		 * than the copy threshold, we'll allocate a new mblk and
331 		 * copy the packet data to the new mblk.
332 		 */
333 		if (pkt_len > rx_ring->copy_thresh)
334 			mp = ixgbe_rx_bind(rx_ring, rx_next, pkt_len);
335 
336 		if (mp == NULL)
337 			mp = ixgbe_rx_copy(rx_ring, rx_next, pkt_len);
338 
339 		if (mp != NULL) {
340 			/*
341 			 * Check h/w checksum offload status
342 			 */
343 			if (ixgbe->rx_hcksum_enable)
344 				ixgbe_rx_assoc_hcksum(mp, status_error);
345 
346 			*mblk_tail = mp;
347 			mblk_tail = &mp->b_next;
348 		}
349 
350 rx_discard:
351 		/*
352 		 * Reset rx descriptor read bits
353 		 */
354 		current_rcb = rx_ring->work_list[rx_next];
355 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
356 		current_rbd->read.hdr_addr = 0;
357 
358 		rx_next = NEXT_INDEX(rx_next, 1, rx_ring->ring_size);
359 
360 		/*
361 		 * The receive function is in interrupt context, so here
362 		 * limit_per_intr is used to avoid doing receiving too long
363 		 * per interrupt.
364 		 */
365 		if (++pkt_num > rx_ring->limit_per_intr) {
366 			IXGBE_DEBUG_STAT(rx_ring->stat_exceed_pkt);
367 			break;
368 		}
369 
370 		current_rbd = &rx_ring->rbd_ring[rx_next];
371 		status_error = current_rbd->wb.upper.status_error;
372 	}
373 
374 	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORDEV);
375 
376 	rx_ring->rbd_next = rx_next;
377 
378 	/*
379 	 * Update the h/w tail accordingly
380 	 */
381 	rx_tail = PREV_INDEX(rx_next, 1, rx_ring->ring_size);
382 
383 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->index), rx_tail);
384 
385 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
386 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
387 		    DDI_SERVICE_DEGRADED);
388 	}
389 
390 	return (mblk_head);
391 }
392 
393 mblk_t *
394 ixgbe_ring_rx_poll(void *arg, int n_bytes)
395 {
396 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)arg;
397 	mblk_t *mp = NULL;
398 
399 	ASSERT(n_bytes >= 0);
400 
401 	if (n_bytes == 0)
402 		return (mp);
403 
404 	mutex_enter(&rx_ring->rx_lock);
405 	mp = ixgbe_ring_rx(rx_ring, n_bytes);
406 	mutex_exit(&rx_ring->rx_lock);
407 
408 	return (mp);
409 }
410