xref: /freebsd/sys/dev/e1000/em_txrx.c (revision 74d9553e43cfafc29448d0bb836916aa21dea0de)
/*-
 * Copyright (c) 2016 Matt Macy <mmacy@nextbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */
#include "if_em.h"

#ifdef	RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower);
static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower);
static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void em_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx);
static int em_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear);
static void em_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused,
			      uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buflen __unused);
static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx);
static int em_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx,
				int budget);
static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void lem_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused,
			      uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buflen __unused);

static int lem_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx,
				int budget);
static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri);
static void em_receive_checksum(uint32_t status, if_rxd_info_t ri);
static int em_determine_rsstype(u32 pkt_info);
extern int em_intr(void *arg);

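/*
 * iflib TX/RX operation tables.  em_txrx serves adapters that use extended
 * RX descriptors; lem_txrx serves the legacy adapters handled by the lem
 * RX routines.  The two tables share all of the TX entry points and differ
 * only in their RX routines.
 */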
struct if_txrx em_txrx = {
	em_isc_txd_encap,
	em_isc_txd_flush,
	em_isc_txd_credits_update,
	em_isc_rxd_available,
	em_isc_rxd_pkt_get,
	em_isc_rxd_refill,
	em_isc_rxd_flush,
	em_intr
};

struct if_txrx lem_txrx = {
	em_isc_txd_encap,
	em_isc_txd_flush,
	em_isc_txd_credits_update,
	lem_isc_rxd_available,
	lem_isc_rxd_pkt_get,
	lem_isc_rxd_refill,
	em_isc_rxd_flush,
	em_intr
};

extern if_shared_ctx_t em_sctx;

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
{
	if_softc_ctx_t scctx = adapter->shared;
	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct e1000_context_desc *TXD;
	struct em_txbuffer *tx_buffer;
	int cur, hdr_len;

	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

	cur = pi->ipi_pidx;
	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	tx_buffer = &txr->tx_buffers[cur];

	/*
	 * Start offset for header checksum calculation.
	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcse =
	    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);

	/*
	 * Start offset for payload checksum calculation.
	 * End offset for payload checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);

	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	TXD->cmd_and_length = htole32(adapter->txd_cmd |
				E1000_TXD_CMD_DEXT |	/* Extended descr */
				E1000_TXD_CMD_TSE |	/* TSE context */
				E1000_TXD_CMD_IP |	/* Do IP csum */
				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
				(pi->ipi_len - hdr_len)); /* Total len */
	tx_buffer->eop = -1;
	txr->tx_tso = TRUE;

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur);
	return (cur);
}

#define TSO_WORKAROUND 4
#define DONT_FORCE_CTX 1

/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. Changing the
 *  context can be a performance detriment, and might be better
 *  just disabled. The reason arises in the way in which the
 *  controller supports pipelined requests from the Tx data DMA.
 *  Up to four requests can be pipelined, and they may belong to
 *  the same packet or to multiple packets. However, all requests
 *  for one packet are issued before a request is issued for a
 *  subsequent packet, and if a request for the next packet
 *  requires a context change, that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA,
 *  which in turn greatly slows down performance when sending
 *  small-sized frames.
 **********************************************************************/

static int
em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct em_txbuffer *tx_buffer;
	int csum_flags = pi->ipi_csum_flags;
	int cur, hdr_len;
	u32 cmd;

	cur = pi->ipi_pidx;
	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	cmd = adapter->txd_cmd;

	/*
	 * The 82574L can only remember the *last* context used
	 * regardless of the queue that it was used for.  We cannot reuse
	 * contexts on this hardware platform and must generate a new
	 * context every time.  82574L hardware spec, section 7.2.6,
	 * second note.
	 */
	if (DONT_FORCE_CTX &&
	    adapter->tx_num_queues == 1 &&
	    txr->csum_lhlen == pi->ipi_ehdrlen &&
	    txr->csum_iphlen == pi->ipi_ip_hlen &&
	    txr->csum_flags == csum_flags) {
		/*
		 * Same csum offload context as the previous packets;
		 * just return.
		 */
		*txd_upper = txr->csum_txd_upper;
		*txd_lower = txr->csum_txd_lower;
		return (cur);
	}

	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	if (csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
		TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
		cmd |= E1000_TXD_CMD_IP;
	}

	if (csum_flags & (CSUM_TCP|CSUM_UDP)) {
		uint8_t tucso;

		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;

		if (csum_flags & CSUM_TCP) {
			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		} else
			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	txr->csum_lhlen = pi->ipi_ehdrlen;
	txr->csum_iphlen = pi->ipi_ip_hlen;
	txr->csum_flags = csum_flags;
	txr->csum_txd_upper = *txd_upper;
	txr->csum_txd_lower = *txd_lower;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
		htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);

	tx_buffer = &txr->tx_buffers[cur];
	tx_buffer->eop = -1;

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(adapter->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
		      csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
	return (cur);
}

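/*
 * iflib txd_encap entry point: write the TX descriptors for the packet
 * described by 'pi'.  A TSO or checksum offload context descriptor is set
 * up first when the packet requests it, the VLAN tag is applied, and, where
 * the TSO workaround applies, the last segment is split so a small sentinel
 * descriptor follows it.  The final descriptor gets EOP/RS, the EOP index is
 * recorded in the first tx_buffer for em_isc_txd_credits_update(), and the
 * next producer index is returned through pi->ipi_new_pidx.
 */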
static int
em_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;
	int csum_flags = pi->ipi_csum_flags;
	int i, j, first, pidx_last;
	u32 txd_upper = 0, txd_lower = 0;

	struct em_txbuffer *tx_buffer;
	struct e1000_tx_desc *ctxd = NULL;
	bool do_tso, tso_desc;

	i = first = pi->ipi_pidx;
	do_tso = (csum_flags & CSUM_TSO);
	tso_desc = FALSE;
	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		txr->tx_tso = FALSE;
	}

	/* Do hardware assists */
	if (do_tso) {
		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
		tso_desc = TRUE;
	} else if (csum_flags & EM_CSUM_OFFLOAD) {
		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
	}

	if (pi->ipi_mflags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |= htole16(pi->ipi_vtag) << 16;
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
	/* XXX adapter->pcix_82544 -- lem_fill_descriptors */

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		uint32_t cmd;

		ctxd = &txr->tx_base[i];
		tx_buffer = &txr->tx_buffers[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;
		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;

		/*
		** TSO Workaround:
		** If this is the last descriptor, we want to
		** split it so we have a small final sentinel
		*/
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);

			if (++i == scctx->isc_ntxd[0])
				i = 0;

			/* Now make the sentinel */
			ctxd = &txr->tx_base[i];
			tx_buffer = &txr->tx_buffers[i];
			ctxd->buffer_addr = htole64(seg_addr + seg_len);
			ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]);
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]);
		}
		tx_buffer->eop = -1;
	}

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
		htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);

	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->eop = pidx_last;
	DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
	pi->ipi_new_pidx = i;

	return (0);
}

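/*
 * iflib txd_flush entry point: write the TX tail register (TDT) so the
 * hardware starts fetching the descriptors up to 'pidx'.
 */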
static void
em_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx)
{
	struct adapter *adapter = arg;
	struct em_tx_queue *que = &adapter->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx);
}

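/*
 * iflib txd_credits_update entry point: starting at 'cidx_init', walk the
 * descriptors of each packet whose EOP descriptor has the DD (descriptor
 * done) bit set, optionally clearing them, and return how many descriptors
 * were reclaimed so iflib can recycle them as transmit credits.
 */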
static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear)
{
	struct adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_tx_queue *que = &adapter->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	u32 cidx, processed = 0;
	int last, done;
	struct em_txbuffer *buf;
	struct e1000_tx_desc *tx_desc, *eop_desc;

	cidx = cidx_init;
	buf = &txr->tx_buffers[cidx];
	tx_desc = &txr->tx_base[cidx];
	last = buf->eop;
	if (last == -1)
		return (processed);
	eop_desc = &txr->tx_base[last];

	DPRINTF(iflib_get_dev(adapter->ctx),
		      "credits_update: cidx_init=%d clear=%d last=%d\n",
		      cidx_init, clear, last);
	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == scctx->isc_ntxd[0])
		last = 0;
	done = last;

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (cidx != done) {
			if (clear) {
				tx_desc->upper.data = 0;
				tx_desc->lower.data = 0;
				tx_desc->buffer_addr = 0;
				buf->eop = -1;
			}
			tx_desc++;
			buf++;
			processed++;

			/* wrap the ring ? */
			if (++cidx == scctx->isc_ntxd[0]) {
				cidx = 0;
			}
			buf = &txr->tx_buffers[cidx];
			tx_desc = &txr->tx_base[cidx];
		}
		/* See if we can continue to the next packet */
		last = buf->eop;
		if (last == -1)
			break;
		eop_desc = &txr->tx_base[last];
		/* Get new done point */
		if (++last == scctx->isc_ntxd[0])
			last = 0;
		done = last;
	}

	DPRINTF(iflib_get_dev(adapter->ctx), "Processed %d credits update\n", processed);
	return (processed);
}

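/*
 * iflib rxd_refill entry point for legacy descriptors: store the physical
 * address of each fresh receive buffer in the ring starting at 'pidx' and
 * clear the descriptor status so stale DD bits are not seen.
 */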
static void
lem_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused,
		  uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buflen __unused)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	int i;
	uint32_t next_pidx;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
		rxd->buffer_addr = htole64(paddrs[i]);
		/* status bits must be cleared */
		rxd->status = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

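/*
 * iflib rxd_refill entry point for extended descriptors: write the buffer
 * physical addresses into the read format of the descriptors starting at
 * 'pidx' and clear the writeback status/error word.
 */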
static void
em_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused,
		  uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buflen __unused)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	int i;
	uint32_t next_pidx;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = &rxr->rx_base[next_pidx];
		rxd->read.buffer_addr = htole64(paddrs[i]);
		/* DD bits must be cleared */
		rxd->wb.upper.status_error = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

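/*
 * iflib rxd_flush entry point: update the RX tail register (RDT) so the
 * hardware can use the newly refilled descriptors up to 'pidx'.
 */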
static void
em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx)
{
	struct adapter *sc = arg;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}

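/*
 * iflib rxd_available entry point for legacy descriptors: scan forward from
 * 'idx' and report how many completed packets (descriptors with DD set,
 * counted at EOP) are ready, bounded by 'budget'.
 */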
static int
lem_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	u32 staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
		staterr = rxd->status;

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;

		if (++i == scctx->isc_nrxd[0])
			i = 0;

		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

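/*
 * iflib rxd_available entry point for extended descriptors: same scan as
 * lem_isc_rxd_available(), but reading the little-endian writeback
 * status/error word of the extended descriptor format.
 */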
static int
em_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	u32 staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;

		if (++i == scctx->isc_nrxd[0]) {
			i = 0;
		}

		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

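/*
 * iflib rxd_pkt_get entry point for legacy descriptors: collect the
 * fragments of one received packet starting at ri->iri_cidx, fill in the
 * length, fragment list, checksum results and VLAN tag in 'ri', and clear
 * each descriptor's status.  Frame errors cause the packet to be dropped
 * and EBADMSG to be returned.
 */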
static int
lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	u16 len;
	u32 status, errors;
	bool eop;
	int i, cidx;

	status = errors = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
		status = rxd->status;
		errors = rxd->errors;

		/* Error Checking then decrement count */
		MPASS ((status & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->length);
		ri->iri_len += len;

		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			adapter->dropped_pkts++;
			/* XXX fixup if common */
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->status = 0;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* XXX add a faster way to look this up */
	if (adapter->hw.mac.type >= e1000_82543 && !(status & E1000_RXD_STAT_IXSM))
		lem_receive_checksum(status, errors, ri);

	if (status & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->special);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;

	return (0);
}

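/*
 * iflib rxd_pkt_get entry point for extended descriptors: as above, but also
 * recovers the RSS hash and hash type from the writeback descriptor so the
 * stack can reuse the hardware-computed flow id.
 */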
static int
em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;

	u16 len;
	u32 pkt_info;
	u32 staterr = 0;
	bool eop;
	int i, cidx, vtag;

	i = vtag = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le32toh(rxd->wb.lower.mrq);

		/* Error Checking then decrement count */
		MPASS ((staterr & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ri->iri_len += len;

		eop = (staterr & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			adapter->dropped_pkts++;
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->wb.upper.status_error &= htole32(~0xFF);

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* XXX add a faster way to look this up */
	if (adapter->hw.mac.type >= e1000_82543)
		em_receive_checksum(staterr, ri);

	if (staterr & E1000_RXD_STAT_VP) {
		vtag = le16toh(rxd->wb.upper.vlan);
	}

	ri->iri_vtag = vtag;
	if (vtag)
		ri->iri_flags |= M_VLANTAG;

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = em_determine_rsstype(pkt_info);

	ri->iri_nfrags = i;
	return (0);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it.
 *
 *********************************************************************/
static void
lem_receive_checksum(int status, int errors, if_rxd_info_t ri)
{
	/* Did it pass? */
	if (status & E1000_RXD_STAT_IPCS && !(errors & E1000_RXD_ERR_IPE))
		ri->iri_csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);

	if (status & E1000_RXD_STAT_TCPCS) {
		/* Did it pass? */
		if (!(errors & E1000_RXD_ERR_TCPE)) {
			ri->iri_csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			ri->iri_csum_data = htons(0xffff);
		}
	}
}

/********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 ******************************************************************/
static int
em_determine_rsstype(u32 pkt_info)
{
	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		return M_HASHTYPE_RSS_TCP_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV4:
		return M_HASHTYPE_RSS_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
		return M_HASHTYPE_RSS_TCP_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_EX:
		return M_HASHTYPE_RSS_IPV6_EX;
	case E1000_RXDADV_RSSTYPE_IPV6:
		return M_HASHTYPE_RSS_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return M_HASHTYPE_RSS_TCP_IPV6_EX;
	default:
		return M_HASHTYPE_OPAQUE;
	}
}
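
/*
 * As above, but for the extended descriptor status/error word: report IP
 * and TCP/UDP checksum validity to the stack unless the "ignore checksum"
 * bit is set.
 */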
static void
em_receive_checksum(uint32_t status, if_rxd_info_t ri)
{
	ri->iri_csum_flags = 0;

	/* Ignore Checksum bit is set */
	if (status & E1000_RXD_STAT_IXSM)
		return;

	/* If the IP checksum exists and there is no IP Checksum error */
	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
	    E1000_RXD_STAT_IPCS) {
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
	}

	/* TCP or UDP checksum */
	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
	    E1000_RXD_STAT_TCPCS) {
		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		ri->iri_csum_data = htons(0xffff);
	}
	if (status & E1000_RXD_STAT_UDPCS) {
		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		ri->iri_csum_data = htons(0xffff);
	}
}