/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
 * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */
#include "if_em.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    uint32_t *txd_upper, uint32_t *txd_lower);
static int em_transmit_checksum_setup(struct e1000_softc *sc,
    if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower);
static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx);
static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru);

static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t);
static int em_determine_rsstype(uint32_t pkt_info);
extern int em_intr(void *arg);

struct if_txrx em_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = em_isc_rxd_available,
	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
	.ift_rxd_refill = em_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

struct if_txrx lem_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = lem_isc_rxd_available,
	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
	.ift_rxd_refill = lem_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};
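
/*
 * Usage sketch (illustrative, not part of this file): iflib drivers hand
 * one of these ops tables to the framework through the softc context at
 * attach time.  In if_em.c this looks roughly like the following, where
 * the exact predicate separating "lem-class" (legacy RX descriptor)
 * adapters from "em-class" (extended RX descriptor) adapters is a
 * hypothetical stand-in:
 *
 *	if (sc->hw.mac.type >= em_mac_min)	// hypothetical cutoff
 *		scctx->isc_txrx = &em_txrx;
 *	else
 *		scctx->isc_txrx = &lem_txrx;
 *
 * Note that the two tables share all TX routines; only the RX side
 * differs between the legacy and extended descriptor formats.
 */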

extern if_shared_ctx_t em_sctx;

void
em_dump_rs(struct e1000_softc *sc)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que;
	struct tx_ring *txr;
	qidx_t i, ntxd, qid, cur;
	int16_t rs_cidx;
	uint8_t status;

	printf("\n");
	ntxd = scctx->isc_ntxd[0];
	for (qid = 0; qid < sc->tx_num_queues; qid++) {
		que = &sc->tx_queues[qid];
		txr = &que->txr;
		rs_cidx = txr->tx_rs_cidx;
		if (rs_cidx != txr->tx_rs_pidx) {
			cur = txr->tx_rsq[rs_cidx];
			status = txr->tx_base[cur].upper.fields.status;
			if (!(status & E1000_TXD_STAT_DD))
				printf("qid[%d]->tx_rsq[%d]: %d clear ",
				    qid, rs_cidx, cur);
		} else {
			rs_cidx = (rs_cidx - 1) & (ntxd - 1);
			cur = txr->tx_rsq[rs_cidx];
			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ",
			    qid, rs_cidx, cur);
		}
		printf("cidx_prev=%d rs_pidx=%d ", txr->tx_cidx_processed,
		    txr->tx_rs_pidx);
		for (i = 0; i < ntxd; i++) {
			if (txr->tx_base[i].upper.fields.status &
			    E1000_TXD_STAT_DD)
				printf("%d set ", i);
		}
		printf("\n");
	}
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using TSO context descriptors
 *
 **********************************************************************/
static int
em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper,
    uint32_t *txd_lower)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct e1000_hw *hw = &sc->hw;
	struct e1000_context_desc *TXD;
	int cur, hdr_len;
	uint32_t cmd_type_len;

	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

	cur = pi->ipi_pidx;
	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];

	/*
	 * ipcss - Start offset for header checksum calculation.
	 * ipcse - End offset for header checksum calculation.
	 * ipcso - Offset of the place to put the checksum.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcse =
	    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
	TXD->lower_setup.ip_fields.ipcso =
	    pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);

	/*
	 * tucss - Start offset for payload checksum calculation.
	 * tucse - End offset for payload checksum calculation.
	 * tucso - Offset of the place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);

	/*
	 * mss     - Payload size per packet without any headers.
	 * hdr_len - Length of all headers up to the payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	/*
	 * "PCI/PCI-X SDM 4.0" page 45, and "PCIe GbE SDM 2.5" page 63
	 * - Set up basic TUCMDs
	 * - Enable IP bit on 82544
	 * - For others, IP bit on indicates IPv4, while off indicates IPv6
	 */
	cmd_type_len = sc->txd_cmd |
	    E1000_TXD_CMD_DEXT | /* Extended descr */
	    E1000_TXD_CMD_TSE |  /* TSE context */
	    E1000_TXD_CMD_TCP;   /* Do TCP checksum */
	if (hw->mac.type == e1000_82544 || pi->ipi_etype == ETHERTYPE_IP)
		cmd_type_len |= E1000_TXD_CMD_IP;
	TXD->cmd_and_length = htole32(cmd_type_len |
	    (pi->ipi_len - hdr_len)); /* Total len */

	txr->tx_tso = true;

	if (++cur == scctx->isc_ntxd[0])
		cur = 0;
	DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__,
	    pi->ipi_pidx, cur);
	return (cur);
}
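
/*
 * Worked example for em_tso_setup() (a sketch, assuming a plain TCP/IPv4
 * frame with a 14-byte Ethernet header, 20-byte IP header and 20-byte TCP
 * header, i.e. ipi_ehdrlen = 14, ipi_ip_hlen = 20, ipi_tcp_hlen = 20):
 *
 *	hdr_len = 14 + 20 + 20 = 54
 *	ipcss   = 14			// IP csum starts after Ethernet
 *	ipcse   = 14 + 20 - 1 = 33	// last byte of the IP header
 *	ipcso   = 14 + 10 = 24		// offsetof(struct ip, ip_sum) == 10
 *	tucss   = 14 + 20 = 34		// TCP csum starts after IP
 *	tucso   = 34 + 16 = 50		// offsetof(struct tcphdr, th_sum)
 *
 * and cmd_and_length carries ipi_len - 54, the payload length with all
 * headers excluded.
 */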

/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. The occasion
 *  of a context change can be a performance detriment, and
 *  might be better just disabled. The reason arises in the way
 *  in which the controller supports pipelined requests from the
 *  Tx data DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets. However, all
 *  requests for one packet are issued before a request is issued
 *  for a subsequent packet, and if a request for the next packet
 *  requires a context change, that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA, which
 *  in turn greatly slows down performance when sending small
 *  frames.
 **********************************************************************/
#define DONT_FORCE_CTX 1

static int
em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    uint32_t *txd_upper, uint32_t *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	int csum_flags = pi->ipi_csum_flags;
	int cur, hdr_len;
	uint32_t cmd;

	cur = pi->ipi_pidx;
	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	cmd = sc->txd_cmd;

	/*
	 * The 82574L can only remember the *last* context used
	 * regardless of the queue it was used for.  We cannot reuse
	 * contexts on this hardware platform and must generate a new
	 * context every time.  82574L hardware spec, section 7.2.6,
	 * second note.
	 */
	if (DONT_FORCE_CTX &&
	    sc->tx_num_queues == 1 &&
	    txr->csum_lhlen == pi->ipi_ehdrlen &&
	    txr->csum_iphlen == pi->ipi_ip_hlen &&
	    txr->csum_flags == csum_flags) {
		/*
		 * Same csum offload context as the previous packets;
		 * just return.
		 */
		*txd_upper = txr->csum_txd_upper;
		*txd_lower = txr->csum_txd_lower;
		return (cur);
	}

	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	/*
	 * ipcss - Start offset for header checksum calculation.
	 * ipcse - End offset for header checksum calculation.
	 * ipcso - Offset of the place to put the checksum.
	 *
	 * We set ipcsX values regardless of IP version to work around HW
	 * issues, and ipcse must be 0 for IPv6 per "PCIe GbE SDM 2.5" page 61.
	 * IXSM controls whether it's inserted.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen +
	    offsetof(struct ip, ip_sum);
	if (csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
		cmd |= E1000_TXD_CMD_IP;
	} else if (csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP))
		TXD->lower_setup.ip_fields.ipcse = htole16(0);

	/*
	 * tucss - Start offset for payload checksum calculation.
	 * tucse - End offset for payload checksum calculation.
	 * tucso - Offset of the place to put the checksum.
	 */
	if (csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP)) {
		uint8_t tucso;

		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;

		if (csum_flags & (CSUM_TCP | CSUM_IP6_TCP)) {
			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		} else
			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	txr->csum_lhlen = pi->ipi_ehdrlen;
	txr->csum_iphlen = pi->ipi_ip_hlen;
	txr->csum_flags = csum_flags;
	txr->csum_txd_upper = *txd_upper;
	txr->csum_txd_lower = *txd_lower;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
	    htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);

	if (++cur == scctx->isc_ntxd[0])
		cur = 0;
	DPRINTF(iflib_get_dev(sc->ctx),
	    "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
	    csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
	return (cur);
}
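
/*
 * Worked example for em_transmit_checksum_setup() (a sketch, assuming
 * CSUM_IP | CSUM_TCP on a standard 14/20-byte Ethernet/IPv4 header):
 *
 *	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
 *	ipcse      = 34			// hdr_len = 14 + 20
 *	tucss      = 34
 *	tucso      = 34 + 16 = 50	// offsetof(struct tcphdr, th_sum)
 *
 * A following packet with the same header layout and csum_flags then
 * takes the DONT_FORCE_CTX early return above and reuses the cached
 * context instead of emitting a new context descriptor.
 */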

#define TSO_WORKAROUND 4 /* TSO sentinel descriptor length */
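
/*
 * Example of the workaround (a sketch): if the final 2048-byte segment
 * of a packet needing the sentinel starts at seg_addr, the encap loop
 * below emits one descriptor covering bytes [seg_addr, seg_addr + 2044)
 * followed by a 4-byte sentinel descriptor covering
 * [seg_addr + 2044, seg_addr + 2048), so the hardware cannot write back
 * the packet before the whole segment has been fetched.
 */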

static int
em_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;
	int csum_flags = pi->ipi_csum_flags;
	int i, j, first, pidx_last;
	uint32_t txd_flags, txd_upper = 0, txd_lower = 0;

	struct e1000_tx_desc *ctxd = NULL;
	bool do_tso, tso_desc;
	qidx_t ntxd;

	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
	i = first = pi->ipi_pidx;
	do_tso = (csum_flags & CSUM_TSO);
	tso_desc = false;
	ntxd = scctx->isc_ntxd[0];
	/*
	 * TSO hardware workaround: if this packet is not TSO, is only
	 * a single descriptor long, and follows a TSO burst, then we
	 * need to add a sentinel descriptor to prevent premature
	 * writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == true)) {
		if (nsegs == 1)
			tso_desc = true;
		txr->tx_tso = false;
	}

	/* Do hardware assists */
	if (do_tso) {
		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
		tso_desc = true;
	} else if (csum_flags & EM_CSUM_OFFLOAD) {
		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
	}

	if (pi->ipi_mflags & M_VLANTAG) {
		/* Set the VLAN id. */
		txd_upper |= htole16(pi->ipi_vtag) << 16;
		/* Tell hardware to add the tag. */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	DPRINTF(iflib_get_dev(sc->ctx),
	    "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
	/* XXX sc->pcix_82544 -- lem_fill_descriptors */

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		uint32_t cmd;

		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;
		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;

		/*
		 * TSO workaround:
		 * if this is the last descriptor, split it so that we
		 * end with a small final sentinel.
		 */
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);

			if (++i == scctx->isc_ntxd[0])
				i = 0;

			/* Now make the sentinel */
			ctxd = &txr->tx_base[i];
			ctxd->buffer_addr = htole64(seg_addr + seg_len);
			ctxd->lower.data =
			    htole32(cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "TSO path pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		}
	}

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP)
	 * and Report Status (RS).
	 */
	if (txd_flags && nsegs) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		DPRINTF(iflib_get_dev(sc->ctx),
		    "setting to RS on %d rs_pidx %d first: %d\n",
		    pidx_last, txr->tx_rs_pidx, first);
		txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1);
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}
	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
	DPRINTF(iflib_get_dev(sc->ctx),
	    "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
	pi->ipi_new_pidx = i;

	return (0);
}

static void
em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx);
}

static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	MPASS(cur != QIDX_INVALID);
	status = txr->tx_base[cur].upper.fields.status;
	updated = !!(status & E1000_TXD_STAT_DD);

	if (!updated)
		return (0);

	/*
	 * If clear is false, just let the caller know that there
	 * are descriptors to reclaim.
	 */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		DPRINTF(iflib_get_dev(sc->ctx),
		    "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
		    __FUNCTION__, prev, cur, clear, delta);

		processed += delta;
		prev = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd - 1);
		if (rs_cidx == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		MPASS(cur != QIDX_INVALID);
		status = txr->tx_base[cur].upper.fields.status;
	} while ((status & E1000_TXD_STAT_DD));

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return (processed);
}
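
/*
 * Worked example for the delta computation above (a sketch, assuming
 * ntxd = 1024): if tx_cidx_processed (prev) is 1020 and the completed RS
 * descriptor (cur) is at index 2, then
 *
 *	delta = 2 - 1020 = -1018  -->  delta += 1024  -->  delta = 6
 *
 * i.e. descriptors 1021..1023 and 0..2, six in total, are reclaimed.
 */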

static void
lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
		rxd->buffer_addr = htole64(paddrs[i]);
		/* status bits must be cleared */
		rxd->status = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

static void
em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	uint16_t rxqid = iru->iru_qsidx;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = &rxr->rx_base[next_pidx];
		rxd->read.buffer_addr = htole64(paddrs[i]);
		/* DD bits must be cleared */
		rxd->wb.upper.status_error = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}
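
/*
 * Refill example (a sketch): with isc_nrxd[0] = 1024, an update carrying
 * iru_pidx = 1020 and iru_count = 8 programs buffer addresses into ring
 * slots 1020..1023 and then wraps to slots 0..3.  Clearing the status
 * (or DD) bits here is what lets the *_rxd_available() routines below
 * detect which of these slots the hardware has since written back.
 */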

static void
em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}

static int
lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
		staterr = rxd->status;

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

static int
em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

static int
lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint16_t len;
	uint32_t status, errors;
	bool eop;
	int i, cidx;

	status = errors = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
		status = rxd->status;
		errors = rxd->errors;

		/* The hardware must have finished this descriptor. */
		MPASS((status & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->length);
		ri->iri_len += len;

		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			/* XXX fixup if common */
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->status = 0;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(status, errors, ri);

	if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING &&
	    status & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->special) & E1000_RXD_SPC_VLAN_MASK;
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;

	return (0);
}

static int
em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;

	uint16_t len;
	uint32_t pkt_info;
	uint32_t staterr;
	bool eop;
	int i, cidx;

	staterr = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le32toh(rxd->wb.lower.mrq);

		/* The hardware must have finished this descriptor. */
		MPASS((staterr & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ri->iri_len += len;

		eop = (staterr & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status byte. */
		rxd->wb.upper.status_error &= htole32(~0xFF);

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(staterr, staterr >> 24, ri);

	if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING &&
	    staterr & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = em_determine_rsstype(pkt_info);

	ri->iri_nfrags = i;
	return (0);
}
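
/*
 * Note on the staterr >> 24 above: in the extended descriptor writeback
 * format, the error bits occupy bits 31:24 of status_error.  For example
 * (a sketch), staterr == 0x40000003 carries DD | EOP in the status byte
 * and E1000_RXD_ERR_IPE (0x40) in the errors byte, so
 * em_receive_checksum() receives errors == 0x40 and leaves the checksum
 * unverified for the stack to handle.
 */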

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it.
 *
 *********************************************************************/
static void
em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri)
{
	if (__predict_false(status & E1000_RXD_STAT_IXSM))
		return;

	/* If there is a layer 3 or 4 error we are done */
	if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE)))
		return;

	/* IP checksum good */
	if (status & E1000_RXD_STAT_IPCS)
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/* Valid L4 checksum */
	if (__predict_true(status &
	    (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
		ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		ri->iri_csum_data = htons(0xffff);
	}
}
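
/*
 * Example outcome (a sketch): for a good TCP/IPv4 frame the hardware
 * sets E1000_RXD_STAT_IPCS | E1000_RXD_STAT_TCPCS with no error bits,
 * so the stack sees
 *
 *	ri->iri_csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 *	    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 *	ri->iri_csum_data  = 0xffff;
 *
 * and skips software checksum verification entirely.
 */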

/*********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 *********************************************************************/
static int
em_determine_rsstype(uint32_t pkt_info)
{
	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		return (M_HASHTYPE_RSS_TCP_IPV4);
	case E1000_RXDADV_RSSTYPE_IPV4:
		return (M_HASHTYPE_RSS_IPV4);
	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
		return (M_HASHTYPE_RSS_TCP_IPV6);
	case E1000_RXDADV_RSSTYPE_IPV6_EX:
		return (M_HASHTYPE_RSS_IPV6_EX);
	case E1000_RXDADV_RSSTYPE_IPV6:
		return (M_HASHTYPE_RSS_IPV6);
	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return (M_HASHTYPE_RSS_TCP_IPV6_EX);
	default:
		return (M_HASHTYPE_OPAQUE);
	}
}
818