xref: /freebsd/sys/dev/e1000/em_txrx.c (revision 7a7741af18d6c8a804cc643cb7ecda9d730c6aa6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
5  * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "if_em.h"
31 
32 #ifdef RSS
33 #include <net/rss_config.h>
34 #include <netinet/in_rss.h>
35 #endif
36 
/*
 * Debug tracing helper: DPRINTF is an alias for device_printf when
 * VERBOSE_DEBUG is defined and expands to nothing otherwise.
 */
#if !defined(VERBOSE_DEBUG)
#define DPRINTF(...)
#else
#define DPRINTF device_printf
#endif
42 
43 /*********************************************************************
44  *  Local Function prototypes
45  *********************************************************************/
46 static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi,
47     uint32_t *txd_upper, uint32_t *txd_lower);
48 static int em_transmit_checksum_setup(struct e1000_softc *sc,
49     if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower);
50 static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
51 static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
52 static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
53 static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru);
54 static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
55     qidx_t pidx);
56 static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
57     qidx_t budget);
58 static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
59 
60 static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru);
61 
62 static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
63    qidx_t budget);
64 static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
65 
66 static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t);
67 static int em_determine_rsstype(uint32_t pkt_info);
68 extern int em_intr(void *arg);
69 
70 struct if_txrx em_txrx = {
71 	.ift_txd_encap = em_isc_txd_encap,
72 	.ift_txd_flush = em_isc_txd_flush,
73 	.ift_txd_credits_update = em_isc_txd_credits_update,
74 	.ift_rxd_available = em_isc_rxd_available,
75 	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
76 	.ift_rxd_refill = em_isc_rxd_refill,
77 	.ift_rxd_flush = em_isc_rxd_flush,
78 	.ift_legacy_intr = em_intr
79 };
80 
81 struct if_txrx lem_txrx = {
82 	.ift_txd_encap = em_isc_txd_encap,
83 	.ift_txd_flush = em_isc_txd_flush,
84 	.ift_txd_credits_update = em_isc_txd_credits_update,
85 	.ift_rxd_available = lem_isc_rxd_available,
86 	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
87 	.ift_rxd_refill = lem_isc_rxd_refill,
88 	.ift_rxd_flush = em_isc_rxd_flush,
89 	.ift_legacy_intr = em_intr
90 };
91 
92 extern if_shared_ctx_t em_sctx;
93 
94 void
95 em_dump_rs(struct e1000_softc *sc)
96 {
97 	if_softc_ctx_t scctx = sc->shared;
98 	struct em_tx_queue *que;
99 	struct tx_ring *txr;
100 	qidx_t i, ntxd, qid, cur;
101 	int16_t rs_cidx;
102 	uint8_t status;
103 
104 	printf("\n");
105 	ntxd = scctx->isc_ntxd[0];
106 	for (qid = 0; qid < sc->tx_num_queues; qid++) {
107 		que = &sc->tx_queues[qid];
108 		txr =  &que->txr;
109 		rs_cidx = txr->tx_rs_cidx;
110 		if (rs_cidx != txr->tx_rs_pidx) {
111 			cur = txr->tx_rsq[rs_cidx];
112 			status = txr->tx_base[cur].upper.fields.status;
113 			if (!(status & E1000_TXD_STAT_DD))
114 				printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur);
115 		} else {
116 			rs_cidx = (rs_cidx-1)&(ntxd-1);
117 			cur = txr->tx_rsq[rs_cidx];
118 			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ", qid, rs_cidx, cur);
119 		}
120 		printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed,
121 		    txr->tx_rs_pidx);
122 		for (i = 0; i < ntxd; i++) {
123 			if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD)
124 				printf("%d set ", i);
125 		}
126 		printf("\n");
127 	}
128 }
129 
130 /**********************************************************************
131  *
132  *  Setup work for hardware segmentation offload (TSO) on
133  *  adapters using advanced tx descriptors
134  *
135  **********************************************************************/
136 static int
137 em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper,
138     uint32_t *txd_lower)
139 {
140 	if_softc_ctx_t scctx = sc->shared;
141 	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
142 	struct tx_ring *txr = &que->txr;
143 	struct e1000_context_desc *TXD;
144 	int cur, hdr_len;
145 	uint32_t cmd_type_len;
146 
147 	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
148 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
149 		      E1000_TXD_DTYP_D |	/* Data descr type */
150 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
151 
152 	cur = pi->ipi_pidx;
153 	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
154 
155 	/*
156 	 * ipcss - Start offset for header checksum calculation.
157 	 * ipcse - End offset for header checksum calculation.
158 	 * ipcso - Offset of place to put the checksum.
159 	 */
160 	switch(pi->ipi_etype) {
161 	case ETHERTYPE_IP:
162 		/* IP and/or TCP header checksum calculation and insertion. */
163 		*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
164 
165 		TXD->lower_setup.ip_fields.ipcse =
166 		    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
167 		break;
168 	case ETHERTYPE_IPV6:
169 		/* TCP header checksum calculation and insertion. */
170 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
171 
172 		TXD->lower_setup.ip_fields.ipcse = htole16(0);
173 		break;
174 	default:
175 		break;
176 	}
177 	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
178 	TXD->lower_setup.ip_fields.ipcso =
179 	    pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
180 
181 	/*
182 	 * tucss - Start offset for payload checksum calculation.
183 	 * tucse - End offset for payload checksum calculation.
184 	 * tucso - Offset of place to put the checksum.
185 	 */
186 	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
187 	TXD->upper_setup.tcp_fields.tucse = 0;
188 	TXD->upper_setup.tcp_fields.tucso =
189 	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);
190 
191 	/*
192 	 * Payload size per packet w/o any headers.
193 	 * Length of all headers up to payload.
194 	 */
195 	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
196 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
197 
198 	/*
199 	 * "PCI/PCI-X SDM 4.0" page 45, and "PCIe GbE SDM 2.5" page 63
200 	 * - Set up basic TUCMDs
201 	 * - For others IP bit on indicates IPv4, while off indicates IPv6
202 	*/
203 	cmd_type_len = sc->txd_cmd |
204 	    E1000_TXD_CMD_DEXT | /* Extended descr */
205 	    E1000_TXD_CMD_TSE |  /* TSE context */
206 	    E1000_TXD_CMD_TCP;   /* Do TCP checksum */
207 	if (pi->ipi_etype == ETHERTYPE_IP)
208 		cmd_type_len |= E1000_TXD_CMD_IP;
209 	TXD->cmd_and_length = htole32(cmd_type_len |
210 	    (pi->ipi_len - hdr_len)); /* Total len */
211 
212 	txr->tx_tso = true;
213 
214 	if (++cur == scctx->isc_ntxd[0]) {
215 		cur = 0;
216 	}
217 	DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__,
218 	    pi->ipi_pidx, cur);
219 	return (cur);
220 }
221 
222 /*********************************************************************
223  *  The offload context is protocol specific (TCP/UDP) and thus
224  *  only needs to be set when the protocol changes. The occasion
225  *  of a context change can be a performance detriment, and
226  *  might be better just disabled. The reason arises in the way
227  *  in which the controller supports pipelined requests from the
228  *  Tx data DMA. Up to four requests can be pipelined, and they may
229  *  belong to the same packet or to multiple packets. However all
230  *  requests for one packet are issued before a request is issued
231  *  for a subsequent packet and if a request for the next packet
232  *  requires a context change, that request will be stalled
233  *  until the previous request completes. This means setting up
234  *  a new context effectively disables pipelined Tx data DMA which
235  *  in turn greatly slow down performance to send small sized
236  *  frames.
237  **********************************************************************/
238 #define DONT_FORCE_CTX 1
239 
240 static int
241 em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi,
242     uint32_t *txd_upper, uint32_t *txd_lower)
243 {
244 	struct e1000_context_desc *TXD = NULL;
245 	if_softc_ctx_t scctx = sc->shared;
246 	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
247 	struct tx_ring *txr = &que->txr;
248 	int csum_flags = pi->ipi_csum_flags;
249 	int cur, hdr_len;
250 	uint32_t cmd;
251 
252 	cur = pi->ipi_pidx;
253 	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
254 	cmd = sc->txd_cmd;
255 
256 	/*
257 	 * The 82574L can only remember the *last* context used
258 	 * regardless of queue that it was use for.  We cannot reuse
259 	 * contexts on this hardware platform and must generate a new
260 	 * context every time.  82574L hardware spec, section 7.2.6,
261 	 * second note.
262 	 */
263 	if (DONT_FORCE_CTX &&
264 	    sc->tx_num_queues == 1 &&
265 	    txr->csum_lhlen == pi->ipi_ehdrlen &&
266 	    txr->csum_iphlen == pi->ipi_ip_hlen &&
267 	    txr->csum_flags == csum_flags) {
268 		/*
269 		 * Same csum offload context as the previous packets;
270 		 * just return.
271 		 */
272 		*txd_upper = txr->csum_txd_upper;
273 		*txd_lower = txr->csum_txd_lower;
274 		return (cur);
275 	}
276 
277 	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
278 	/*
279 	 * ipcss - Start offset for header checksum calculation.
280 	 * ipcse - End offset for header checksum calculation.
281 	 * ipcso - Offset of place to put the checksum.
282 	 *
283 	 * We set ipcsX values regardless of IP version to work around HW issues
284 	 * and ipcse must be 0 for IPv6 per "PCIe GbE SDM 2.5" page 61.
285 	 * IXSM controls whether it's inserted.
286 	 */
287 	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
288 	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen +
289 	    offsetof(struct ip, ip_sum);
290 	if (csum_flags & CSUM_IP) {
291 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
292 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len - 1);
293 		cmd |= E1000_TXD_CMD_IP;
294 	} else if (csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP))
295 		TXD->lower_setup.ip_fields.ipcse = htole16(0);
296 
297 	/*
298 	 * tucss - Start offset for payload checksum calculation.
299 	 * tucse - End offset for payload checksum calculation.
300 	 * tucso - Offset of place to put the checksum.
301 	 */
302 	if (csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP)) {
303 		uint8_t tucso;
304 
305 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
306 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
307 
308 		if (csum_flags & (CSUM_TCP | CSUM_IP6_TCP)) {
309 			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
310 			cmd |= E1000_TXD_CMD_TCP;
311 		} else
312 			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
313 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
314 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
315 		TXD->upper_setup.tcp_fields.tucso = tucso;
316 	}
317 
318 	txr->csum_lhlen = pi->ipi_ehdrlen;
319 	txr->csum_iphlen = pi->ipi_ip_hlen;
320 	txr->csum_flags = csum_flags;
321 	txr->csum_txd_upper = *txd_upper;
322 	txr->csum_txd_lower = *txd_lower;
323 
324 	TXD->tcp_seg_setup.data = htole32(0);
325 	TXD->cmd_and_length =
326 		htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);
327 
328 	if (++cur == scctx->isc_ntxd[0]) {
329 		cur = 0;
330 	}
331 	DPRINTF(iflib_get_dev(sc->ctx),
332 	    "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
333 	    csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
334 	return (cur);
335 }
336 
337 #define TSO_WORKAROUND 4 /* TSO sentinel descriptor */
338 
339 static int
340 em_isc_txd_encap(void *arg, if_pkt_info_t pi)
341 {
342 	struct e1000_softc *sc = arg;
343 	if_softc_ctx_t scctx = sc->shared;
344 	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
345 	struct tx_ring *txr = &que->txr;
346 	bus_dma_segment_t *segs = pi->ipi_segs;
347 	int nsegs = pi->ipi_nsegs;
348 	int csum_flags = pi->ipi_csum_flags;
349 	int i, j, first, pidx_last;
350 	uint32_t txd_flags, txd_upper = 0, txd_lower = 0;
351 
352 	struct e1000_tx_desc *ctxd = NULL;
353 	bool do_tso, tso_desc;
354 	qidx_t ntxd;
355 
356 	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
357 	i = first = pi->ipi_pidx;
358 	do_tso = (csum_flags & CSUM_TSO);
359 	tso_desc = false;
360 	ntxd = scctx->isc_ntxd[0];
361 	/*
362 	 * TSO Hardware workaround, if this packet is not
363 	 * TSO, and is only a single descriptor long, and
364 	 * it follows a TSO burst, then we need to add a
365 	 * sentinel descriptor to prevent premature writeback.
366 	 */
367 	if ((!do_tso) && (txr->tx_tso == true)) {
368 		if (nsegs == 1)
369 			tso_desc = true;
370 		txr->tx_tso = false;
371 	}
372 
373 	/* Do hardware assists */
374 	if (do_tso) {
375 		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
376 		tso_desc = true;
377 	} else if (csum_flags & EM_CSUM_OFFLOAD) {
378 		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
379 	}
380 
381 	if (pi->ipi_mflags & M_VLANTAG) {
382 		/* Set the vlan id. */
383 		txd_upper |= htole16(pi->ipi_vtag) << 16;
384 		/* Tell hardware to add tag */
385 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
386 	}
387 
388 	DPRINTF(iflib_get_dev(sc->ctx),
389 	    "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
390 	/* XXX sc->pcix_82544 -- lem_fill_descriptors */
391 
392 	/* Set up our transmit descriptors */
393 	for (j = 0; j < nsegs; j++) {
394 		bus_size_t seg_len;
395 		bus_addr_t seg_addr;
396 		uint32_t cmd;
397 
398 		ctxd = &txr->tx_base[i];
399 		seg_addr = segs[j].ds_addr;
400 		seg_len = segs[j].ds_len;
401 		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;
402 
403 		/*
404 		 * TSO Workaround:
405 		 * If this is the last descriptor, we want to
406 		 * split it so we have a small final sentinel
407 		 */
408 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
409 			seg_len -= TSO_WORKAROUND;
410 			ctxd->buffer_addr = htole64(seg_addr);
411 			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
412 			ctxd->upper.data = htole32(txd_upper);
413 
414 			if (++i == scctx->isc_ntxd[0])
415 				i = 0;
416 
417 			/* Now make the sentinel */
418 			ctxd = &txr->tx_base[i];
419 			ctxd->buffer_addr = htole64(seg_addr + seg_len);
420 			ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND);
421 			ctxd->upper.data = htole32(txd_upper);
422 			pidx_last = i;
423 			if (++i == scctx->isc_ntxd[0])
424 				i = 0;
425 			DPRINTF(iflib_get_dev(sc->ctx),
426 			    "TSO path pidx_last=%d i=%d ntxd[0]=%d\n",
427 			    pidx_last, i, scctx->isc_ntxd[0]);
428 		} else {
429 			ctxd->buffer_addr = htole64(seg_addr);
430 			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
431 			ctxd->upper.data = htole32(txd_upper);
432 			pidx_last = i;
433 			if (++i == scctx->isc_ntxd[0])
434 				i = 0;
435 			DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n",
436 			    pidx_last, i, scctx->isc_ntxd[0]);
437 		}
438 	}
439 
440 	/*
441 	 * Last Descriptor of Packet
442 	 * needs End Of Packet (EOP)
443 	 * and Report Status (RS)
444 	 */
445 	if (txd_flags && nsegs) {
446 		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
447 		DPRINTF(iflib_get_dev(sc->ctx),
448 		    "setting to RS on %d rs_pidx %d first: %d\n",
449 		    pidx_last, txr->tx_rs_pidx, first);
450 		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1);
451 		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
452 	}
453 	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
454 	DPRINTF(iflib_get_dev(sc->ctx),
455 	    "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
456 	pi->ipi_new_pidx = i;
457 
458 	/* Sent data accounting for AIM */
459 	txr->tx_bytes += pi->ipi_len;
460 	++txr->tx_packets;
461 
462 	return (0);
463 }
464 
465 static void
466 em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
467 {
468 	struct e1000_softc *sc = arg;
469 	struct em_tx_queue *que = &sc->tx_queues[txqid];
470 	struct tx_ring *txr = &que->txr;
471 
472 	E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx);
473 }
474 
475 static int
476 em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
477 {
478 	struct e1000_softc *sc = arg;
479 	if_softc_ctx_t scctx = sc->shared;
480 	struct em_tx_queue *que = &sc->tx_queues[txqid];
481 	struct tx_ring *txr = &que->txr;
482 
483 	qidx_t processed = 0;
484 	int updated;
485 	qidx_t cur, prev, ntxd, rs_cidx;
486 	int32_t delta;
487 	uint8_t status;
488 
489 	rs_cidx = txr->tx_rs_cidx;
490 	if (rs_cidx == txr->tx_rs_pidx)
491 		return (0);
492 	cur = txr->tx_rsq[rs_cidx];
493 	MPASS(cur != QIDX_INVALID);
494 	status = txr->tx_base[cur].upper.fields.status;
495 	updated = !!(status & E1000_TXD_STAT_DD);
496 
497 	if (!updated)
498 		return (0);
499 
500 	/* If clear is false just let caller know that there
501 	 * are descriptors to reclaim */
502 	if (!clear)
503 		return (1);
504 
505 	prev = txr->tx_cidx_processed;
506 	ntxd = scctx->isc_ntxd[0];
507 	do {
508 		MPASS(prev != cur);
509 		delta = (int32_t)cur - (int32_t)prev;
510 		if (delta < 0)
511 			delta += ntxd;
512 		MPASS(delta > 0);
513 		DPRINTF(iflib_get_dev(sc->ctx),
514 			      "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
515 			      __FUNCTION__, prev, cur, clear, delta);
516 
517 		processed += delta;
518 		prev  = cur;
519 		rs_cidx = (rs_cidx + 1) & (ntxd-1);
520 		if (rs_cidx  == txr->tx_rs_pidx)
521 			break;
522 		cur = txr->tx_rsq[rs_cidx];
523 		MPASS(cur != QIDX_INVALID);
524 		status = txr->tx_base[cur].upper.fields.status;
525 	} while ((status & E1000_TXD_STAT_DD));
526 
527 	txr->tx_rs_cidx = rs_cidx;
528 	txr->tx_cidx_processed = prev;
529 	return(processed);
530 }
531 
532 static void
533 lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
534 {
535 	struct e1000_softc *sc = arg;
536 	if_softc_ctx_t scctx = sc->shared;
537 	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
538 	struct rx_ring *rxr = &que->rxr;
539 	struct e1000_rx_desc *rxd;
540 	uint64_t *paddrs;
541 	uint32_t next_pidx, pidx;
542 	uint16_t count;
543 	int i;
544 
545 	paddrs = iru->iru_paddrs;
546 	pidx = iru->iru_pidx;
547 	count = iru->iru_count;
548 
549 	for (i = 0, next_pidx = pidx; i < count; i++) {
550 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
551 		rxd->buffer_addr = htole64(paddrs[i]);
552 		/* status bits must be cleared */
553 		rxd->status = 0;
554 
555 		if (++next_pidx == scctx->isc_nrxd[0])
556 			next_pidx = 0;
557 	}
558 }
559 
560 static void
561 em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
562 {
563 	struct e1000_softc *sc = arg;
564 	if_softc_ctx_t scctx = sc->shared;
565 	uint16_t rxqid = iru->iru_qsidx;
566 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
567 	struct rx_ring *rxr = &que->rxr;
568 	union e1000_rx_desc_extended *rxd;
569 	uint64_t *paddrs;
570 	uint32_t next_pidx, pidx;
571 	uint16_t count;
572 	int i;
573 
574 	paddrs = iru->iru_paddrs;
575 	pidx = iru->iru_pidx;
576 	count = iru->iru_count;
577 
578 	for (i = 0, next_pidx = pidx; i < count; i++) {
579 		rxd = &rxr->rx_base[next_pidx];
580 		rxd->read.buffer_addr = htole64(paddrs[i]);
581 		/* DD bits must be cleared */
582 		rxd->wb.upper.status_error = 0;
583 
584 		if (++next_pidx == scctx->isc_nrxd[0])
585 			next_pidx = 0;
586 	}
587 }
588 
589 static void
590 em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
591     qidx_t pidx)
592 {
593 	struct e1000_softc *sc = arg;
594 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
595 	struct rx_ring *rxr = &que->rxr;
596 
597 	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
598 }
599 
600 static int
601 lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
602 {
603 	struct e1000_softc *sc = arg;
604 	if_softc_ctx_t scctx = sc->shared;
605 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
606 	struct rx_ring *rxr = &que->rxr;
607 	struct e1000_rx_desc *rxd;
608 	uint32_t staterr = 0;
609 	int cnt, i;
610 
611 	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
612 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
613 		staterr = rxd->status;
614 
615 		if ((staterr & E1000_RXD_STAT_DD) == 0)
616 			break;
617 		if (++i == scctx->isc_nrxd[0])
618 			i = 0;
619 		if (staterr & E1000_RXD_STAT_EOP)
620 			cnt++;
621 	}
622 	return (cnt);
623 }
624 
625 static int
626 em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
627 {
628 	struct e1000_softc *sc = arg;
629 	if_softc_ctx_t scctx = sc->shared;
630 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
631 	struct rx_ring *rxr = &que->rxr;
632 	union e1000_rx_desc_extended *rxd;
633 	uint32_t staterr = 0;
634 	int cnt, i;
635 
636 	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
637 		rxd = &rxr->rx_base[i];
638 		staterr = le32toh(rxd->wb.upper.status_error);
639 
640 		if ((staterr & E1000_RXD_STAT_DD) == 0)
641 			break;
642 		if (++i == scctx->isc_nrxd[0])
643 			i = 0;
644 		if (staterr & E1000_RXD_STAT_EOP)
645 			cnt++;
646 	}
647 	return (cnt);
648 }
649 
650 static int
651 lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
652 {
653 	struct e1000_softc *sc = arg;
654 	if_softc_ctx_t scctx = sc->shared;
655 	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
656 	struct rx_ring *rxr = &que->rxr;
657 	struct e1000_rx_desc *rxd;
658 	uint16_t len;
659 	uint32_t status, errors;
660 	bool eop;
661 	int i, cidx;
662 
663 	status = errors = i = 0;
664 	cidx = ri->iri_cidx;
665 
666 	do {
667 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
668 		status = rxd->status;
669 		errors = rxd->errors;
670 
671 		/* Error Checking then decrement count */
672 		MPASS ((status & E1000_RXD_STAT_DD) != 0);
673 
674 		len = le16toh(rxd->length);
675 		ri->iri_len += len;
676 		rxr->rx_bytes += ri->iri_len;
677 
678 		eop = (status & E1000_RXD_STAT_EOP) != 0;
679 
680 		/* Make sure bad packets are discarded */
681 		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
682 			sc->dropped_pkts++;
683 			/* XXX fixup if common */
684 			return (EBADMSG);
685 		}
686 
687 		ri->iri_frags[i].irf_flid = 0;
688 		ri->iri_frags[i].irf_idx = cidx;
689 		ri->iri_frags[i].irf_len = len;
690 		/* Zero out the receive descriptors status. */
691 		rxd->status = 0;
692 
693 		if (++cidx == scctx->isc_nrxd[0])
694 			cidx = 0;
695 		i++;
696 	} while (!eop);
697 
698 	rxr->rx_packets++;
699 
700 	if (scctx->isc_capenable & IFCAP_RXCSUM)
701 		em_receive_checksum(status, errors, ri);
702 
703 	if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING &&
704 	    status & E1000_RXD_STAT_VP) {
705 		ri->iri_vtag = le16toh(rxd->special & E1000_RXD_SPC_VLAN_MASK);
706 		ri->iri_flags |= M_VLANTAG;
707 	}
708 
709 	ri->iri_nfrags = i;
710 
711 	return (0);
712 }
713 
714 static int
715 em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
716 {
717 	struct e1000_softc *sc = arg;
718 	if_softc_ctx_t scctx = sc->shared;
719 	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
720 	struct rx_ring *rxr = &que->rxr;
721 	union e1000_rx_desc_extended *rxd;
722 
723 	uint16_t len;
724 	uint32_t pkt_info;
725 	uint32_t staterr;
726 	bool eop;
727 	int i, cidx;
728 
729 	staterr = i = 0;
730 	cidx = ri->iri_cidx;
731 
732 	do {
733 		rxd = &rxr->rx_base[cidx];
734 		staterr = le32toh(rxd->wb.upper.status_error);
735 		pkt_info = le32toh(rxd->wb.lower.mrq);
736 
737 		/* Error Checking then decrement count */
738 		MPASS ((staterr & E1000_RXD_STAT_DD) != 0);
739 
740 		len = le16toh(rxd->wb.upper.length);
741 		ri->iri_len += len;
742 		rxr->rx_bytes += ri->iri_len;
743 
744 		eop = (staterr & E1000_RXD_STAT_EOP) != 0;
745 
746 		/* Make sure bad packets are discarded */
747 		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
748 			sc->dropped_pkts++;
749 			return EBADMSG;
750 		}
751 
752 		ri->iri_frags[i].irf_flid = 0;
753 		ri->iri_frags[i].irf_idx = cidx;
754 		ri->iri_frags[i].irf_len = len;
755 		/* Zero out the receive descriptors status. */
756 		rxd->wb.upper.status_error &= htole32(~0xFF);
757 
758 		if (++cidx == scctx->isc_nrxd[0])
759 			cidx = 0;
760 		i++;
761 	} while (!eop);
762 
763 	rxr->rx_packets++;
764 
765 	if (scctx->isc_capenable & IFCAP_RXCSUM)
766 		em_receive_checksum(staterr, staterr >> 24, ri);
767 
768 	if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING &&
769 	    staterr & E1000_RXD_STAT_VP) {
770 		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
771 		ri->iri_flags |= M_VLANTAG;
772 	}
773 
774 	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
775 	ri->iri_rsstype = em_determine_rsstype(pkt_info);
776 
777 	ri->iri_nfrags = i;
778 	return (0);
779 }
780 
781 /*********************************************************************
782  *
783  *  Verify that the hardware indicated that the checksum is valid.
784  *  Inform the stack about the status of checksum so that stack
785  *  doesn't spend time verifying the checksum.
786  *
787  *********************************************************************/
788 static void
789 em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri)
790 {
791 	if (__predict_false(status & E1000_RXD_STAT_IXSM))
792 		return;
793 
794 	/* If there is a layer 3 or 4 error we are done */
795 	if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE)))
796 		return;
797 
798 	/* IP Checksum Good */
799 	if (status & E1000_RXD_STAT_IPCS)
800 		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
801 
802 	/* Valid L4E checksum */
803 	if (__predict_true(status &
804 	    (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
805 		ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
806 		ri->iri_csum_data = htons(0xffff);
807 	}
808 }
809 
810 /********************************************************************
811  *
812  *  Parse the packet type to determine the appropriate hash
813  *
814  ******************************************************************/
815 static int
816 em_determine_rsstype(uint32_t pkt_info)
817 {
818 	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
819 	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
820 		return M_HASHTYPE_RSS_TCP_IPV4;
821 	case E1000_RXDADV_RSSTYPE_IPV4:
822 		return M_HASHTYPE_RSS_IPV4;
823 	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
824 		return M_HASHTYPE_RSS_TCP_IPV6;
825 	case E1000_RXDADV_RSSTYPE_IPV6_EX:
826 		return M_HASHTYPE_RSS_IPV6_EX;
827 	case E1000_RXDADV_RSSTYPE_IPV6:
828 		return M_HASHTYPE_RSS_IPV6;
829 	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
830 		return M_HASHTYPE_RSS_TCP_IPV6_EX;
831 	default:
832 		return M_HASHTYPE_OPAQUE;
833 	}
834 }
835