/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
 * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "if_em.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local function prototypes
 *********************************************************************/
static int em_tso_setup(struct e1000_softc *, if_pkt_info_t, uint32_t *,
    uint32_t *);
static int em_transmit_checksum_setup(struct e1000_softc *, if_pkt_info_t,
    uint32_t *, uint32_t *);
static int em_isc_txd_encap(void *, if_pkt_info_t);
static void em_isc_txd_flush(void *, uint16_t, qidx_t);
static int em_isc_txd_credits_update(void *, uint16_t, bool);
static void em_isc_rxd_refill(void *, if_rxd_update_t);
static void em_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
static int em_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
static int em_isc_rxd_pkt_get(void *, if_rxd_info_t);

static void lem_isc_rxd_refill(void *, if_rxd_update_t);
static int lem_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
static int lem_isc_rxd_pkt_get(void *, if_rxd_info_t);

static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t);
static int em_determine_rsstype(uint32_t);
extern int em_intr(void *);

struct if_txrx em_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = em_isc_rxd_available,
	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
	.ift_rxd_refill = em_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

struct if_txrx lem_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = lem_isc_rxd_available,
	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
	.ift_rxd_refill = lem_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

extern if_shared_ctx_t em_sctx;

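/*
 * Debugging aid for the TX report-status (RS) machinery: for each TX
 * queue, print the state of the tx_rsq ring and which descriptors in
 * the ring currently have their Descriptor Done (DD) status bit set.
 */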
void
em_dump_rs(struct e1000_softc *sc)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que;
	struct tx_ring *txr;
	qidx_t i, ntxd, qid, cur;
	int16_t rs_cidx;
	uint8_t status;

	printf("\n");
	ntxd = scctx->isc_ntxd[0];
	for (qid = 0; qid < sc->tx_num_queues; qid++) {
		que = &sc->tx_queues[qid];
		txr = &que->txr;
		rs_cidx = txr->tx_rs_cidx;
		if (rs_cidx != txr->tx_rs_pidx) {
			cur = txr->tx_rsq[rs_cidx];
			status = txr->tx_base[cur].upper.fields.status;
			if (!(status & E1000_TXD_STAT_DD))
				printf("qid[%d]->tx_rsq[%d]: %d clear ",
				    qid, rs_cidx, cur);
		} else {
			rs_cidx = (rs_cidx - 1) & (ntxd - 1);
			cur = txr->tx_rsq[rs_cidx];
			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ",
			    qid, rs_cidx, cur);
		}
		printf("cidx_prev=%d rs_pidx=%d ", txr->tx_cidx_processed,
		    txr->tx_rs_pidx);
		for (i = 0; i < ntxd; i++) {
			if (txr->tx_base[i].upper.fields.status &
			    E1000_TXD_STAT_DD)
				printf("%d set ", i);
		}
		printf("\n");
	}
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using extended tx descriptors.
 *
 **********************************************************************/
static int
em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper,
    uint32_t *txd_lower)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct e1000_context_desc *TXD;
	int cur, hdr_len;
	uint32_t cmd_type_len;

	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
	    E1000_TXD_DTYP_D |			/* Data descr type */
	    E1000_TXD_CMD_TSE);			/* Do TSE on this packet */

	cur = pi->ipi_pidx;
	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];

	/*
	 * ipcss - Start offset for header checksum calculation.
	 * ipcse - End offset for header checksum calculation.
	 * ipcso - Offset of place to put the checksum.
	 */
	switch (pi->ipi_etype) {
	case ETHERTYPE_IP:
		/* IP and/or TCP header checksum calculation and insertion. */
		*txd_upper =
		    (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

		TXD->lower_setup.ip_fields.ipcse =
		    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
		break;
	case ETHERTYPE_IPV6:
		/* TCP header checksum calculation and insertion. */
		*txd_upper = E1000_TXD_POPTS_TXSM << 8;

		TXD->lower_setup.ip_fields.ipcse = htole16(0);
		break;
	default:
		break;
	}
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcso =
	    pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);

	/*
	 * tucss - Start offset for payload checksum calculation.
	 * tucse - End offset for payload checksum calculation.
	 * tucso - Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    pi->ipi_ehdrlen + pi->ipi_ip_hlen +
	    offsetof(struct tcphdr, th_sum);

	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	/*
	 * "PCI/PCI-X SDM 4.0" page 45, and "PCIe GbE SDM 2.5" page 63
	 * - Set up basic TUCMDs
	 * - The IP bit on indicates IPv4, while off indicates IPv6
	 */
	cmd_type_len = sc->txd_cmd |
	    E1000_TXD_CMD_DEXT | /* Extended descr */
	    E1000_TXD_CMD_TSE |  /* TSE context */
	    E1000_TXD_CMD_TCP;   /* Do TCP checksum */
	if (pi->ipi_etype == ETHERTYPE_IP)
		cmd_type_len |= E1000_TXD_CMD_IP;
	TXD->cmd_and_length = htole32(cmd_type_len |
	    (pi->ipi_len - hdr_len)); /* Total len */

	txr->tx_tso = true;

	if (++cur == scctx->isc_ntxd[0])
		cur = 0;
	DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n",
	    __FUNCTION__, pi->ipi_pidx, cur);
	return (cur);
}

/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. A context
 *  change can be a performance detriment, however, and might be
 *  better off just disabled. The reason lies in the way the
 *  controller pipelines requests from the Tx data DMA. Up to four
 *  requests can be pipelined, and they may belong to the same
 *  packet or to multiple packets. However, all requests for one
 *  packet are issued before a request is issued for a subsequent
 *  packet, and if a request for the next packet requires a context
 *  change, that request will be stalled until the previous request
 *  completes. This means setting up a new context effectively
 *  disables pipelined Tx data DMA, which in turn greatly slows
 *  down performance when sending small frames.
 **********************************************************************/
#define DONT_FORCE_CTX 1

static int
em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    uint32_t *txd_upper, uint32_t *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	int csum_flags = pi->ipi_csum_flags;
	int cur, hdr_len;
	uint32_t cmd;

	cur = pi->ipi_pidx;
	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	cmd = sc->txd_cmd;

	/*
	 * The 82574L can only remember the *last* context used,
	 * regardless of the queue it was used for.  We cannot reuse
	 * contexts on this hardware platform and must generate a new
	 * context every time.  82574L hardware spec, section 7.2.6,
	 * second note.
	 */
	if (DONT_FORCE_CTX &&
	    sc->tx_num_queues == 1 &&
	    txr->csum_lhlen == pi->ipi_ehdrlen &&
	    txr->csum_iphlen == pi->ipi_ip_hlen &&
	    txr->csum_flags == csum_flags) {
		/*
		 * Same csum offload context as the previous packets;
		 * just return.
		 */
		*txd_upper = txr->csum_txd_upper;
		*txd_lower = txr->csum_txd_lower;
		return (cur);
	}

	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	/*
	 * ipcss - Start offset for header checksum calculation.
	 * ipcse - End offset for header checksum calculation.
	 * ipcso - Offset of place to put the checksum.
	 *
	 * We set ipcsX values regardless of IP version to work around HW
	 * issues, and ipcse must be 0 for IPv6 per "PCIe GbE SDM 2.5" page
	 * 61.  IXSM controls whether it's inserted.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen +
	    offsetof(struct ip, ip_sum);
	if (csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len - 1);
		cmd |= E1000_TXD_CMD_IP;
	} else if (csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP))
		TXD->lower_setup.ip_fields.ipcse = htole16(0);

	/*
	 * tucss - Start offset for payload checksum calculation.
	 * tucse - End offset for payload checksum calculation.
	 * tucso - Offset of place to put the checksum.
	 */
	if (csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP6_TCP |
	    CSUM_IP6_UDP)) {
		uint8_t tucso;

		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;

		if (csum_flags & (CSUM_TCP | CSUM_IP6_TCP)) {
			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		} else
			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	txr->csum_lhlen = pi->ipi_ehdrlen;
	txr->csum_iphlen = pi->ipi_ip_hlen;
	txr->csum_flags = csum_flags;
	txr->csum_txd_upper = *txd_upper;
	txr->csum_txd_lower = *txd_lower;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
	    htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);

	if (++cur == scctx->isc_ntxd[0])
		cur = 0;
	DPRINTF(iflib_get_dev(sc->ctx),
	    "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x"
	    " hdr_len=%d cmd=%x\n",
	    csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
	return (cur);
}

#define TSO_WORKAROUND 4 /* TSO sentinel descriptor */

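/*
 * iflib txd_encap callback: write the descriptors for one packet -- a
 * context descriptor first when TSO or checksum offload is requested,
 * then one data descriptor per DMA segment -- and report the new
 * producer index back through pi->ipi_new_pidx.
 */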
static int
em_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;
	int csum_flags = pi->ipi_csum_flags;
	int i, j, first, pidx_last;
	uint32_t txd_flags, txd_upper = 0, txd_lower = 0;
	struct e1000_tx_desc *ctxd = NULL;
	bool do_tso, tso_desc;
	qidx_t ntxd;

	txd_flags = (pi->ipi_flags & IPI_TX_INTR) ? E1000_TXD_CMD_RS : 0;
	i = first = pi->ipi_pidx;
	do_tso = (csum_flags & CSUM_TSO);
	tso_desc = false;
	ntxd = scctx->isc_ntxd[0];
	/*
	 * TSO hardware workaround: if this packet is not TSO, is only a
	 * single descriptor long, and follows a TSO burst, then we need
	 * to add a sentinel descriptor to prevent premature writeback.
	 */
	if (!do_tso && txr->tx_tso) {
		if (nsegs == 1)
			tso_desc = true;
		txr->tx_tso = false;
	}

	/* Do hardware assists */
	if (do_tso) {
		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
		tso_desc = true;
	} else if (csum_flags & EM_CSUM_OFFLOAD) {
		i = em_transmit_checksum_setup(sc, pi, &txd_upper,
		    &txd_lower);
	}

	if (pi->ipi_mflags & M_VLANTAG) {
		/* Set the VLAN id. */
		txd_upper |= htole16(pi->ipi_vtag) << 16;
		/* Tell hardware to add the tag. */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	DPRINTF(iflib_get_dev(sc->ctx),
	    "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
	/* XXX sc->pcix_82544 -- lem_fill_descriptors */

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		uint32_t cmd;

		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;
		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;

		/*
		 * TSO workaround:
		 * If this is the last descriptor, split it so the
		 * sentinel carries a small final chunk.
		 */
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);

			if (++i == scctx->isc_ntxd[0])
				i = 0;

			/* Now make the sentinel */
			ctxd = &txr->tx_base[i];
			ctxd->buffer_addr = htole64(seg_addr + seg_len);
			ctxd->lower.data =
			    htole32(cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "TSO path pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		}
	}

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP)
	 * and, if an interrupt was requested, Report Status (RS).
	 */
	if (txd_flags && nsegs) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		DPRINTF(iflib_get_dev(sc->ctx),
		    "setting to RS on %d rs_pidx %d first: %d\n",
		    pidx_last, txr->tx_rs_pidx, first);
		txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1);
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}
	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
	DPRINTF(iflib_get_dev(sc->ctx),
	    "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n",
	    first, pidx_last, i);
	pi->ipi_new_pidx = i;

	/* Sent data accounting for AIM */
	txr->tx_bytes += pi->ipi_len;
	++txr->tx_packets;

	return (0);
}

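/*
 * iflib txd_flush callback: publish the new producer index to the
 * hardware by writing the queue's Transmit Descriptor Tail (TDT)
 * register.
 */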
static void
em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx);
}

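/*
 * iflib txd_credits_update callback: walk the report-status ring and
 * count descriptors the hardware has finished with (DD bit set).  If
 * clear is false, only report whether any completions are pending;
 * otherwise consume them and return the number of descriptors
 * reclaimed.  Ring sizes are powers of two (iflib requires this), so
 * the (ntxd - 1) masks below implement index wraparound.
 */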
static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;
	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	MPASS(cur != QIDX_INVALID);
	status = txr->tx_base[cur].upper.fields.status;
	updated = !!(status & E1000_TXD_STAT_DD);

	if (!updated)
		return (0);

	/*
	 * If clear is false, just let the caller know that there
	 * are descriptors to reclaim.
	 */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		DPRINTF(iflib_get_dev(sc->ctx),
		    "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
		    __FUNCTION__, prev, cur, clear, delta);

		processed += delta;
		prev = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd - 1);
		if (rs_cidx == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		MPASS(cur != QIDX_INVALID);
		status = txr->tx_base[cur].upper.fields.status;
	} while (status & E1000_TXD_STAT_DD);

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return (processed);
}

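/*
 * iflib rxd_refill callback for legacy (lem) devices: post fresh
 * buffer physical addresses into the legacy RX descriptor ring,
 * clearing each descriptor's status byte so its DD bit starts out
 * zero.
 */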
static void
lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
		rxd->buffer_addr = htole64(paddrs[i]);
		/* status bits must be cleared */
		rxd->status = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

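/*
 * iflib rxd_refill callback for em-class devices: same as above, but
 * for the extended RX descriptor format, clearing the writeback
 * status/error word instead.
 */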
static void
em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	uint16_t rxqid = iru->iru_qsidx;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = &rxr->rx_base[next_pidx];
		rxd->read.buffer_addr = htole64(paddrs[i]);
		/* DD bits must be cleared */
		rxd->wb.upper.status_error = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

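/*
 * iflib rxd_flush callback: hand the refilled descriptors back to the
 * hardware by writing the queue's Receive Descriptor Tail (RDT)
 * register.
 */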
static void
em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}

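/*
 * iflib rxd_available callback for legacy (lem) devices: starting at
 * idx, count how many complete packets (descriptors with DD set,
 * delimited by EOP) are ready, up to the given budget.
 */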
static int
lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
		staterr = rxd->status;

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

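/*
 * iflib rxd_available callback for em-class devices: same walk as
 * above, but the DD/EOP bits come from the extended descriptor's
 * writeback status field.
 */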
static int
em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

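/*
 * iflib rxd_pkt_get callback for legacy (lem) devices: collect the
 * fragments of one received packet into ri, discard it on frame
 * errors, and report hardware checksum and VLAN results to the stack.
 */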
static int
lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint16_t len;
	uint32_t status, errors;
	bool eop;
	int i, cidx;

	status = errors = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
		status = rxd->status;
		errors = rxd->errors;

		/* Error checking then decrement count */
		MPASS((status & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->length);
		ri->iri_len += len;

		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			/* XXX fixup if common */
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->status = 0;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* Received data accounting for AIM */
	rxr->rx_packets++;
	rxr->rx_bytes += ri->iri_len;

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(status, errors, ri);

	if ((scctx->isc_capenable & IFCAP_VLAN_HWTAGGING) &&
	    (status & E1000_RXD_STAT_VP)) {
		ri->iri_vtag = le16toh(rxd->special) & E1000_RXD_SPC_VLAN_MASK;
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;

	return (0);
}

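/*
 * iflib rxd_pkt_get callback for em-class devices: as above for the
 * extended descriptor format, additionally reporting the RSS hash and
 * hash type from the writeback descriptor.
 */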
static int
em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint16_t len;
	uint32_t pkt_info;
	uint32_t staterr;
	bool eop;
	int i, cidx;

	staterr = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le32toh(rxd->wb.lower.mrq);

		/* Error checking then decrement count */
		MPASS((staterr & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ri->iri_len += len;

		eop = (staterr & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->wb.upper.status_error &= htole32(~0xFF);

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* Received data accounting for AIM */
	rxr->rx_packets++;
	rxr->rx_bytes += ri->iri_len;

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(staterr, staterr >> 24, ri);

	if ((scctx->isc_capenable & IFCAP_VLAN_HWTAGGING) &&
	    (staterr & E1000_RXD_STAT_VP)) {
		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = em_determine_rsstype(pkt_info);

	ri->iri_nfrags = i;
	return (0);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri)
{
	if (__predict_false(status & E1000_RXD_STAT_IXSM))
		return;

	/* If there is a layer 3 or 4 error we are done */
	if (__predict_false(errors & (E1000_RXD_ERR_IPE |
	    E1000_RXD_ERR_TCPE)))
		return;

	/* IP checksum good */
	if (status & E1000_RXD_STAT_IPCS)
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/* Valid L4 (TCP/UDP) checksum */
	if (__predict_true(status &
	    (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
		ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		ri->iri_csum_data = htons(0xffff);
	}
}

/*********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 *********************************************************************/
static int
em_determine_rsstype(uint32_t pkt_info)
{
	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		return (M_HASHTYPE_RSS_TCP_IPV4);
	case E1000_RXDADV_RSSTYPE_IPV4:
		return (M_HASHTYPE_RSS_IPV4);
	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
		return (M_HASHTYPE_RSS_TCP_IPV6);
	case E1000_RXDADV_RSSTYPE_IPV6_EX:
		return (M_HASHTYPE_RSS_IPV6_EX);
	case E1000_RXDADV_RSSTYPE_IPV6:
		return (M_HASHTYPE_RSS_IPV6);
	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return (M_HASHTYPE_RSS_TCP_IPV6_EX);
	default:
		return (M_HASHTYPE_OPAQUE);
	}
}
845