/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
 * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "if_em.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int em_tso_setup(struct e1000_softc *, if_pkt_info_t, uint32_t *,
    uint32_t *);
static int em_transmit_checksum_setup(struct e1000_softc *, if_pkt_info_t,
    uint32_t *, uint32_t *);
static int em_isc_txd_encap(void *, if_pkt_info_t);
static void em_isc_txd_flush(void *, uint16_t, qidx_t);
static int em_isc_txd_credits_update(void *, uint16_t, bool);
static void em_isc_rxd_refill(void *, if_rxd_update_t);
static void em_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
static int em_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
static int em_isc_rxd_pkt_get(void *, if_rxd_info_t);

static void lem_isc_rxd_refill(void *, if_rxd_update_t);

static int lem_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
static int lem_isc_rxd_pkt_get(void *, if_rxd_info_t);

static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t);
static int em_determine_rsstype(uint32_t);
extern int em_intr(void *);

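/*
 * iflib TX/RX method tables.  em_txrx serves the newer em(4)-class
 * devices, lem_txrx the legacy lem(4)-class devices; the two differ
 * only in their RX descriptor handling.
 */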
struct if_txrx em_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = em_isc_rxd_available,
	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
	.ift_rxd_refill = em_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

struct if_txrx lem_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = lem_isc_rxd_available,
	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
	.ift_rxd_refill = lem_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

extern if_shared_ctx_t em_sctx;

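/* Debug helper: dump each TX queue's RS index ring and descriptor DD state. */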
void
em_dump_rs(struct e1000_softc *sc)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que;
	struct tx_ring *txr;
	qidx_t i, ntxd, qid, cur;
	int16_t rs_cidx;
	uint8_t status;

	printf("\n");
	ntxd = scctx->isc_ntxd[0];
	for (qid = 0; qid < sc->tx_num_queues; qid++) {
		que = &sc->tx_queues[qid];
		txr = &que->txr;
		rs_cidx = txr->tx_rs_cidx;
		if (rs_cidx != txr->tx_rs_pidx) {
			cur = txr->tx_rsq[rs_cidx];
			status = txr->tx_base[cur].upper.fields.status;
			if (!(status & E1000_TXD_STAT_DD))
				printf("qid[%d]->tx_rsq[%d]: %d clear ",
				    qid, rs_cidx, cur);
		} else {
			rs_cidx = (rs_cidx - 1) & (ntxd - 1);
			cur = txr->tx_rsq[rs_cidx];
			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ",
			    qid, rs_cidx, cur);
		}
		printf("cidx_prev=%d rs_pidx=%d ", txr->tx_cidx_processed,
		    txr->tx_rs_pidx);
		for (i = 0; i < ntxd; i++) {
			if (txr->tx_base[i].upper.fields.status &
			    E1000_TXD_STAT_DD)
				printf("%d set ", i);
		}
		printf("\n");
	}
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper,
    uint32_t *txd_lower)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct e1000_context_desc *TXD;
	int cur, hdr_len;
	uint32_t cmd_type_len;

	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	cur = pi->ipi_pidx;
	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];

	/*
	 * ipcss - Start offset for header checksum calculation.
	 * ipcse - End offset for header checksum calculation.
	 * ipcso - Offset of place to put the checksum.
	 */
	switch (pi->ipi_etype) {
	case ETHERTYPE_IP:
		/* IP and/or TCP header checksum calculation and insertion. */
		*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

		TXD->lower_setup.ip_fields.ipcse =
		    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
		break;
	case ETHERTYPE_IPV6:
		/* TCP header checksum calculation and insertion. */
		*txd_upper = E1000_TXD_POPTS_TXSM << 8;

		TXD->lower_setup.ip_fields.ipcse = htole16(0);
		break;
	default:
		break;
	}
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcso =
	    pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);

	/*
	 * tucss - Start offset for payload checksum calculation.
	 * tucse - End offset for payload checksum calculation.
	 * tucso - Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);

	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	/*
	 * "PCI/PCI-X SDM 4.0" page 45, and "PCIe GbE SDM 2.5" page 63
	 * - Set up basic TUCMDs
	 * - For others, the IP bit on indicates IPv4, while off indicates IPv6
	 */
	cmd_type_len = sc->txd_cmd |
	    E1000_TXD_CMD_DEXT | /* Extended descr */
	    E1000_TXD_CMD_TSE |  /* TSE context */
	    E1000_TXD_CMD_TCP;   /* Do TCP checksum */
	if (pi->ipi_etype == ETHERTYPE_IP)
		cmd_type_len |= E1000_TXD_CMD_IP;
	TXD->cmd_and_length = htole32(cmd_type_len |
	    (pi->ipi_len - hdr_len)); /* Total len */

	txr->tx_tso = true;

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__,
	    pi->ipi_pidx, cur);
	return (cur);
}

/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. The occasion
 *  of a context change can be a performance detriment, and
 *  might be better just disabled. The reason arises in the way
 *  in which the controller supports pipelined requests from the
 *  Tx data DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets. However all
 *  requests for one packet are issued before a request is issued
 *  for a subsequent packet, and if a request for the next packet
 *  requires a context change that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA, which
 *  in turn greatly slows down performance when sending small
 *  frames.
 **********************************************************************/
#define DONT_FORCE_CTX 1
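/*
 * With DONT_FORCE_CTX set, em_transmit_checksum_setup() reuses the
 * previously programmed context when the header layout and csum flags
 * are unchanged (single queue only; see the 82574L note below).
 * Defining it to 0 would emit a fresh context descriptor per packet.
 */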

static int
em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    uint32_t *txd_upper, uint32_t *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	int csum_flags = pi->ipi_csum_flags;
	int cur, hdr_len;
	uint32_t cmd;

	cur = pi->ipi_pidx;
	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	cmd = sc->txd_cmd;

	/*
	 * The 82574L can only remember the *last* context used
	 * regardless of the queue it was used for.  We cannot reuse
	 * contexts on this hardware platform and must generate a new
	 * context every time.  82574L hardware spec, section 7.2.6,
	 * second note.
	 */
	if (DONT_FORCE_CTX &&
	    sc->tx_num_queues == 1 &&
	    txr->csum_lhlen == pi->ipi_ehdrlen &&
	    txr->csum_iphlen == pi->ipi_ip_hlen &&
	    txr->csum_flags == csum_flags) {
		/*
		 * Same csum offload context as the previous packets;
		 * just return.
		 */
		*txd_upper = txr->csum_txd_upper;
		*txd_lower = txr->csum_txd_lower;
		return (cur);
	}

	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	/*
	 * ipcss - Start offset for header checksum calculation.
	 * ipcse - End offset for header checksum calculation.
	 * ipcso - Offset of place to put the checksum.
	 *
	 * We set ipcsX values regardless of IP version to work around HW
	 * issues, and ipcse must be 0 for IPv6 per "PCIe GbE SDM 2.5" page 61.
	 * IXSM controls whether it's inserted.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen +
	    offsetof(struct ip, ip_sum);
	if (csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len - 1);
		cmd |= E1000_TXD_CMD_IP;
	} else if (csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP))
		TXD->lower_setup.ip_fields.ipcse = htole16(0);

	/*
	 * tucss - Start offset for payload checksum calculation.
	 * tucse - End offset for payload checksum calculation.
	 * tucso - Offset of place to put the checksum.
	 */
	if (csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP)) {
		uint8_t tucso;

		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;

		if (csum_flags & (CSUM_TCP | CSUM_IP6_TCP)) {
			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		} else
			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	txr->csum_lhlen = pi->ipi_ehdrlen;
	txr->csum_iphlen = pi->ipi_ip_hlen;
	txr->csum_flags = csum_flags;
	txr->csum_txd_upper = *txd_upper;
	txr->csum_txd_lower = *txd_lower;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
		htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(sc->ctx),
	    "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
	    csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
	return (cur);
}

#define TSO_WORKAROUND 4 /* TSO sentinel descriptor */
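/*
 * Number of trailing payload bytes split off into a separate sentinel
 * data descriptor when a short packet follows a TSO burst; see the
 * hardware workaround in em_isc_txd_encap().
 */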

static int
em_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;
	int csum_flags = pi->ipi_csum_flags;
	int i, j, first, pidx_last;
	uint32_t txd_flags, txd_upper = 0, txd_lower = 0;

	struct e1000_tx_desc *ctxd = NULL;
	bool do_tso, tso_desc;
	qidx_t ntxd;

	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
	i = first = pi->ipi_pidx;
	do_tso = (csum_flags & CSUM_TSO);
	tso_desc = false;
	ntxd = scctx->isc_ntxd[0];
	/*
	 * TSO hardware workaround: if this packet is not
	 * TSO, is only a single descriptor long, and
	 * follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == true)) {
		if (nsegs == 1)
			tso_desc = true;
		txr->tx_tso = false;
	}

	/* Do hardware assists */
	if (do_tso) {
		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
		tso_desc = true;
	} else if (csum_flags & EM_CSUM_OFFLOAD) {
		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
	}

	if (pi->ipi_mflags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |= htole16(pi->ipi_vtag) << 16;
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	DPRINTF(iflib_get_dev(sc->ctx),
	    "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
	/* XXX sc->pcix_82544 -- lem_fill_descriptors */

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		uint32_t cmd;

		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;
		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;

		/*
		 * TSO Workaround:
		 * If this is the last descriptor, we want to
		 * split it so we have a small final sentinel.
		 */
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);

			if (++i == scctx->isc_ntxd[0])
				i = 0;

			/* Now make the sentinel */
			ctxd = &txr->tx_base[i];
			ctxd->buffer_addr = htole64(seg_addr + seg_len);
			ctxd->lower.data =
			    htole32(cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "TSO path pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		}
	}

	/*
	 * The last descriptor of the packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS).
	 */
	if (txd_flags && nsegs) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		DPRINTF(iflib_get_dev(sc->ctx),
		    "setting to RS on %d rs_pidx %d first: %d\n",
		    pidx_last, txr->tx_rs_pidx, first);
		txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1);
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}
	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
	DPRINTF(iflib_get_dev(sc->ctx),
	    "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
	pi->ipi_new_pidx = i;

	/* Sent data accounting for AIM */
	txr->tx_bytes += pi->ipi_len;
	++txr->tx_packets;

	return (0);
}

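/*
 * Publish newly queued descriptors to the hardware by advancing the
 * ring's tail register (TDT) to the given producer index.
 */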
static void
em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx);
}

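/*
 * Reclaim completed TX descriptors.  Walk the ring of indices queued
 * with RS set (tx_rsq[]) and count descriptors the hardware is done
 * with (DD set).  With clear == false this only reports whether any
 * descriptors are reclaimable; with clear == true it also advances
 * the consumer bookkeeping.
 */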
static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	MPASS(cur != QIDX_INVALID);
	status = txr->tx_base[cur].upper.fields.status;
	updated = !!(status & E1000_TXD_STAT_DD);

	if (!updated)
		return (0);

	/*
	 * If clear is false, just let the caller know that there
	 * are descriptors to reclaim.
	 */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		DPRINTF(iflib_get_dev(sc->ctx),
		    "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
		    __FUNCTION__, prev, cur, clear, delta);

		processed += delta;
		prev = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd - 1);
		if (rs_cidx == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		MPASS(cur != QIDX_INVALID);
		status = txr->tx_base[cur].upper.fields.status;
	} while ((status & E1000_TXD_STAT_DD));

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return (processed);
}

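/*
 * Post fresh receive buffers to a legacy-format RX ring.  The status
 * byte must be cleared so a stale DD bit is not mistaken for a newly
 * completed descriptor.
 */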
static void
lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
		rxd->buffer_addr = htole64(paddrs[i]);
		/* status bits must be cleared */
		rxd->status = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

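/* As above, but for the extended RX descriptor format used by em(4). */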
static void
em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	uint16_t rxqid = iru->iru_qsidx;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = &rxr->rx_base[next_pidx];
		rxd->read.buffer_addr = htole64(paddrs[i]);
		/* DD bits must be cleared */
		rxd->wb.upper.status_error = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

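/* Hand refilled buffers to the hardware by bumping the RX tail (RDT). */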
static void
em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}

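/*
 * Count complete packets (descriptors with EOP set) available at idx,
 * stopping at the first descriptor the hardware has not finished
 * (DD clear) or once the budget is met.
 */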
static int
lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
		staterr = rxd->status;

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

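/* Extended-descriptor variant of lem_isc_rxd_available(). */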
static int
em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

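/*
 * Gather the fragments of one received packet from a legacy RX ring
 * into ri, accumulating length and checksum/VLAN metadata until the
 * EOP descriptor is reached.
 */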
static int
lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint16_t len;
	uint32_t status, errors;
	bool eop;
	int i, cidx;

	status = errors = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
		status = rxd->status;
		errors = rxd->errors;

		/* Error Checking then decrement count */
		MPASS((status & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->length);
		ri->iri_len += len;
		/* Count each fragment's bytes once for AIM stats. */
		rxr->rx_bytes += len;

		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			/* XXX fixup if common */
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->status = 0;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	rxr->rx_packets++;

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(status, errors, ri);

	if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING &&
	    status & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->special & E1000_RXD_SPC_VLAN_MASK);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;

	return (0);
}

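/*
 * Extended-descriptor variant of lem_isc_rxd_pkt_get(); additionally
 * recovers the RSS hash and packet type used for the flow id.
 */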
static int
em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;

	uint16_t len;
	uint32_t pkt_info;
	uint32_t staterr;
	bool eop;
	int i, cidx;

	staterr = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le32toh(rxd->wb.lower.mrq);

		/* Error Checking then decrement count */
		MPASS((staterr & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ri->iri_len += len;
		/* Count each fragment's bytes once for AIM stats. */
		rxr->rx_bytes += len;

		eop = (staterr & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->wb.upper.status_error &= htole32(~0xFF);

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	rxr->rx_packets++;

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(staterr, staterr >> 24, ri);

	if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING &&
	    staterr & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = em_determine_rsstype(pkt_info);

	ri->iri_nfrags = i;
	return (0);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri)
{
	if (__predict_false(status & E1000_RXD_STAT_IXSM))
		return;

	/* If there is a layer 3 or 4 error we are done */
	if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE)))
		return;

	/* IP Checksum Good */
	if (status & E1000_RXD_STAT_IPCS)
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/* Valid L4 (TCP/UDP) checksum */
	if (__predict_true(status &
	    (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
		ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		ri->iri_csum_data = htons(0xffff);
	}
}

/********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 ******************************************************************/
static int
em_determine_rsstype(uint32_t pkt_info)
{
	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		return M_HASHTYPE_RSS_TCP_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV4:
		return M_HASHTYPE_RSS_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
		return M_HASHTYPE_RSS_TCP_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_EX:
		return M_HASHTYPE_RSS_IPV6_EX;
	case E1000_RXDADV_RSSTYPE_IPV6:
		return M_HASHTYPE_RSS_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return M_HASHTYPE_RSS_TCP_IPV6_EX;
	default:
		return M_HASHTYPE_OPAQUE;
	}
}