xref: /freebsd/sys/dev/e1000/em_txrx.c (revision 134e17798c9af53632b372348ab828e75e65bf46)
1 /*-
2  * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
3  * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /* $FreeBSD$ */
29 #include "if_em.h"
30 
31 #ifdef RSS
32 #include <net/rss_config.h>
33 #include <netinet/in_rss.h>
34 #endif
35 
/*
 * Debug tracing helper.  With VERBOSE_DEBUG defined DPRINTF is an alias for
 * device_printf; otherwise it expands to nothing, so its arguments are
 * discarded by the preprocessor and never evaluated.
 */
#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif
41 
42 /*********************************************************************
43  *  Local Function prototypes
44  *********************************************************************/
/* Transmit offload helpers (TSO and checksum context descriptor setup). */
static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper,
    u32 *txd_lower);
static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi,
    u32 *txd_upper, u32 *txd_lower);
/* Callbacks installed in em_txrx below (tx ones are shared with lem_txrx). */
static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx);
static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

/* Receive callbacks installed only in lem_txrx below. */
static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru);

static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
   qidx_t budget);
static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

/* Receive-side helpers: checksum status reporting and RSS hash type. */
static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri);
static void em_receive_checksum(uint32_t status, if_rxd_info_t ri);
static int em_determine_rsstype(u32 pkt_info);
/* Defined outside this file; installed as .ift_legacy_intr in both tables. */
extern int em_intr(void *arg);
69 
/*
 * Tx/rx callback table whose receive routines (em_isc_rxd_*) operate on
 * union e1000_rx_desc_extended descriptors.
 */
struct if_txrx em_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = em_isc_rxd_available,
	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
	.ift_rxd_refill = em_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};
80 
/*
 * Tx/rx callback table that shares the transmit callbacks and rxd_flush with
 * em_txrx but uses the lem_isc_rxd_* receive routines, which operate on
 * struct e1000_rx_desc descriptors.
 */
struct if_txrx lem_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = lem_isc_rxd_available,
	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
	.ift_rxd_refill = lem_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};
91 
/* Defined in another translation unit; declared here for use by this file. */
extern if_shared_ctx_t em_sctx;
93 
94 void
95 em_dump_rs(struct adapter *adapter)
96 {
97 	if_softc_ctx_t scctx = adapter->shared;
98 	struct em_tx_queue *que;
99 	struct tx_ring *txr;
100 	qidx_t i, ntxd, qid, cur;
101 	int16_t rs_cidx;
102 	uint8_t status;
103 
104 	printf("\n");
105 	ntxd = scctx->isc_ntxd[0];
106 	for (qid = 0; qid < adapter->tx_num_queues; qid++) {
107 		que = &adapter->tx_queues[qid];
108 		txr =  &que->txr;
109 		rs_cidx = txr->tx_rs_cidx;
110 		if (rs_cidx != txr->tx_rs_pidx) {
111 			cur = txr->tx_rsq[rs_cidx];
112 			status = txr->tx_base[cur].upper.fields.status;
113 			if (!(status & E1000_TXD_STAT_DD))
114 				printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur);
115 		} else {
116 			rs_cidx = (rs_cidx-1)&(ntxd-1);
117 			cur = txr->tx_rsq[rs_cidx];
118 			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ", qid, rs_cidx, cur);
119 		}
120 		printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed, txr->tx_rs_pidx);
121 		for (i = 0; i < ntxd; i++) {
122 			if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD)
123 				printf("%d set ", i);
124 		}
125 		printf("\n");
126 	}
127 }
128 
129 /**********************************************************************
130  *
131  *  Setup work for hardware segmentation offload (TSO) on
132  *  adapters using advanced tx descriptors
133  *
134  **********************************************************************/
135 static int
136 em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
137 {
138 	if_softc_ctx_t scctx = adapter->shared;
139 	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
140 	struct tx_ring *txr = &que->txr;
141 	struct e1000_context_desc *TXD;
142 	int cur, hdr_len;
143 
144 	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
145 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
146 		      E1000_TXD_DTYP_D |	/* Data descr type */
147 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
148 
149 	/* IP and/or TCP header checksum calculation and insertion. */
150 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
151 
152 	cur = pi->ipi_pidx;
153 	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
154 
155 	/*
156 	 * Start offset for header checksum calculation.
157 	 * End offset for header checksum calculation.
158 	 * Offset of place put the checksum.
159 	 */
160 	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
161 	TXD->lower_setup.ip_fields.ipcse =
162 	    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
163 	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
164 
165 	/*
166 	 * Start offset for payload checksum calculation.
167 	 * End offset for payload checksum calculation.
168 	 * Offset of place to put the checksum.
169 	 */
170 	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
171 	TXD->upper_setup.tcp_fields.tucse = 0;
172 	TXD->upper_setup.tcp_fields.tucso =
173 	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);
174 
175 	/*
176 	 * Payload size per packet w/o any headers.
177 	 * Length of all headers up to payload.
178 	 */
179 	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
180 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
181 
182 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
183 				E1000_TXD_CMD_DEXT |	/* Extended descr */
184 				E1000_TXD_CMD_TSE |	/* TSE context */
185 				E1000_TXD_CMD_IP |	/* Do IP csum */
186 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
187 				      (pi->ipi_len - hdr_len)); /* Total len */
188 	txr->tx_tso = TRUE;
189 
190 	if (++cur == scctx->isc_ntxd[0]) {
191 		cur = 0;
192 	}
193 	DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur);
194 	return (cur);
195 }
196 
/*
 * TSO_WORKAROUND: number of bytes em_isc_txd_encap() splits off the end of
 * the last segment into a separate sentinel descriptor.
 * DONT_FORCE_CTX: when non-zero, em_transmit_checksum_setup() may reuse the
 * previously programmed checksum context instead of writing a new one.
 */
#define TSO_WORKAROUND 4
#define DONT_FORCE_CTX 1
199 
200 
201 /*********************************************************************
202  *  The offload context is protocol specific (TCP/UDP) and thus
203  *  only needs to be set when the protocol changes. The occasion
204  *  of a context change can be a performance detriment, and
205  *  might be better just disabled. The reason arises in the way
206  *  in which the controller supports pipelined requests from the
207  *  Tx data DMA. Up to four requests can be pipelined, and they may
208  *  belong to the same packet or to multiple packets. However all
209  *  requests for one packet are issued before a request is issued
210  *  for a subsequent packet and if a request for the next packet
211  *  requires a context change, that request will be stalled
212  *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA, which
 *  in turn greatly slows down the transmission of small
 *  frames.
216  **********************************************************************/
217 
218 static int
219 em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
220 {
221 	 struct e1000_context_desc *TXD = NULL;
222 	if_softc_ctx_t scctx = adapter->shared;
223 	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
224 	struct tx_ring *txr = &que->txr;
225 	int csum_flags = pi->ipi_csum_flags;
226 	int cur, hdr_len;
227 	u32 cmd;
228 
229 	cur = pi->ipi_pidx;
230 	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
231 	cmd = adapter->txd_cmd;
232 
233 	/*
234 	 * The 82574L can only remember the *last* context used
235 	 * regardless of queue that it was use for.  We cannot reuse
236 	 * contexts on this hardware platform and must generate a new
237 	 * context every time.  82574L hardware spec, section 7.2.6,
238 	 * second note.
239 	 */
240 	if (DONT_FORCE_CTX &&
241 	    adapter->tx_num_queues == 1 &&
242 	    txr->csum_lhlen == pi->ipi_ehdrlen &&
243 	    txr->csum_iphlen == pi->ipi_ip_hlen &&
244 	    txr->csum_flags == csum_flags) {
245 		/*
246 		 * Same csum offload context as the previous packets;
247 		 * just return.
248 		 */
249 		*txd_upper = txr->csum_txd_upper;
250 		*txd_lower = txr->csum_txd_lower;
251 		return (cur);
252 	}
253 
254 	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
255 	if (csum_flags & CSUM_IP) {
256 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
257 		/*
258 		 * Start offset for header checksum calculation.
259 		 * End offset for header checksum calculation.
260 		 * Offset of place to put the checksum.
261 		 */
262 		TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
263 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
264 		TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
265 		cmd |= E1000_TXD_CMD_IP;
266 	}
267 
268 	if (csum_flags & (CSUM_TCP|CSUM_UDP)) {
269 		uint8_t tucso;
270 
271 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
272 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
273 
274 		if (csum_flags & CSUM_TCP) {
275 			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
276 			cmd |= E1000_TXD_CMD_TCP;
277 		} else
278 			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
279 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
280 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
281 		TXD->upper_setup.tcp_fields.tucso = tucso;
282 	}
283 
284 	txr->csum_lhlen = pi->ipi_ehdrlen;
285 	txr->csum_iphlen = pi->ipi_ip_hlen;
286 	txr->csum_flags = csum_flags;
287 	txr->csum_txd_upper = *txd_upper;
288 	txr->csum_txd_lower = *txd_lower;
289 
290 	TXD->tcp_seg_setup.data = htole32(0);
291 	TXD->cmd_and_length =
292 		htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);
293 
294 	if (++cur == scctx->isc_ntxd[0]) {
295 		cur = 0;
296 	}
297 	DPRINTF(iflib_get_dev(adapter->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
298 		      csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
299 	return (cur);
300 }
301 
302 static int
303 em_isc_txd_encap(void *arg, if_pkt_info_t pi)
304 {
305 	struct adapter *sc = arg;
306 	if_softc_ctx_t scctx = sc->shared;
307 	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
308 	struct tx_ring *txr = &que->txr;
309 	bus_dma_segment_t *segs = pi->ipi_segs;
310 	int nsegs = pi->ipi_nsegs;
311 	int csum_flags = pi->ipi_csum_flags;
312 	int i, j, first, pidx_last;
313 	u32 txd_flags, txd_upper = 0, txd_lower = 0;
314 
315 	struct e1000_tx_desc *ctxd = NULL;
316 	bool do_tso, tso_desc;
317 	qidx_t ntxd;
318 
319 	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
320 	i = first = pi->ipi_pidx;
321 	do_tso = (csum_flags & CSUM_TSO);
322 	tso_desc = FALSE;
323 	ntxd = scctx->isc_ntxd[0];
324 	/*
325 	 * TSO Hardware workaround, if this packet is not
326 	 * TSO, and is only a single descriptor long, and
327 	 * it follows a TSO burst, then we need to add a
328 	 * sentinel descriptor to prevent premature writeback.
329 	 */
330 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
331 		if (nsegs == 1)
332 			tso_desc = TRUE;
333 		txr->tx_tso = FALSE;
334 	}
335 
336 	/* Do hardware assists */
337 	if (do_tso) {
338 		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
339 		tso_desc = TRUE;
340 	} else if (csum_flags & EM_CSUM_OFFLOAD) {
341 		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
342 	}
343 
344 	if (pi->ipi_mflags & M_VLANTAG) {
345 		/* Set the vlan id. */
346 		txd_upper |= htole16(pi->ipi_vtag) << 16;
347 		/* Tell hardware to add tag */
348 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
349 	}
350 
351 	DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
352 	/* XXX adapter->pcix_82544 -- lem_fill_descriptors */
353 
354 	/* Set up our transmit descriptors */
355 	for (j = 0; j < nsegs; j++) {
356 		bus_size_t seg_len;
357 		bus_addr_t seg_addr;
358 		uint32_t cmd;
359 
360 		ctxd = &txr->tx_base[i];
361 		seg_addr = segs[j].ds_addr;
362 		seg_len = segs[j].ds_len;
363 		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;
364 
365 		/*
366 		 * TSO Workaround:
367 		 * If this is the last descriptor, we want to
368 		 * split it so we have a small final sentinel
369 		 */
370 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
371 			seg_len -= TSO_WORKAROUND;
372 			ctxd->buffer_addr = htole64(seg_addr);
373 			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
374 			ctxd->upper.data = htole32(txd_upper);
375 
376 			if (++i == scctx->isc_ntxd[0])
377 				i = 0;
378 
379 			/* Now make the sentinel */
380 			ctxd = &txr->tx_base[i];
381 			ctxd->buffer_addr = htole64(seg_addr + seg_len);
382 			ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND);
383 			ctxd->upper.data = htole32(txd_upper);
384 			pidx_last = i;
385 			if (++i == scctx->isc_ntxd[0])
386 				i = 0;
387 			DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]);
388 		} else {
389 			ctxd->buffer_addr = htole64(seg_addr);
390 			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
391 			ctxd->upper.data = htole32(txd_upper);
392 			pidx_last = i;
393 			if (++i == scctx->isc_ntxd[0])
394 				i = 0;
395 			DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]);
396 		}
397 	}
398 
399 	/*
400 	 * Last Descriptor of Packet
401 	 * needs End Of Packet (EOP)
402 	 * and Report Status (RS)
403 	 */
404 	if (txd_flags && nsegs) {
405 		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
406 		DPRINTF(iflib_get_dev(sc->ctx), "setting to RS on %d rs_pidx %d first: %d\n", pidx_last, txr->tx_rs_pidx, first);
407 		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1);
408 		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
409 	}
410 	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
411 	DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
412 	pi->ipi_new_pidx = i;
413 
414 	return (0);
415 }
416 
417 static void
418 em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
419 {
420 	struct adapter *adapter = arg;
421 	struct em_tx_queue *que = &adapter->tx_queues[txqid];
422 	struct tx_ring *txr = &que->txr;
423 
424 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx);
425 }
426 
/*
 * Report, and optionally reclaim, completed transmit descriptors.
 *
 * arg   - driver softc (struct adapter *)
 * txqid - transmit queue index
 * clear - when false only report whether anything completed; when true
 *         advance the ring bookkeeping and return the reclaimed count
 *
 * Completion is tracked through tx_rsq, the queue of descriptor indices
 * recorded by em_isc_txd_encap() for descriptors sent with RS set.  Entries
 * are consumed in order for as long as their descriptor has DD set.
 *
 * Returns 0 when nothing is pending or the oldest entry is not done, 1 when
 * clear is false and at least one entry is done, otherwise the number of
 * descriptors reclaimed.
 */
static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_tx_queue *que = &adapter->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	/* Empty report-status queue: nothing outstanding. */
	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	MPASS(cur != QIDX_INVALID);
	status = txr->tx_base[cur].upper.fields.status;
	updated = !!(status & E1000_TXD_STAT_DD);

	if (!updated)
		return (0);

	/* If clear is false just let caller know that there
	 * are descriptors to reclaim */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		/* Distance from the last processed index to cur, modulo ring size. */
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		DPRINTF(iflib_get_dev(adapter->ctx),
			      "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
			      __FUNCTION__, prev, cur, clear, delta);

		processed += delta;
		prev  = cur;
		/* Advance the consumer index; the mask relies on ntxd being a power of two. */
		rs_cidx = (rs_cidx + 1) & (ntxd-1);
		if (rs_cidx  == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		MPASS(cur != QIDX_INVALID);
		status = txr->tx_base[cur].upper.fields.status;
	} while ((status & E1000_TXD_STAT_DD));

	/* Save progress: next entry to examine and last descriptor reclaimed. */
	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return(processed);
}
483 
484 static void
485 lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
486 {
487 	struct adapter *sc = arg;
488 	if_softc_ctx_t scctx = sc->shared;
489 	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
490 	struct rx_ring *rxr = &que->rxr;
491 	struct e1000_rx_desc *rxd;
492 	uint64_t *paddrs;
493 	uint32_t next_pidx, pidx;
494 	uint16_t count;
495 	int i;
496 
497 	paddrs = iru->iru_paddrs;
498 	pidx = iru->iru_pidx;
499 	count = iru->iru_count;
500 
501 	for (i = 0, next_pidx = pidx; i < count; i++) {
502 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
503 		rxd->buffer_addr = htole64(paddrs[i]);
504 		/* status bits must be cleared */
505 		rxd->status = 0;
506 
507 		if (++next_pidx == scctx->isc_nrxd[0])
508 			next_pidx = 0;
509 	}
510 }
511 
512 static void
513 em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
514 {
515 	struct adapter *sc = arg;
516 	if_softc_ctx_t scctx = sc->shared;
517 	uint16_t rxqid = iru->iru_qsidx;
518 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
519 	struct rx_ring *rxr = &que->rxr;
520 	union e1000_rx_desc_extended *rxd;
521 	uint64_t *paddrs;
522 	uint32_t next_pidx, pidx;
523 	uint16_t count;
524 	int i;
525 
526 	paddrs = iru->iru_paddrs;
527 	pidx = iru->iru_pidx;
528 	count = iru->iru_count;
529 
530 	for (i = 0, next_pidx = pidx; i < count; i++) {
531 		rxd = &rxr->rx_base[next_pidx];
532 		rxd->read.buffer_addr = htole64(paddrs[i]);
533 		/* DD bits must be cleared */
534 		rxd->wb.upper.status_error = 0;
535 
536 		if (++next_pidx == scctx->isc_nrxd[0])
537 			next_pidx = 0;
538 	}
539 }
540 
541 static void
542 em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
543 {
544 	struct adapter *sc = arg;
545 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
546 	struct rx_ring *rxr = &que->rxr;
547 
548 	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
549 }
550 
551 static int
552 lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
553 {
554 	struct adapter *sc = arg;
555 	if_softc_ctx_t scctx = sc->shared;
556 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
557 	struct rx_ring *rxr = &que->rxr;
558 	struct e1000_rx_desc *rxd;
559 	u32 staterr = 0;
560 	int cnt, i;
561 
562 	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
563 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
564 		staterr = rxd->status;
565 
566 		if ((staterr & E1000_RXD_STAT_DD) == 0)
567 			break;
568 		if (++i == scctx->isc_nrxd[0])
569 			i = 0;
570 		if (staterr & E1000_RXD_STAT_EOP)
571 			cnt++;
572 	}
573 	return (cnt);
574 }
575 
576 static int
577 em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
578 {
579 	struct adapter *sc = arg;
580 	if_softc_ctx_t scctx = sc->shared;
581 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
582 	struct rx_ring *rxr = &que->rxr;
583 	union e1000_rx_desc_extended *rxd;
584 	u32 staterr = 0;
585 	int cnt, i;
586 
587 	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
588 		rxd = &rxr->rx_base[i];
589 		staterr = le32toh(rxd->wb.upper.status_error);
590 
591 		if ((staterr & E1000_RXD_STAT_DD) == 0)
592 			break;
593 		if (++i == scctx->isc_nrxd[0])
594 			i = 0;
595 		if (staterr & E1000_RXD_STAT_EOP)
596 			cnt++;
597 	}
598 	return (cnt);
599 }
600 
601 static int
602 lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
603 {
604 	struct adapter *adapter = arg;
605 	if_softc_ctx_t scctx = adapter->shared;
606 	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
607 	struct rx_ring *rxr = &que->rxr;
608 	struct e1000_rx_desc *rxd;
609 	u16 len;
610 	u32 status, errors;
611 	bool eop;
612 	int i, cidx;
613 
614 	status = errors = i = 0;
615 	cidx = ri->iri_cidx;
616 
617 	do {
618 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
619 		status = rxd->status;
620 		errors = rxd->errors;
621 
622 		/* Error Checking then decrement count */
623 		MPASS ((status & E1000_RXD_STAT_DD) != 0);
624 
625 		len = le16toh(rxd->length);
626 		ri->iri_len += len;
627 
628 		eop = (status & E1000_RXD_STAT_EOP) != 0;
629 
630 		/* Make sure bad packets are discarded */
631 		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
632 			adapter->dropped_pkts++;
633 			/* XXX fixup if common */
634 			return (EBADMSG);
635 		}
636 
637 		ri->iri_frags[i].irf_flid = 0;
638 		ri->iri_frags[i].irf_idx = cidx;
639 		ri->iri_frags[i].irf_len = len;
640 		/* Zero out the receive descriptors status. */
641 		rxd->status = 0;
642 
643 		if (++cidx == scctx->isc_nrxd[0])
644 			cidx = 0;
645 		i++;
646 	} while (!eop);
647 
648 	/* XXX add a faster way to look this up */
649 	if (adapter->hw.mac.type >= e1000_82543 && !(status & E1000_RXD_STAT_IXSM))
650 		lem_receive_checksum(status, errors, ri);
651 
652 	if (status & E1000_RXD_STAT_VP) {
653 		ri->iri_vtag = le16toh(rxd->special);
654 		ri->iri_flags |= M_VLANTAG;
655 	}
656 
657 	ri->iri_nfrags = i;
658 
659 	return (0);
660 }
661 
662 static int
663 em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
664 {
665 	struct adapter *adapter = arg;
666 	if_softc_ctx_t scctx = adapter->shared;
667 	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
668 	struct rx_ring *rxr = &que->rxr;
669 	union e1000_rx_desc_extended *rxd;
670 
671 	u16 len;
672 	u32 pkt_info;
673 	u32 staterr = 0;
674 	bool eop;
675 	int i, cidx, vtag;
676 
677 	i = vtag = 0;
678 	cidx = ri->iri_cidx;
679 
680 	do {
681 		rxd = &rxr->rx_base[cidx];
682 		staterr = le32toh(rxd->wb.upper.status_error);
683 		pkt_info = le32toh(rxd->wb.lower.mrq);
684 
685 		/* Error Checking then decrement count */
686 		MPASS ((staterr & E1000_RXD_STAT_DD) != 0);
687 
688 		len = le16toh(rxd->wb.upper.length);
689 		ri->iri_len += len;
690 
691 		eop = (staterr & E1000_RXD_STAT_EOP) != 0;
692 
693 		/* Make sure bad packets are discarded */
694 		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
695 			adapter->dropped_pkts++;
696 			return EBADMSG;
697 		}
698 
699 		ri->iri_frags[i].irf_flid = 0;
700 		ri->iri_frags[i].irf_idx = cidx;
701 		ri->iri_frags[i].irf_len = len;
702 		/* Zero out the receive descriptors status. */
703 		rxd->wb.upper.status_error &= htole32(~0xFF);
704 
705 		if (++cidx == scctx->isc_nrxd[0])
706 			cidx = 0;
707 		i++;
708 	} while (!eop);
709 
710 	/* XXX add a faster way to look this up */
711 	if (adapter->hw.mac.type >= e1000_82543)
712 		em_receive_checksum(staterr, ri);
713 
714 	if (staterr & E1000_RXD_STAT_VP) {
715 		vtag = le16toh(rxd->wb.upper.vlan);
716 	}
717 
718 	ri->iri_vtag = vtag;
719 	if (vtag)
720 		ri->iri_flags |= M_VLANTAG;
721 
722 	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
723 	ri->iri_rsstype = em_determine_rsstype(pkt_info);
724 
725 	ri->iri_nfrags = i;
726 	return (0);
727 }
728 
729 /*********************************************************************
730  *
731  *  Verify that the hardware indicated that the checksum is valid.
732  *  Inform the stack about the status of checksum so that stack
733  *  doesn't spend time verifying the checksum.
734  *
735  *********************************************************************/
736 static void
737 lem_receive_checksum(int status, int errors, if_rxd_info_t ri)
738 {
739 	/* Did it pass? */
740 	if (status & E1000_RXD_STAT_IPCS && !(errors & E1000_RXD_ERR_IPE))
741 		ri->iri_csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
742 
743 	if (status & E1000_RXD_STAT_TCPCS) {
744 		/* Did it pass? */
745 		if (!(errors & E1000_RXD_ERR_TCPE)) {
746 			ri->iri_csum_flags |=
747 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
748 			ri->iri_csum_data = htons(0xffff);
749 		}
750 	}
751 }
752 
753 /********************************************************************
754  *
755  *  Parse the packet type to determine the appropriate hash
756  *
757  ******************************************************************/
758 static int
759 em_determine_rsstype(u32 pkt_info)
760 {
761 	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
762 	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
763 		return M_HASHTYPE_RSS_TCP_IPV4;
764 	case E1000_RXDADV_RSSTYPE_IPV4:
765 		return M_HASHTYPE_RSS_IPV4;
766 	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
767 		return M_HASHTYPE_RSS_TCP_IPV6;
768 	case E1000_RXDADV_RSSTYPE_IPV6_EX:
769 		return M_HASHTYPE_RSS_IPV6_EX;
770 	case E1000_RXDADV_RSSTYPE_IPV6:
771 		return M_HASHTYPE_RSS_IPV6;
772 	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
773 		return M_HASHTYPE_RSS_TCP_IPV6_EX;
774 	default:
775 		return M_HASHTYPE_OPAQUE;
776 	}
777 }
778 
779 static void
780 em_receive_checksum(uint32_t status, if_rxd_info_t ri)
781 {
782 	ri->iri_csum_flags = 0;
783 
784 	/* Ignore Checksum bit is set */
785 	if (status & E1000_RXD_STAT_IXSM)
786 		return;
787 
788 	/* If the IP checksum exists and there is no IP Checksum error */
789 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
790 	    E1000_RXD_STAT_IPCS) {
791 		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
792 	}
793 
794 	/* TCP or UDP checksum */
795 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
796 	    E1000_RXD_STAT_TCPCS) {
797 		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
798 		ri->iri_csum_data = htons(0xffff);
799 	}
800 	if (status & E1000_RXD_STAT_UDPCS) {
801 		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
802 		ri->iri_csum_data = htons(0xffff);
803 	}
804 }
805