xref: /freebsd/sys/dev/e1000/em_txrx.c (revision 99429157e8615dc3b7f11afbe3ed92de7476a5db)
1 /*-
2  * Copyright (c) 2016-2017 Matt Macy <mmacy@nextbsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /* $FreeBSD$ */
28 #include "if_em.h"
29 
30 #ifdef RSS
31 #include <net/rss_config.h>
32 #include <netinet/in_rss.h>
33 #endif
34 
35 #ifdef VERBOSE_DEBUG
36 #define DPRINTF device_printf
37 #else
38 #define DPRINTF(...)
39 #endif
40 
41 /*********************************************************************
42  *  Local Function prototypes
43  *********************************************************************/
44 static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper,
45     u32 *txd_lower);
46 static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi,
47     u32 *txd_upper, u32 *txd_lower);
48 static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
49 static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
50 static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
51 static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru);
52 static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
53     qidx_t pidx);
54 static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
55     qidx_t budget);
56 static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
57 
58 static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru);
59 
60 static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
61    qidx_t budget);
62 static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
63 
64 static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri);
65 static void em_receive_checksum(uint32_t status, if_rxd_info_t ri);
66 static int em_determine_rsstype(u32 pkt_info);
67 extern int em_intr(void *arg);
68 
69 struct if_txrx em_txrx = {
70 	em_isc_txd_encap,
71 	em_isc_txd_flush,
72 	em_isc_txd_credits_update,
73 	em_isc_rxd_available,
74 	em_isc_rxd_pkt_get,
75 	em_isc_rxd_refill,
76 	em_isc_rxd_flush,
77 	em_intr
78 };
79 
80 struct if_txrx lem_txrx = {
81 	em_isc_txd_encap,
82 	em_isc_txd_flush,
83 	em_isc_txd_credits_update,
84 	lem_isc_rxd_available,
85 	lem_isc_rxd_pkt_get,
86 	lem_isc_rxd_refill,
87 	em_isc_rxd_flush,
88 	em_intr
89 };
90 
91 extern if_shared_ctx_t em_sctx;
92 
93 void
94 em_dump_rs(struct adapter *adapter)
95 {
96 	if_softc_ctx_t scctx = adapter->shared;
97 	struct em_tx_queue *que;
98 	struct tx_ring *txr;
99 	qidx_t i, ntxd, qid, cur;
100 	int16_t rs_cidx;
101 	uint8_t status;
102 
103 	printf("\n");
104 	ntxd = scctx->isc_ntxd[0];
105 	for (qid = 0; qid < adapter->tx_num_queues; qid++) {
106 		que = &adapter->tx_queues[qid];
107 		txr =  &que->txr;
108 		rs_cidx = txr->tx_rs_cidx;
109 		if (rs_cidx != txr->tx_rs_pidx) {
110 			cur = txr->tx_rsq[rs_cidx];
111 			status = txr->tx_base[cur].upper.fields.status;
112 			if (!(status & E1000_TXD_STAT_DD))
113 				printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur);
114 		} else {
115 			rs_cidx = (rs_cidx-1)&(ntxd-1);
116 			cur = txr->tx_rsq[rs_cidx];
117 			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ", qid, rs_cidx, cur);
118 		}
119 		printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed, txr->tx_rs_pidx);
120 		for (i = 0; i < ntxd; i++) {
121 			if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD)
122 				printf("%d set ", i);
123 		}
124 		printf("\n");
125 	}
126 }
127 
128 /**********************************************************************
129  *
130  *  Setup work for hardware segmentation offload (TSO) on
131  *  adapters using advanced tx descriptors
132  *
133  **********************************************************************/
134 static int
135 em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
136 {
137 	if_softc_ctx_t scctx = adapter->shared;
138 	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
139 	struct tx_ring *txr = &que->txr;
140 	struct e1000_context_desc *TXD;
141 	int cur, hdr_len;
142 
143 	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
144 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
145 		      E1000_TXD_DTYP_D |	/* Data descr type */
146 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
147 
148 	/* IP and/or TCP header checksum calculation and insertion. */
149 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
150 
151 	cur = pi->ipi_pidx;
152 	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
153 
154 	/*
155 	 * Start offset for header checksum calculation.
156 	 * End offset for header checksum calculation.
157 	 * Offset of place put the checksum.
158 	 */
159 	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
160 	TXD->lower_setup.ip_fields.ipcse =
161 	    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
162 	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
163 
164 	/*
165 	 * Start offset for payload checksum calculation.
166 	 * End offset for payload checksum calculation.
167 	 * Offset of place to put the checksum.
168 	 */
169 	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
170 	TXD->upper_setup.tcp_fields.tucse = 0;
171 	TXD->upper_setup.tcp_fields.tucso =
172 	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);
173 
174 	/*
175 	 * Payload size per packet w/o any headers.
176 	 * Length of all headers up to payload.
177 	 */
178 	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
179 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
180 
181 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
182 				E1000_TXD_CMD_DEXT |	/* Extended descr */
183 				E1000_TXD_CMD_TSE |	/* TSE context */
184 				E1000_TXD_CMD_IP |	/* Do IP csum */
185 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
186 				      (pi->ipi_len - hdr_len)); /* Total len */
187 	txr->tx_tso = TRUE;
188 
189 	if (++cur == scctx->isc_ntxd[0]) {
190 		cur = 0;
191 	}
192 	DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur);
193 	return (cur);
194 }
195 
196 #define TSO_WORKAROUND 4
197 #define DONT_FORCE_CTX 1
198 
199 
200 /*********************************************************************
201  *  The offload context is protocol specific (TCP/UDP) and thus
202  *  only needs to be set when the protocol changes. The occasion
203  *  of a context change can be a performance detriment, and
204  *  might be better just disabled. The reason arises in the way
205  *  in which the controller supports pipelined requests from the
206  *  Tx data DMA. Up to four requests can be pipelined, and they may
207  *  belong to the same packet or to multiple packets. However all
208  *  requests for one packet are issued before a request is issued
209  *  for a subsequent packet and if a request for the next packet
210  *  requires a context change, that request will be stalled
211  *  until the previous request completes. This means setting up
212  *  a new context effectively disables pipelined Tx data DMA which
213  *  in turn greatly slow down performance to send small sized
214  *  frames.
215  **********************************************************************/
216 
217 static int
218 em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
219 {
220 	 struct e1000_context_desc *TXD = NULL;
221 	if_softc_ctx_t scctx = adapter->shared;
222 	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
223 	struct tx_ring *txr = &que->txr;
224 	int csum_flags = pi->ipi_csum_flags;
225 	int cur, hdr_len;
226 	u32 cmd;
227 
228 	cur = pi->ipi_pidx;
229 	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
230 	cmd = adapter->txd_cmd;
231 
232 	/*
233 	 * The 82574L can only remember the *last* context used
234 	 * regardless of queue that it was use for.  We cannot reuse
235 	 * contexts on this hardware platform and must generate a new
236 	 * context every time.  82574L hardware spec, section 7.2.6,
237 	 * second note.
238 	 */
239 	if (DONT_FORCE_CTX &&
240 	    adapter->tx_num_queues == 1 &&
241 	    txr->csum_lhlen == pi->ipi_ehdrlen &&
242 	    txr->csum_iphlen == pi->ipi_ip_hlen &&
243 	    txr->csum_flags == csum_flags) {
244 		/*
245 		 * Same csum offload context as the previous packets;
246 		 * just return.
247 		 */
248 		*txd_upper = txr->csum_txd_upper;
249 		*txd_lower = txr->csum_txd_lower;
250 		return (cur);
251 	}
252 
253 	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
254 	if (csum_flags & CSUM_IP) {
255 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
256 		/*
257 		 * Start offset for header checksum calculation.
258 		 * End offset for header checksum calculation.
259 		 * Offset of place to put the checksum.
260 		 */
261 		TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
262 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
263 		TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
264 		cmd |= E1000_TXD_CMD_IP;
265 	}
266 
267 	if (csum_flags & (CSUM_TCP|CSUM_UDP)) {
268 		uint8_t tucso;
269 
270 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
271 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
272 
273 		if (csum_flags & CSUM_TCP) {
274 			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
275 			cmd |= E1000_TXD_CMD_TCP;
276 		} else
277 			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
278 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
279 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
280 		TXD->upper_setup.tcp_fields.tucso = tucso;
281 	}
282 
283 	txr->csum_lhlen = pi->ipi_ehdrlen;
284 	txr->csum_iphlen = pi->ipi_ip_hlen;
285 	txr->csum_flags = csum_flags;
286 	txr->csum_txd_upper = *txd_upper;
287 	txr->csum_txd_lower = *txd_lower;
288 
289 	TXD->tcp_seg_setup.data = htole32(0);
290 	TXD->cmd_and_length =
291 		htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);
292 
293 	if (++cur == scctx->isc_ntxd[0]) {
294 		cur = 0;
295 	}
296 	DPRINTF(iflib_get_dev(adapter->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
297 		      csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
298 	return (cur);
299 }
300 
301 static int
302 em_isc_txd_encap(void *arg, if_pkt_info_t pi)
303 {
304 	struct adapter *sc = arg;
305 	if_softc_ctx_t scctx = sc->shared;
306 	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
307 	struct tx_ring *txr = &que->txr;
308 	bus_dma_segment_t *segs = pi->ipi_segs;
309 	int nsegs = pi->ipi_nsegs;
310 	int csum_flags = pi->ipi_csum_flags;
311 	int i, j, first, pidx_last;
312 	u32 txd_flags, txd_upper = 0, txd_lower = 0;
313 
314 	struct e1000_tx_desc *ctxd = NULL;
315 	bool do_tso, tso_desc;
316 	qidx_t ntxd;
317 
318 	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
319 	i = first = pi->ipi_pidx;
320 	do_tso = (csum_flags & CSUM_TSO);
321 	tso_desc = FALSE;
322 	ntxd = scctx->isc_ntxd[0];
323 	/*
324 	 * TSO Hardware workaround, if this packet is not
325 	 * TSO, and is only a single descriptor long, and
326 	 * it follows a TSO burst, then we need to add a
327 	 * sentinel descriptor to prevent premature writeback.
328 	 */
329 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
330 		if (nsegs == 1)
331 			tso_desc = TRUE;
332 		txr->tx_tso = FALSE;
333 	}
334 
335 	/* Do hardware assists */
336 	if (do_tso) {
337 		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
338 		tso_desc = TRUE;
339 	} else if (csum_flags & EM_CSUM_OFFLOAD) {
340 		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
341 	}
342 
343 	if (pi->ipi_mflags & M_VLANTAG) {
344 		/* Set the vlan id. */
345 		txd_upper |= htole16(pi->ipi_vtag) << 16;
346 		/* Tell hardware to add tag */
347 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
348 	}
349 
350 	DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
351 	/* XXX adapter->pcix_82544 -- lem_fill_descriptors */
352 
353 	/* Set up our transmit descriptors */
354 	for (j = 0; j < nsegs; j++) {
355 		bus_size_t seg_len;
356 		bus_addr_t seg_addr;
357 		uint32_t cmd;
358 
359 		ctxd = &txr->tx_base[i];
360 		seg_addr = segs[j].ds_addr;
361 		seg_len = segs[j].ds_len;
362 		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;
363 
364 		/*
365 		 * TSO Workaround:
366 		 * If this is the last descriptor, we want to
367 		 * split it so we have a small final sentinel
368 		 */
369 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
370 			seg_len -= TSO_WORKAROUND;
371 			ctxd->buffer_addr = htole64(seg_addr);
372 			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
373 			ctxd->upper.data = htole32(txd_upper);
374 
375 			if (++i == scctx->isc_ntxd[0])
376 				i = 0;
377 
378 			/* Now make the sentinel */
379 			ctxd = &txr->tx_base[i];
380 			ctxd->buffer_addr = htole64(seg_addr + seg_len);
381 			ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND);
382 			ctxd->upper.data = htole32(txd_upper);
383 			pidx_last = i;
384 			if (++i == scctx->isc_ntxd[0])
385 				i = 0;
386 			DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]);
387 		} else {
388 			ctxd->buffer_addr = htole64(seg_addr);
389 			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
390 			ctxd->upper.data = htole32(txd_upper);
391 			pidx_last = i;
392 			if (++i == scctx->isc_ntxd[0])
393 				i = 0;
394 			DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]);
395 		}
396 	}
397 
398 	/*
399 	 * Last Descriptor of Packet
400 	 * needs End Of Packet (EOP)
401 	 * and Report Status (RS)
402 	 */
403 	if (txd_flags) {
404 		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
405 		DPRINTF(iflib_get_dev(sc->ctx), "setting to RS on %d rs_pidx %d first: %d\n", pidx_last, txr->tx_rs_pidx, first);
406 		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1);
407 		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
408 	}
409 	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
410 	DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
411 	pi->ipi_new_pidx = i;
412 
413 	return (0);
414 }
415 
416 static void
417 em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
418 {
419 	struct adapter *adapter = arg;
420 	struct em_tx_queue *que = &adapter->tx_queues[txqid];
421 	struct tx_ring *txr = &que->txr;
422 
423 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx);
424 }
425 
426 static int
427 em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
428 {
429 	struct adapter *adapter = arg;
430 	if_softc_ctx_t scctx = adapter->shared;
431 	struct em_tx_queue *que = &adapter->tx_queues[txqid];
432 	struct tx_ring *txr = &que->txr;
433 
434 	qidx_t processed = 0;
435 	int updated;
436 	qidx_t cur, prev, ntxd, rs_cidx;
437 	int32_t delta;
438 	uint8_t status;
439 
440 	rs_cidx = txr->tx_rs_cidx;
441 	if (rs_cidx == txr->tx_rs_pidx)
442 		return (0);
443 	cur = txr->tx_rsq[rs_cidx];
444 	MPASS(cur != QIDX_INVALID);
445 	status = txr->tx_base[cur].upper.fields.status;
446 	updated = !!(status & E1000_TXD_STAT_DD);
447 
448 	if (clear == false || updated == 0)
449 		return (updated);
450 
451 	prev = txr->tx_cidx_processed;
452 	ntxd = scctx->isc_ntxd[0];
453 	do {
454 		delta = (int32_t)cur - (int32_t)prev;
455 		MPASS(prev == 0 || delta != 0);
456 		if (delta < 0)
457 			delta += ntxd;
458 		DPRINTF(iflib_get_dev(adapter->ctx),
459 			      "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
460 			      __FUNCTION__, prev, cur, clear, delta);
461 
462 		processed += delta;
463 		prev  = cur;
464 		rs_cidx = (rs_cidx + 1) & (ntxd-1);
465 		if (rs_cidx  == txr->tx_rs_pidx)
466 			break;
467 		cur = txr->tx_rsq[rs_cidx];
468 		MPASS(cur != QIDX_INVALID);
469 		status = txr->tx_base[cur].upper.fields.status;
470 	} while ((status & E1000_TXD_STAT_DD));
471 
472 	txr->tx_rs_cidx = rs_cidx;
473 	txr->tx_cidx_processed = prev;
474 	return(processed);
475 }
476 
477 static void
478 lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
479 {
480 	struct adapter *sc = arg;
481 	if_softc_ctx_t scctx = sc->shared;
482 	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
483 	struct rx_ring *rxr = &que->rxr;
484 	struct e1000_rx_desc *rxd;
485 	uint64_t *paddrs;
486 	uint32_t next_pidx, pidx;
487 	uint16_t count;
488 	int i;
489 
490 	paddrs = iru->iru_paddrs;
491 	pidx = iru->iru_pidx;
492 	count = iru->iru_count;
493 
494 	for (i = 0, next_pidx = pidx; i < count; i++) {
495 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
496 		rxd->buffer_addr = htole64(paddrs[i]);
497 		/* status bits must be cleared */
498 		rxd->status = 0;
499 
500 		if (++next_pidx == scctx->isc_nrxd[0])
501 			next_pidx = 0;
502 	}
503 }
504 
505 static void
506 em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
507 {
508 	struct adapter *sc = arg;
509 	if_softc_ctx_t scctx = sc->shared;
510 	uint16_t rxqid = iru->iru_qsidx;
511 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
512 	struct rx_ring *rxr = &que->rxr;
513 	union e1000_rx_desc_extended *rxd;
514 	uint64_t *paddrs;
515 	uint32_t next_pidx, pidx;
516 	uint16_t count;
517 	int i;
518 
519 	paddrs = iru->iru_paddrs;
520 	pidx = iru->iru_pidx;
521 	count = iru->iru_count;
522 
523 	for (i = 0, next_pidx = pidx; i < count; i++) {
524 		rxd = &rxr->rx_base[next_pidx];
525 		rxd->read.buffer_addr = htole64(paddrs[i]);
526 		/* DD bits must be cleared */
527 		rxd->wb.upper.status_error = 0;
528 
529 		if (++next_pidx == scctx->isc_nrxd[0])
530 			next_pidx = 0;
531 	}
532 }
533 
534 static void
535 em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
536 {
537 	struct adapter *sc = arg;
538 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
539 	struct rx_ring *rxr = &que->rxr;
540 
541 	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
542 }
543 
544 static int
545 lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
546 {
547 	struct adapter *sc = arg;
548 	if_softc_ctx_t scctx = sc->shared;
549 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
550 	struct rx_ring *rxr = &que->rxr;
551 	struct e1000_rx_desc *rxd;
552 	u32 staterr = 0;
553 	int cnt, i;
554 
555 	if (budget == 1) {
556 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[idx];
557 		staterr = rxd->status;
558 		return (staterr & E1000_RXD_STAT_DD);
559 	}
560 
561 	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
562 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
563 		staterr = rxd->status;
564 
565 		if ((staterr & E1000_RXD_STAT_DD) == 0)
566 			break;
567 
568 		if (++i == scctx->isc_nrxd[0])
569 			i = 0;
570 
571 		if (staterr & E1000_RXD_STAT_EOP)
572 			cnt++;
573 	}
574 	return (cnt);
575 }
576 
577 static int
578 em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
579 {
580 	struct adapter *sc = arg;
581 	if_softc_ctx_t scctx = sc->shared;
582 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
583 	struct rx_ring *rxr = &que->rxr;
584 	union e1000_rx_desc_extended *rxd;
585 	u32 staterr = 0;
586 	int cnt, i;
587 
588 	if (budget == 1) {
589 		rxd = &rxr->rx_base[idx];
590 		staterr = le32toh(rxd->wb.upper.status_error);
591 		return (staterr & E1000_RXD_STAT_DD);
592 	}
593 
594 	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
595 		rxd = &rxr->rx_base[i];
596 		staterr = le32toh(rxd->wb.upper.status_error);
597 
598 		if ((staterr & E1000_RXD_STAT_DD) == 0)
599 			break;
600 
601 		if (++i == scctx->isc_nrxd[0]) {
602 			i = 0;
603 		}
604 
605 		if (staterr & E1000_RXD_STAT_EOP)
606 			cnt++;
607 
608 	}
609 	return (cnt);
610 }
611 
612 static int
613 lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
614 {
615 	struct adapter *adapter = arg;
616 	if_softc_ctx_t scctx = adapter->shared;
617 	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
618 	struct rx_ring *rxr = &que->rxr;
619 	struct e1000_rx_desc *rxd;
620 	u16 len;
621 	u32 status, errors;
622 	bool eop;
623 	int i, cidx;
624 
625 	status = errors = i = 0;
626 	cidx = ri->iri_cidx;
627 
628 	do {
629 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
630 		status = rxd->status;
631 		errors = rxd->errors;
632 
633 		/* Error Checking then decrement count */
634 		MPASS ((status & E1000_RXD_STAT_DD) != 0);
635 
636 		len = le16toh(rxd->length);
637 		ri->iri_len += len;
638 
639 		eop = (status & E1000_RXD_STAT_EOP) != 0;
640 
641 		/* Make sure bad packets are discarded */
642 		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
643 			adapter->dropped_pkts++;
644 			/* XXX fixup if common */
645 			return (EBADMSG);
646 		}
647 
648 		ri->iri_frags[i].irf_flid = 0;
649 		ri->iri_frags[i].irf_idx = cidx;
650 		ri->iri_frags[i].irf_len = len;
651 		/* Zero out the receive descriptors status. */
652 		rxd->status = 0;
653 
654 		if (++cidx == scctx->isc_nrxd[0])
655 			cidx = 0;
656 		i++;
657 	} while (!eop);
658 
659 	/* XXX add a faster way to look this up */
660 	if (adapter->hw.mac.type >= e1000_82543 && !(status & E1000_RXD_STAT_IXSM))
661 		lem_receive_checksum(status, errors, ri);
662 
663 	if (status & E1000_RXD_STAT_VP) {
664 		ri->iri_vtag = le16toh(rxd->special);
665 		ri->iri_flags |= M_VLANTAG;
666 	}
667 
668 	ri->iri_nfrags = i;
669 
670 	return (0);
671 }
672 
673 static int
674 em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
675 {
676 	struct adapter *adapter = arg;
677 	if_softc_ctx_t scctx = adapter->shared;
678 	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
679 	struct rx_ring *rxr = &que->rxr;
680 	union e1000_rx_desc_extended *rxd;
681 
682 	u16 len;
683 	u32 pkt_info;
684 	u32 staterr = 0;
685 	bool eop;
686 	int i, cidx, vtag;
687 
688 	i = vtag = 0;
689 	cidx = ri->iri_cidx;
690 
691 	do {
692 		rxd = &rxr->rx_base[cidx];
693 		staterr = le32toh(rxd->wb.upper.status_error);
694 		pkt_info = le32toh(rxd->wb.lower.mrq);
695 
696 		/* Error Checking then decrement count */
697 		MPASS ((staterr & E1000_RXD_STAT_DD) != 0);
698 
699 		len = le16toh(rxd->wb.upper.length);
700 		ri->iri_len += len;
701 
702 		eop = (staterr & E1000_RXD_STAT_EOP) != 0;
703 
704 		/* Make sure bad packets are discarded */
705 		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
706 			adapter->dropped_pkts++;
707 			return EBADMSG;
708 		}
709 
710 		ri->iri_frags[i].irf_flid = 0;
711 		ri->iri_frags[i].irf_idx = cidx;
712 		ri->iri_frags[i].irf_len = len;
713 		/* Zero out the receive descriptors status. */
714 		rxd->wb.upper.status_error &= htole32(~0xFF);
715 
716 		if (++cidx == scctx->isc_nrxd[0])
717 			cidx = 0;
718 		i++;
719 	} while (!eop);
720 
721 	/* XXX add a faster way to look this up */
722 	if (adapter->hw.mac.type >= e1000_82543)
723 		em_receive_checksum(staterr, ri);
724 
725 	if (staterr & E1000_RXD_STAT_VP) {
726 		vtag = le16toh(rxd->wb.upper.vlan);
727 	}
728 
729 	ri->iri_vtag = vtag;
730 	if (vtag)
731 		ri->iri_flags |= M_VLANTAG;
732 
733 	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
734 	ri->iri_rsstype = em_determine_rsstype(pkt_info);
735 
736 	ri->iri_nfrags = i;
737 	return (0);
738 }
739 
740 /*********************************************************************
741  *
742  *  Verify that the hardware indicated that the checksum is valid.
743  *  Inform the stack about the status of checksum so that stack
744  *  doesn't spend time verifying the checksum.
745  *
746  *********************************************************************/
747 static void
748 lem_receive_checksum(int status, int errors, if_rxd_info_t ri)
749 {
750 	/* Did it pass? */
751 	if (status & E1000_RXD_STAT_IPCS && !(errors & E1000_RXD_ERR_IPE))
752 		ri->iri_csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
753 
754 	if (status & E1000_RXD_STAT_TCPCS) {
755 		/* Did it pass? */
756 		if (!(errors & E1000_RXD_ERR_TCPE)) {
757 			ri->iri_csum_flags |=
758 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
759 			ri->iri_csum_data = htons(0xffff);
760 		}
761 	}
762 }
763 
764 /********************************************************************
765  *
766  *  Parse the packet type to determine the appropriate hash
767  *
768  ******************************************************************/
769 static int
770 em_determine_rsstype(u32 pkt_info)
771 {
772 	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
773 	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
774 		return M_HASHTYPE_RSS_TCP_IPV4;
775 	case E1000_RXDADV_RSSTYPE_IPV4:
776 		return M_HASHTYPE_RSS_IPV4;
777 	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
778 		return M_HASHTYPE_RSS_TCP_IPV6;
779 	case E1000_RXDADV_RSSTYPE_IPV6_EX:
780 		return M_HASHTYPE_RSS_IPV6_EX;
781 	case E1000_RXDADV_RSSTYPE_IPV6:
782 		return M_HASHTYPE_RSS_IPV6;
783 	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
784 		return M_HASHTYPE_RSS_TCP_IPV6_EX;
785 	default:
786 		return M_HASHTYPE_OPAQUE;
787 	}
788 }
789 
790 static void
791 em_receive_checksum(uint32_t status, if_rxd_info_t ri)
792 {
793 	ri->iri_csum_flags = 0;
794 
795 	/* Ignore Checksum bit is set */
796 	if (status & E1000_RXD_STAT_IXSM)
797 		return;
798 
799 	/* If the IP checksum exists and there is no IP Checksum error */
800 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
801 	    E1000_RXD_STAT_IPCS) {
802 		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
803 	}
804 
805 	/* TCP or UDP checksum */
806 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
807 	    E1000_RXD_STAT_TCPCS) {
808 		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
809 		ri->iri_csum_data = htons(0xffff);
810 	}
811 	if (status & E1000_RXD_STAT_UDPCS) {
812 		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
813 		ri->iri_csum_data = htons(0xffff);
814 	}
815 }
816