xref: /linux/drivers/net/ethernet/intel/i40e/i40e_txrx.c (revision c0c914eca7f251c70facc37dfebeaf176601918d)
1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2014 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26 
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
29 #include "i40e.h"
30 #include "i40e_prototype.h"
31 
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33 				u32 td_tag)
34 {
35 	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
37 			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39 			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
40 }
41 
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
43 #define I40E_FD_CLEAN_DELAY 10
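/* I40E_TXD_CMD sets the EOP (end of packet) and RS (report status) command
 * bits on the dummy data descriptor used for filter programming, and
 * I40E_FD_CLEAN_DELAY bounds how many ~1 ms sleeps
 * i40e_program_fdir_filter() will wait below for free descriptors.
 */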
44 /**
45  * i40e_program_fdir_filter - Program a Flow Director filter
46  * @fdir_data: flow director filter data that supplies the filter parameters
47  * @raw_packet: the pre-allocated packet buffer for FDir
48  * @pf: The PF pointer
49  * @add: True for add/update, False for remove
50  **/
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52 			     struct i40e_pf *pf, bool add)
53 {
54 	struct i40e_filter_program_desc *fdir_desc;
55 	struct i40e_tx_buffer *tx_buf, *first;
56 	struct i40e_tx_desc *tx_desc;
57 	struct i40e_ring *tx_ring;
58 	unsigned int fpt, dcc;
59 	struct i40e_vsi *vsi;
60 	struct device *dev;
61 	dma_addr_t dma;
62 	u32 td_cmd = 0;
63 	u16 delay = 0;
64 	u16 i;
65 
66 	/* find existing FDIR VSI */
67 	vsi = NULL;
68 	for (i = 0; i < pf->num_alloc_vsi; i++)
69 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70 			vsi = pf->vsi[i];
71 	if (!vsi)
72 		return -ENOENT;
73 
74 	tx_ring = vsi->tx_rings[0];
75 	dev = tx_ring->dev;
76 
77 	/* we need two descriptors to add/del a filter and we can wait */
78 	do {
79 		if (I40E_DESC_UNUSED(tx_ring) > 1)
80 			break;
81 		msleep_interruptible(1);
82 		delay++;
83 	} while (delay < I40E_FD_CLEAN_DELAY);
84 
85 	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86 		return -EAGAIN;
87 
88 	dma = dma_map_single(dev, raw_packet,
89 			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90 	if (dma_mapping_error(dev, dma))
91 		goto dma_fail;
92 
93 	/* grab the next descriptor */
94 	i = tx_ring->next_to_use;
95 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 	first = &tx_ring->tx_bi[i];
97 	memset(first, 0, sizeof(struct i40e_tx_buffer));
98 
99 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
100 
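	/* Accumulate the two descriptor words in host order before writing
	 * them out: fpt packs the queue index, flex offset, PCTYPE and
	 * destination VSI into qindex_flex_ptype_vsi, while dcc packs the
	 * descriptor type, add/remove command, destination, FD status and
	 * counter index into dtype_cmd_cntindex.
	 */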
101 	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
103 
104 	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
106 
107 	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
109 
110 	/* Use LAN VSI Id if not programmed by user */
111 	if (fdir_data->dest_vsi == 0)
112 		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
114 	else
115 		fpt |= ((u32)fdir_data->dest_vsi <<
116 			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118 
119 	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
120 
121 	if (add)
122 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
124 	else
125 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
127 
128 	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 	       I40E_TXD_FLTR_QW1_DEST_MASK;
130 
131 	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
133 
134 	if (fdir_data->cnt_index != 0) {
135 		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 		dcc |= ((u32)fdir_data->cnt_index <<
137 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
139 	}
140 
141 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 	fdir_desc->rsvd = cpu_to_le32(0);
143 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145 
146 	/* Now program a dummy descriptor */
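	/* The raw_packet buffer is handed to the hardware through an ordinary
	 * data descriptor; the I40E_TX_DESC_CMD_DUMMY bit set below suggests
	 * the frame is only parsed for filter programming and is not actually
	 * transmitted on the wire.
	 */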
147 	i = tx_ring->next_to_use;
148 	tx_desc = I40E_TX_DESC(tx_ring, i);
149 	tx_buf = &tx_ring->tx_bi[i];
150 
151 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152 
153 	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
154 
155 	/* record length, and DMA address */
156 	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 	dma_unmap_addr_set(tx_buf, dma, dma);
158 
159 	tx_desc->buffer_addr = cpu_to_le64(dma);
160 	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
161 
162 	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 	tx_buf->raw_buf = (void *)raw_packet;
164 
165 	tx_desc->cmd_type_offset_bsz =
166 		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
167 
168 	/* Force memory writes to complete before letting h/w
169 	 * know there are new descriptors to fetch.
170 	 */
171 	wmb();
172 
173 	/* Mark the data descriptor to be watched */
174 	first->next_to_watch = tx_desc;
175 
176 	writel(tx_ring->next_to_use, tx_ring->tail);
177 	return 0;
178 
179 dma_fail:
180 	return -1;
181 }
182 
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
185 /**
186  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187  * @vsi: pointer to the targeted VSI
188  * @fd_data: the flow director data required for the FDir descriptor
189  * @add: true adds a filter, false removes it
190  *
191  * Returns 0 if the filters were successfully added or removed
192  **/
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 				   struct i40e_fdir_filter *fd_data,
195 				   bool add)
196 {
197 	struct i40e_pf *pf = vsi->back;
198 	struct udphdr *udp;
199 	struct iphdr *ip;
200 	bool err = false;
201 	u8 *raw_packet;
202 	int ret;
203 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
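	/* The template above is a minimal Ethernet + IPv4 + UDP frame:
	 * ethertype 0x0800, IHL 5 (0x45), IP total length 0x1c (20-byte IP
	 * header plus 8-byte UDP header), DF set, TTL 0x40 and protocol 0x11
	 * (UDP); addresses and ports are left zero and patched in from
	 * fd_data below.
	 */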
206 
207 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
208 	if (!raw_packet)
209 		return -ENOMEM;
210 	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
211 
212 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 	      + sizeof(struct iphdr));
215 
216 	ip->daddr = fd_data->dst_ip[0];
217 	udp->dest = fd_data->dst_port;
218 	ip->saddr = fd_data->src_ip[0];
219 	udp->source = fd_data->src_port;
220 
221 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
223 	if (ret) {
224 		dev_info(&pf->pdev->dev,
225 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 			 fd_data->pctype, fd_data->fd_id, ret);
227 		err = true;
228 	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
229 		if (add)
230 			dev_info(&pf->pdev->dev,
231 				 "Filter OK for PCTYPE %d loc = %d\n",
232 				 fd_data->pctype, fd_data->fd_id);
233 		else
234 			dev_info(&pf->pdev->dev,
235 				 "Filter deleted for PCTYPE %d loc = %d\n",
236 				 fd_data->pctype, fd_data->fd_id);
237 	}
238 	if (err)
239 		kfree(raw_packet);
240 
241 	return err ? -EOPNOTSUPP : 0;
242 }
243 
244 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
245 /**
246  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
247  * @vsi: pointer to the targeted VSI
248  * @fd_data: the flow director data required for the FDir descriptor
249  * @add: true adds a filter, false removes it
250  *
251  * Returns 0 if the filters were successfully added or removed
252  **/
253 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
254 				   struct i40e_fdir_filter *fd_data,
255 				   bool add)
256 {
257 	struct i40e_pf *pf = vsi->back;
258 	struct tcphdr *tcp;
259 	struct iphdr *ip;
260 	bool err = false;
261 	u8 *raw_packet;
262 	int ret;
263 	/* Dummy packet */
264 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
265 		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
266 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
267 		0x0, 0x72, 0, 0, 0, 0};
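	/* As with the UDP case, this template is an Ethernet + IPv4 + TCP
	 * frame (IP total length 0x28, protocol 0x6); the addresses and
	 * ports are filled in from fd_data below.
	 */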
268 
269 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
270 	if (!raw_packet)
271 		return -ENOMEM;
272 	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
273 
274 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
275 	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
276 	      + sizeof(struct iphdr));
277 
278 	ip->daddr = fd_data->dst_ip[0];
279 	tcp->dest = fd_data->dst_port;
280 	ip->saddr = fd_data->src_ip[0];
281 	tcp->source = fd_data->src_port;
282 
283 	if (add) {
284 		pf->fd_tcp_rule++;
285 		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
286 			if (I40E_DEBUG_FD & pf->hw.debug_mask)
287 				dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
288 			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
289 		}
290 	} else {
291 		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
292 				  (pf->fd_tcp_rule - 1) : 0;
293 		if (pf->fd_tcp_rule == 0) {
294 			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
295 			if (I40E_DEBUG_FD & pf->hw.debug_mask)
296 				dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
297 		}
298 	}
299 
300 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
301 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
302 
303 	if (ret) {
304 		dev_info(&pf->pdev->dev,
305 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
306 			 fd_data->pctype, fd_data->fd_id, ret);
307 		err = true;
308 	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
309 		if (add)
310 			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
311 				 fd_data->pctype, fd_data->fd_id);
312 		else
313 			dev_info(&pf->pdev->dev,
314 				 "Filter deleted for PCTYPE %d loc = %d\n",
315 				 fd_data->pctype, fd_data->fd_id);
316 	}
317 
318 	if (err)
319 		kfree(raw_packet);
320 
321 	return err ? -EOPNOTSUPP : 0;
322 }
323 
324 /**
325  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
326  * a specific flow spec
327  * @vsi: pointer to the targeted VSI
328  * @fd_data: the flow director data required for the FDir descriptor
329  * @add: true adds a filter, false removes it
330  *
331  * Returns 0 if the filters were successfully added or removed
332  **/
333 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
334 				    struct i40e_fdir_filter *fd_data,
335 				    bool add)
336 {
337 	return -EOPNOTSUPP;
338 }
339 
340 #define I40E_IP_DUMMY_PACKET_LEN 34
341 /**
342  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
343  * a specific flow spec
344  * @vsi: pointer to the targeted VSI
345  * @fd_data: the flow director data required for the FDir descriptor
346  * @add: true adds a filter, false removes it
347  *
348  * Returns 0 if the filters were successfully added or removed
349  **/
350 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
351 				  struct i40e_fdir_filter *fd_data,
352 				  bool add)
353 {
354 	struct i40e_pf *pf = vsi->back;
355 	struct iphdr *ip;
356 	bool err = false;
357 	u8 *raw_packet;
358 	int ret;
359 	int i;
360 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
361 		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
362 		0, 0, 0, 0};
363 
364 	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
365 	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
366 		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
367 		if (!raw_packet)
368 			return -ENOMEM;
369 		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
370 		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
371 
372 		ip->saddr = fd_data->src_ip[0];
373 		ip->daddr = fd_data->dst_ip[0];
374 		ip->protocol = 0;
375 
376 		fd_data->pctype = i;
377 		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
378 
379 		if (ret) {
380 			dev_info(&pf->pdev->dev,
381 				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
382 				 fd_data->pctype, fd_data->fd_id, ret);
383 			err = true;
384 		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
385 			if (add)
386 				dev_info(&pf->pdev->dev,
387 					 "Filter OK for PCTYPE %d loc = %d\n",
388 					 fd_data->pctype, fd_data->fd_id);
389 			else
390 				dev_info(&pf->pdev->dev,
391 					 "Filter deleted for PCTYPE %d loc = %d\n",
392 					 fd_data->pctype, fd_data->fd_id);
393 		}
394 	}
395 
396 	if (err)
397 		kfree(raw_packet);
398 
399 	return err ? -EOPNOTSUPP : 0;
400 }
401 
402 /**
403  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
404  * @vsi: pointer to the targeted VSI
405  * @input: flow director filter spec to add or delete
406  * @add: true adds a filter, false removes it
407  *
408  **/
409 int i40e_add_del_fdir(struct i40e_vsi *vsi,
410 		      struct i40e_fdir_filter *input, bool add)
411 {
412 	struct i40e_pf *pf = vsi->back;
413 	int ret;
414 
415 	switch (input->flow_type & ~FLOW_EXT) {
416 	case TCP_V4_FLOW:
417 		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
418 		break;
419 	case UDP_V4_FLOW:
420 		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
421 		break;
422 	case SCTP_V4_FLOW:
423 		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
424 		break;
425 	case IPV4_FLOW:
426 		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
427 		break;
428 	case IP_USER_FLOW:
429 		switch (input->ip4_proto) {
430 		case IPPROTO_TCP:
431 			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
432 			break;
433 		case IPPROTO_UDP:
434 			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
435 			break;
436 		case IPPROTO_SCTP:
437 			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
438 			break;
439 		default:
440 			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
441 			break;
442 		}
443 		break;
444 	default:
445 		dev_info(&pf->pdev->dev, "Unsupported flow type %d\n",
446 			 input->flow_type);
447 		ret = -EINVAL;
448 	}
449 
450 	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
451 	return ret;
452 }
453 
454 /**
455  * i40e_fd_handle_status - check the Programming Status for FD
456  * @rx_ring: the Rx ring for this descriptor
457  * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
458  * @prog_id: the id originally used for programming
459  *
460  * This is used to verify whether the FD programming or invalidation
461  * requested by SW to the HW was successful, and to take action accordingly.
462  **/
463 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
464 				  union i40e_rx_desc *rx_desc, u8 prog_id)
465 {
466 	struct i40e_pf *pf = rx_ring->vsi->back;
467 	struct pci_dev *pdev = pf->pdev;
468 	u32 fcnt_prog, fcnt_avail;
469 	u32 error;
470 	u64 qw;
471 
472 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
473 	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
474 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
475 
476 	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
477 		pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
478 		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
479 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
480 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
481 				 pf->fd_inv);
482 
483 		/* Check if the programming error is for ATR.
484 		 * If so, auto disable ATR and set a state for
485 		 * flush in progress. Next time we come here, if a flush is in
486 		 * progress, do nothing; once the flush is complete the state will
487 		 * be cleared.
488 		 */
489 		if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
490 			return;
491 
492 		pf->fd_add_err++;
493 		/* store the current atr filter count */
494 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
495 
496 		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
497 		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
498 			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
499 			set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
500 		}
501 
502 		/* filter programming failed most likely due to table full */
503 		fcnt_prog = i40e_get_global_fd_count(pf);
504 		fcnt_avail = pf->fdir_pf_filter_count;
505 		/* If ATR is running fcnt_prog can quickly change,
506 		/* If ATR is running, fcnt_prog can quickly change;
507 		 * if we are very close to full, it makes sense to disable
508 		 */
509 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
510 			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
511 			    !(pf->auto_disable_flags &
512 				     I40E_FLAG_FD_SB_ENABLED)) {
513 				if (I40E_DEBUG_FD & pf->hw.debug_mask)
514 					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
515 				pf->auto_disable_flags |=
516 							I40E_FLAG_FD_SB_ENABLED;
517 			}
518 		}
519 	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
520 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
521 			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
522 				 rx_desc->wb.qword0.hi_dword.fd_id);
523 	}
524 }
525 
526 /**
527  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
528  * @ring:      the ring that owns the buffer
529  * @tx_buffer: the buffer to free
530  **/
531 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
532 					    struct i40e_tx_buffer *tx_buffer)
533 {
534 	if (tx_buffer->skb) {
535 		dev_kfree_skb_any(tx_buffer->skb);
536 		if (dma_unmap_len(tx_buffer, len))
537 			dma_unmap_single(ring->dev,
538 					 dma_unmap_addr(tx_buffer, dma),
539 					 dma_unmap_len(tx_buffer, len),
540 					 DMA_TO_DEVICE);
541 	} else if (dma_unmap_len(tx_buffer, len)) {
542 		dma_unmap_page(ring->dev,
543 			       dma_unmap_addr(tx_buffer, dma),
544 			       dma_unmap_len(tx_buffer, len),
545 			       DMA_TO_DEVICE);
546 	}
547 
548 	if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
549 		kfree(tx_buffer->raw_buf);
550 
551 	tx_buffer->next_to_watch = NULL;
552 	tx_buffer->skb = NULL;
553 	dma_unmap_len_set(tx_buffer, len, 0);
554 	/* tx_buffer must be completely set up in the transmit path */
555 }
556 
557 /**
558  * i40e_clean_tx_ring - Free any Tx buffers
559  * @tx_ring: ring to be cleaned
560  **/
561 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
562 {
563 	unsigned long bi_size;
564 	u16 i;
565 
566 	/* ring already cleared, nothing to do */
567 	if (!tx_ring->tx_bi)
568 		return;
569 
570 	/* Free all the Tx ring sk_buffs */
571 	for (i = 0; i < tx_ring->count; i++)
572 		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
573 
574 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
575 	memset(tx_ring->tx_bi, 0, bi_size);
576 
577 	/* Zero out the descriptor ring */
578 	memset(tx_ring->desc, 0, tx_ring->size);
579 
580 	tx_ring->next_to_use = 0;
581 	tx_ring->next_to_clean = 0;
582 
583 	if (!tx_ring->netdev)
584 		return;
585 
586 	/* cleanup Tx queue statistics */
587 	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
588 						  tx_ring->queue_index));
589 }
590 
591 /**
592  * i40e_free_tx_resources - Free Tx resources per queue
593  * @tx_ring: Tx descriptor ring for a specific queue
594  *
595  * Free all transmit software resources
596  **/
597 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
598 {
599 	i40e_clean_tx_ring(tx_ring);
600 	kfree(tx_ring->tx_bi);
601 	tx_ring->tx_bi = NULL;
602 
603 	if (tx_ring->desc) {
604 		dma_free_coherent(tx_ring->dev, tx_ring->size,
605 				  tx_ring->desc, tx_ring->dma);
606 		tx_ring->desc = NULL;
607 	}
608 }
609 
610 /**
611  * i40e_get_tx_pending - how many Tx descriptors are not yet processed
612  * @ring: the ring of descriptors
613  *
614  * Since there is no access to the ring head register
615  * in XL710, we need to use our local copies
616  **/
617 u32 i40e_get_tx_pending(struct i40e_ring *ring)
618 {
619 	u32 head, tail;
620 
621 	head = i40e_get_head(ring);
622 	tail = readl(ring->tail);
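	/* Pending work is the distance from head to tail modulo the ring
	 * size; e.g. head = 10, tail = 4 and count = 512 gives
	 * 4 + 512 - 10 = 506 descriptors not yet processed.
	 */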
623 
624 	if (head != tail)
625 		return (head < tail) ?
626 			tail - head : (tail + ring->count - head);
627 
628 	return 0;
629 }
630 
631 #define WB_STRIDE 0x3
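/* WB_STRIDE is used in i40e_clean_tx_irq() so that a write-back is only
 * forced when between one and WB_STRIDE descriptors are still pending,
 * presumably because such a small remainder could otherwise sit unwritten
 * while we stay in NAPI.
 */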
632 
633 /**
634  * i40e_clean_tx_irq - Reclaim resources after transmit completes
635  * @tx_ring:  tx ring to clean
636  * @budget:   how many cleans we're allowed
637  *
638  * Returns true if there's any budget left (i.e. the clean is finished)
639  **/
640 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
641 {
642 	u16 i = tx_ring->next_to_clean;
643 	struct i40e_tx_buffer *tx_buf;
644 	struct i40e_tx_desc *tx_head;
645 	struct i40e_tx_desc *tx_desc;
646 	unsigned int total_packets = 0;
647 	unsigned int total_bytes = 0;
648 
649 	tx_buf = &tx_ring->tx_bi[i];
650 	tx_desc = I40E_TX_DESC(tx_ring, i);
651 	i -= tx_ring->count;
652 
653 	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
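	/* Note: i is biased by -count above so the ring-wrap test below is a
	 * cheap "!i" check, and tx_head points at the head write-back
	 * location (i40e_get_head() appears to read the u32 reserved after
	 * the descriptor ring in i40e_setup_tx_descriptors()) rather than a
	 * hardware head register.
	 */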
654 
655 	do {
656 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
657 
658 		/* if next_to_watch is not set then there is no work pending */
659 		if (!eop_desc)
660 			break;
661 
662 		/* prevent any other reads prior to eop_desc */
663 		read_barrier_depends();
664 
665 		/* we have caught up to head, no work left to do */
666 		if (tx_head == tx_desc)
667 			break;
668 
669 		/* clear next_to_watch to prevent false hangs */
670 		tx_buf->next_to_watch = NULL;
671 
672 		/* update the statistics for this packet */
673 		total_bytes += tx_buf->bytecount;
674 		total_packets += tx_buf->gso_segs;
675 
676 		/* free the skb */
677 		dev_consume_skb_any(tx_buf->skb);
678 
679 		/* unmap skb header data */
680 		dma_unmap_single(tx_ring->dev,
681 				 dma_unmap_addr(tx_buf, dma),
682 				 dma_unmap_len(tx_buf, len),
683 				 DMA_TO_DEVICE);
684 
685 		/* clear tx_buffer data */
686 		tx_buf->skb = NULL;
687 		dma_unmap_len_set(tx_buf, len, 0);
688 
689 		/* unmap remaining buffers */
690 		while (tx_desc != eop_desc) {
691 
692 			tx_buf++;
693 			tx_desc++;
694 			i++;
695 			if (unlikely(!i)) {
696 				i -= tx_ring->count;
697 				tx_buf = tx_ring->tx_bi;
698 				tx_desc = I40E_TX_DESC(tx_ring, 0);
699 			}
700 
701 			/* unmap any remaining paged data */
702 			if (dma_unmap_len(tx_buf, len)) {
703 				dma_unmap_page(tx_ring->dev,
704 					       dma_unmap_addr(tx_buf, dma),
705 					       dma_unmap_len(tx_buf, len),
706 					       DMA_TO_DEVICE);
707 				dma_unmap_len_set(tx_buf, len, 0);
708 			}
709 		}
710 
711 		/* move us one more past the eop_desc for start of next pkt */
712 		tx_buf++;
713 		tx_desc++;
714 		i++;
715 		if (unlikely(!i)) {
716 			i -= tx_ring->count;
717 			tx_buf = tx_ring->tx_bi;
718 			tx_desc = I40E_TX_DESC(tx_ring, 0);
719 		}
720 
721 		prefetch(tx_desc);
722 
723 		/* update budget accounting */
724 		budget--;
725 	} while (likely(budget));
726 
727 	i += tx_ring->count;
728 	tx_ring->next_to_clean = i;
729 	u64_stats_update_begin(&tx_ring->syncp);
730 	tx_ring->stats.bytes += total_bytes;
731 	tx_ring->stats.packets += total_packets;
732 	u64_stats_update_end(&tx_ring->syncp);
733 	tx_ring->q_vector->tx.total_bytes += total_bytes;
734 	tx_ring->q_vector->tx.total_packets += total_packets;
735 
736 	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
737 		unsigned int j = 0;
738 
739 		/* check to see if there are fewer than WB_STRIDE + 1 (4)
740 		 * descriptors waiting to be written back; if so, kick the
741 		 * hardware to force the write-back in case we stay in NAPI.
742 		 * In this mode the X722 does not enable an interrupt.
743 		 */
744 		j = i40e_get_tx_pending(tx_ring);
745 
746 		if (budget &&
747 		    ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
748 		    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
749 		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
750 			tx_ring->arm_wb = true;
751 	}
752 
753 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
754 						      tx_ring->queue_index),
755 				  total_packets, total_bytes);
756 
757 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
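	/* Only wake the queue once at least TX_WAKE_THRESHOLD descriptors
	 * are free, which adds hysteresis over the stop threshold;
	 * DESC_NEEDED is assumed here to be the worst-case descriptor count
	 * for a single frame as defined in i40e_txrx.h.
	 */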
758 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
759 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
760 		/* Make sure that anybody stopping the queue after this
761 		 * sees the new next_to_clean.
762 		 */
763 		smp_mb();
764 		if (__netif_subqueue_stopped(tx_ring->netdev,
765 					     tx_ring->queue_index) &&
766 		   !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
767 			netif_wake_subqueue(tx_ring->netdev,
768 					    tx_ring->queue_index);
769 			++tx_ring->tx_stats.restart_queue;
770 		}
771 	}
772 
773 	return !!budget;
774 }
775 
776 /**
777  * i40e_force_wb - Arm hardware to do a write-back on non-cache-aligned descriptors
778  * @vsi: the VSI we care about
779  * @q_vector: the vector on which to force writeback
780  *
781  **/
782 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
783 {
784 	u16 flags = q_vector->tx.ring[0].flags;
785 
786 	if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
787 		u32 val;
788 
789 		if (q_vector->arm_wb_state)
790 			return;
791 
792 		val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;
793 
794 		wr32(&vsi->back->hw,
795 		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
796 					 vsi->base_vector - 1),
797 		     val);
798 		q_vector->arm_wb_state = true;
799 	} else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
800 		u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
801 			  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
802 			  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
803 			  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
804 			  /* allow 00 to be written to the index */
805 
806 		wr32(&vsi->back->hw,
807 		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
808 					 vsi->base_vector - 1), val);
809 	} else {
810 		u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
811 			  I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
812 			  I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
813 			  I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
814 			/* allow 00 to be written to the index */
815 
816 		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
817 	}
818 }
819 
820 /**
821  * i40e_set_new_dynamic_itr - Find new ITR level
822  * @rc: structure containing ring performance data
823  *
824  * Returns true if ITR changed, false if not
825  *
826  * Stores a new ITR value based on packets and byte counts during
827  * the last interrupt.  The advantage of per interrupt computation
828  * is faster updates and more accurate ITR for the current traffic
829  * pattern.  Constants in this function were computed based on
830  * theoretical maximum wire speed and thresholds were set based on
831  * testing data as well as attempting to minimize response time
832  * while increasing bulk throughput.
833  **/
834 static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
835 {
836 	enum i40e_latency_range new_latency_range = rc->latency_range;
837 	struct i40e_q_vector *qv = rc->ring->q_vector;
838 	u32 new_itr = rc->itr;
839 	int bytes_per_int;
840 	int usecs;
841 
842 	if (rc->total_packets == 0 || !rc->itr)
843 		return false;
844 
845 	/* simple throttlerate management
846 	 *   0-10MB/s   lowest (50000 ints/s)
847 	 *  10-20MB/s   low    (20000 ints/s)
848 	 *  20-1249MB/s bulk   (18000 ints/s)
849 	 *  > 40000 Rx packets per second (8000 ints/s)
850 	 *
851 	 * The math works out because the divisor is in 10^(-6) which
852 	 * turns the bytes/us input value into MB/s values, but
853 	 * make sure to use usecs, as the register values written
854 	 * are in 2 usec increments in the ITR registers, and make sure
855 	 * to use the smoothed values that the countdown timer gives us.
856 	 */
857 	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
858 	bytes_per_int = rc->total_bytes / usecs;
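	/* Worked example, assuming ITR_COUNTDOWN_START is 100 interrupts as
	 * defined in i40e.h: rc->itr = 62 means ~124 usecs between
	 * interrupts, so usecs = 12400 over the smoothing window; 500 kB
	 * received in that window gives bytes_per_int ~= 40, which lands in
	 * the bulk range of the table above.
	 */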
859 
860 	switch (new_latency_range) {
861 	case I40E_LOWEST_LATENCY:
862 		if (bytes_per_int > 10)
863 			new_latency_range = I40E_LOW_LATENCY;
864 		break;
865 	case I40E_LOW_LATENCY:
866 		if (bytes_per_int > 20)
867 			new_latency_range = I40E_BULK_LATENCY;
868 		else if (bytes_per_int <= 10)
869 			new_latency_range = I40E_LOWEST_LATENCY;
870 		break;
871 	case I40E_BULK_LATENCY:
872 	case I40E_ULTRA_LATENCY:
873 	default:
874 		if (bytes_per_int <= 20)
875 			new_latency_range = I40E_LOW_LATENCY;
876 		break;
877 	}
878 
879 	/* this is to adjust RX more aggressively when streaming small
880 	 * packets.  The value of 40000 was picked as it is just beyond
881 	 * what the hardware can receive per second if in low latency
882 	 * mode.
883 	 */
884 #define RX_ULTRA_PACKET_RATE 40000
885 
886 	if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
887 	    (&qv->rx == rc))
888 		new_latency_range = I40E_ULTRA_LATENCY;
889 
890 	rc->latency_range = new_latency_range;
891 
892 	switch (new_latency_range) {
893 	case I40E_LOWEST_LATENCY:
894 		new_itr = I40E_ITR_50K;
895 		break;
896 	case I40E_LOW_LATENCY:
897 		new_itr = I40E_ITR_20K;
898 		break;
899 	case I40E_BULK_LATENCY:
900 		new_itr = I40E_ITR_18K;
901 		break;
902 	case I40E_ULTRA_LATENCY:
903 		new_itr = I40E_ITR_8K;
904 		break;
905 	default:
906 		break;
907 	}
908 
909 	rc->total_bytes = 0;
910 	rc->total_packets = 0;
911 
912 	if (new_itr != rc->itr) {
913 		rc->itr = new_itr;
914 		return true;
915 	}
916 
917 	return false;
918 }
919 
920 /**
921  * i40e_clean_programming_status - clean the programming status descriptor
922  * @rx_ring: the rx ring that has this descriptor
923  * @rx_desc: the rx descriptor written back by HW
924  *
925  * Flow director should handle FD_FILTER_STATUS to check whether its filter
926  * programming was successful and take action accordingly. FCoE should handle
927  * its context/filter programming/invalidation status and take action.
928  *
929  **/
930 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
931 					  union i40e_rx_desc *rx_desc)
932 {
933 	u64 qw;
934 	u8 id;
935 
936 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
937 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
938 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
939 
940 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
941 		i40e_fd_handle_status(rx_ring, rx_desc, id);
942 #ifdef I40E_FCOE
943 	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
944 		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
945 		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
946 #endif
947 }
948 
949 /**
950  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
951  * @tx_ring: the tx ring to set up
952  *
953  * Return 0 on success, negative on error
954  **/
955 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
956 {
957 	struct device *dev = tx_ring->dev;
958 	int bi_size;
959 
960 	if (!dev)
961 		return -ENOMEM;
962 
963 	/* warn if we are about to overwrite the pointer */
964 	WARN_ON(tx_ring->tx_bi);
965 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
966 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
967 	if (!tx_ring->tx_bi)
968 		goto err;
969 
970 	/* round up to nearest 4K */
971 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
972 	/* add u32 for head writeback, align after this takes care of
973 	 * guaranteeing this is at least one cache line in size
974 	 */
975 	tx_ring->size += sizeof(u32);
976 	tx_ring->size = ALIGN(tx_ring->size, 4096);
977 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
978 					   &tx_ring->dma, GFP_KERNEL);
979 	if (!tx_ring->desc) {
980 		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
981 			 tx_ring->size);
982 		goto err;
983 	}
984 
985 	tx_ring->next_to_use = 0;
986 	tx_ring->next_to_clean = 0;
987 	return 0;
988 
989 err:
990 	kfree(tx_ring->tx_bi);
991 	tx_ring->tx_bi = NULL;
992 	return -ENOMEM;
993 }
994 
995 /**
996  * i40e_clean_rx_ring - Free Rx buffers
997  * @rx_ring: ring to be cleaned
998  **/
999 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1000 {
1001 	struct device *dev = rx_ring->dev;
1002 	struct i40e_rx_buffer *rx_bi;
1003 	unsigned long bi_size;
1004 	u16 i;
1005 
1006 	/* ring already cleared, nothing to do */
1007 	if (!rx_ring->rx_bi)
1008 		return;
1009 
1010 	if (ring_is_ps_enabled(rx_ring)) {
1011 		int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1012 
1013 		rx_bi = &rx_ring->rx_bi[0];
1014 		if (rx_bi->hdr_buf) {
1015 			dma_free_coherent(dev,
1016 					  bufsz,
1017 					  rx_bi->hdr_buf,
1018 					  rx_bi->dma);
1019 			for (i = 0; i < rx_ring->count; i++) {
1020 				rx_bi = &rx_ring->rx_bi[i];
1021 				rx_bi->dma = 0;
1022 				rx_bi->hdr_buf = NULL;
1023 			}
1024 		}
1025 	}
1026 	/* Free all the Rx ring sk_buffs */
1027 	for (i = 0; i < rx_ring->count; i++) {
1028 		rx_bi = &rx_ring->rx_bi[i];
1029 		if (rx_bi->dma) {
1030 			dma_unmap_single(dev,
1031 					 rx_bi->dma,
1032 					 rx_ring->rx_buf_len,
1033 					 DMA_FROM_DEVICE);
1034 			rx_bi->dma = 0;
1035 		}
1036 		if (rx_bi->skb) {
1037 			dev_kfree_skb(rx_bi->skb);
1038 			rx_bi->skb = NULL;
1039 		}
1040 		if (rx_bi->page) {
1041 			if (rx_bi->page_dma) {
1042 				dma_unmap_page(dev,
1043 					       rx_bi->page_dma,
1044 					       PAGE_SIZE / 2,
1045 					       DMA_FROM_DEVICE);
1046 				rx_bi->page_dma = 0;
1047 			}
1048 			__free_page(rx_bi->page);
1049 			rx_bi->page = NULL;
1050 			rx_bi->page_offset = 0;
1051 		}
1052 	}
1053 
1054 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1055 	memset(rx_ring->rx_bi, 0, bi_size);
1056 
1057 	/* Zero out the descriptor ring */
1058 	memset(rx_ring->desc, 0, rx_ring->size);
1059 
1060 	rx_ring->next_to_clean = 0;
1061 	rx_ring->next_to_use = 0;
1062 }
1063 
1064 /**
1065  * i40e_free_rx_resources - Free Rx resources
1066  * @rx_ring: ring to clean the resources from
1067  *
1068  * Free all receive software resources
1069  **/
1070 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1071 {
1072 	i40e_clean_rx_ring(rx_ring);
1073 	kfree(rx_ring->rx_bi);
1074 	rx_ring->rx_bi = NULL;
1075 
1076 	if (rx_ring->desc) {
1077 		dma_free_coherent(rx_ring->dev, rx_ring->size,
1078 				  rx_ring->desc, rx_ring->dma);
1079 		rx_ring->desc = NULL;
1080 	}
1081 }
1082 
1083 /**
1084  * i40e_alloc_rx_headers - allocate rx header buffers
1085  * @rx_ring: ring to alloc buffers
1086  *
1087  * Allocate rx header buffers for the entire ring. As these are static,
1088  * this is only called when setting up a new ring.
1089  **/
1090 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1091 {
1092 	struct device *dev = rx_ring->dev;
1093 	struct i40e_rx_buffer *rx_bi;
1094 	dma_addr_t dma;
1095 	void *buffer;
1096 	int buf_size;
1097 	int i;
1098 
1099 	if (rx_ring->rx_bi[0].hdr_buf)
1100 		return;
1101 	/* Make sure the buffers don't cross cache line boundaries. */
1102 	buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1103 	buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1104 				    &dma, GFP_KERNEL);
1105 	if (!buffer)
1106 		return;
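	/* A single coherent allocation backs all of the header buffers; it
	 * is carved into buf_size slices below, and i40e_clean_rx_ring()
	 * later frees the whole block through rx_bi[0]'s hdr_buf/dma pair.
	 */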
1107 	for (i = 0; i < rx_ring->count; i++) {
1108 		rx_bi = &rx_ring->rx_bi[i];
1109 		rx_bi->dma = dma + (i * buf_size);
1110 		rx_bi->hdr_buf = buffer + (i * buf_size);
1111 	}
1112 }
1113 
1114 /**
1115  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1116  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1117  *
1118  * Returns 0 on success, negative on failure
1119  **/
1120 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1121 {
1122 	struct device *dev = rx_ring->dev;
1123 	int bi_size;
1124 
1125 	/* warn if we are about to overwrite the pointer */
1126 	WARN_ON(rx_ring->rx_bi);
1127 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1128 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1129 	if (!rx_ring->rx_bi)
1130 		goto err;
1131 
1132 	u64_stats_init(&rx_ring->syncp);
1133 
1134 	/* Round up to nearest 4K */
1135 	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1136 		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1137 		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1138 	rx_ring->size = ALIGN(rx_ring->size, 4096);
1139 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1140 					   &rx_ring->dma, GFP_KERNEL);
1141 
1142 	if (!rx_ring->desc) {
1143 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1144 			 rx_ring->size);
1145 		goto err;
1146 	}
1147 
1148 	rx_ring->next_to_clean = 0;
1149 	rx_ring->next_to_use = 0;
1150 
1151 	return 0;
1152 err:
1153 	kfree(rx_ring->rx_bi);
1154 	rx_ring->rx_bi = NULL;
1155 	return -ENOMEM;
1156 }
1157 
1158 /**
1159  * i40e_release_rx_desc - Store the new next_to_use value and bump the tail
1160  * @rx_ring: ring to bump
1161  * @val: new next_to_use value to write to the tail register
1162  **/
1163 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1164 {
1165 	rx_ring->next_to_use = val;
1166 	/* Force memory writes to complete before letting h/w
1167 	 * know there are new descriptors to fetch.  (Only
1168 	 * applicable for weak-ordered memory model archs,
1169 	 * such as IA-64).
1170 	 */
1171 	wmb();
1172 	writel(val, rx_ring->tail);
1173 }
1174 
1175 /**
1176  * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1177  * @rx_ring: ring to place buffers on
1178  * @cleaned_count: number of buffers to replace
1179  **/
1180 void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1181 {
1182 	u16 i = rx_ring->next_to_use;
1183 	union i40e_rx_desc *rx_desc;
1184 	struct i40e_rx_buffer *bi;
1185 
1186 	/* do nothing if no valid netdev defined */
1187 	if (!rx_ring->netdev || !cleaned_count)
1188 		return;
1189 
1190 	while (cleaned_count--) {
1191 		rx_desc = I40E_RX_DESC(rx_ring, i);
1192 		bi = &rx_ring->rx_bi[i];
1193 
1194 		if (bi->skb) /* desc is in use */
1195 			goto no_buffers;
1196 		if (!bi->page) {
1197 			bi->page = alloc_page(GFP_ATOMIC);
1198 			if (!bi->page) {
1199 				rx_ring->rx_stats.alloc_page_failed++;
1200 				goto no_buffers;
1201 			}
1202 		}
1203 
1204 		if (!bi->page_dma) {
1205 			/* use a half page if we're re-using */
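			/* Each page serves as two half-page buffers; XOR-ing
			 * the offset flips to the other half, so a page that
			 * is still only referenced by the driver can be
			 * remapped instead of allocating a new one.
			 */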
1206 			bi->page_offset ^= PAGE_SIZE / 2;
1207 			bi->page_dma = dma_map_page(rx_ring->dev,
1208 						    bi->page,
1209 						    bi->page_offset,
1210 						    PAGE_SIZE / 2,
1211 						    DMA_FROM_DEVICE);
1212 			if (dma_mapping_error(rx_ring->dev,
1213 					      bi->page_dma)) {
1214 				rx_ring->rx_stats.alloc_page_failed++;
1215 				bi->page_dma = 0;
1216 				goto no_buffers;
1217 			}
1218 		}
1219 
1220 		dma_sync_single_range_for_device(rx_ring->dev,
1221 						 bi->dma,
1222 						 0,
1223 						 rx_ring->rx_hdr_len,
1224 						 DMA_FROM_DEVICE);
1225 		/* Refresh the desc even if buffer_addrs didn't change
1226 		 * because each write-back erases this info.
1227 		 */
1228 		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1229 		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1230 		i++;
1231 		if (i == rx_ring->count)
1232 			i = 0;
1233 	}
1234 
1235 no_buffers:
1236 	if (rx_ring->next_to_use != i)
1237 		i40e_release_rx_desc(rx_ring, i);
1238 }
1239 
1240 /**
1241  * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1242  * @rx_ring: ring to place buffers on
1243  * @cleaned_count: number of buffers to replace
1244  **/
1245 void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1246 {
1247 	u16 i = rx_ring->next_to_use;
1248 	union i40e_rx_desc *rx_desc;
1249 	struct i40e_rx_buffer *bi;
1250 	struct sk_buff *skb;
1251 
1252 	/* do nothing if no valid netdev defined */
1253 	if (!rx_ring->netdev || !cleaned_count)
1254 		return;
1255 
1256 	while (cleaned_count--) {
1257 		rx_desc = I40E_RX_DESC(rx_ring, i);
1258 		bi = &rx_ring->rx_bi[i];
1259 		skb = bi->skb;
1260 
1261 		if (!skb) {
1262 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1263 							rx_ring->rx_buf_len);
1264 			if (!skb) {
1265 				rx_ring->rx_stats.alloc_buff_failed++;
1266 				goto no_buffers;
1267 			}
1268 			/* initialize queue mapping */
1269 			skb_record_rx_queue(skb, rx_ring->queue_index);
1270 			bi->skb = skb;
1271 		}
1272 
1273 		if (!bi->dma) {
1274 			bi->dma = dma_map_single(rx_ring->dev,
1275 						 skb->data,
1276 						 rx_ring->rx_buf_len,
1277 						 DMA_FROM_DEVICE);
1278 			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1279 				rx_ring->rx_stats.alloc_buff_failed++;
1280 				bi->dma = 0;
1281 				goto no_buffers;
1282 			}
1283 		}
1284 
1285 		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1286 		rx_desc->read.hdr_addr = 0;
1287 		i++;
1288 		if (i == rx_ring->count)
1289 			i = 0;
1290 	}
1291 
1292 no_buffers:
1293 	if (rx_ring->next_to_use != i)
1294 		i40e_release_rx_desc(rx_ring, i);
1295 }
1296 
1297 /**
1298  * i40e_receive_skb - Send a completed packet up the stack
1299  * @rx_ring:  rx ring in play
1300  * @skb: packet to send up
1301  * @vlan_tag: vlan tag for packet
1302  **/
1303 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1304 			     struct sk_buff *skb, u16 vlan_tag)
1305 {
1306 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
1307 
1308 	if (vlan_tag & VLAN_VID_MASK)
1309 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1310 
1311 	napi_gro_receive(&q_vector->napi, skb);
1312 }
1313 
1314 /**
1315  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1316  * @vsi: the VSI we care about
1317  * @skb: skb currently being received and modified
1318  * @rx_status: status value of last descriptor in packet
1319  * @rx_error: error value of last descriptor in packet
1320  * @rx_ptype: ptype value of last descriptor in packet
1321  **/
1322 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1323 				    struct sk_buff *skb,
1324 				    u32 rx_status,
1325 				    u32 rx_error,
1326 				    u16 rx_ptype)
1327 {
1328 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1329 	bool ipv4 = false, ipv6 = false;
1330 	bool ipv4_tunnel, ipv6_tunnel;
1331 	__wsum rx_udp_csum;
1332 	struct iphdr *iph;
1333 	__sum16 csum;
1334 
1335 	ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1336 		     (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1337 	ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1338 		     (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1339 
1340 	skb->ip_summed = CHECKSUM_NONE;
1341 
1342 	/* Rx csum enabled and ip headers found? */
1343 	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1344 		return;
1345 
1346 	/* did the hardware decode the packet and checksum? */
1347 	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1348 		return;
1349 
1350 	/* both known and outer_ip must be set for the below code to work */
1351 	if (!(decoded.known && decoded.outer_ip))
1352 		return;
1353 
1354 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1355 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1356 		ipv4 = true;
1357 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1358 		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1359 		ipv6 = true;
1360 
1361 	if (ipv4 &&
1362 	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1363 			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1364 		goto checksum_fail;
1365 
1366 	/* likely incorrect csum if alternate IP extension headers found */
1367 	if (ipv6 &&
1368 	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1369 		/* don't increment checksum err here, non-fatal err */
1370 		return;
1371 
1372 	/* there was some L4 error, count error and punt packet to the stack */
1373 	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1374 		goto checksum_fail;
1375 
1376 	/* handle packets that were not able to be checksummed due
1377 	 * to arrival speed, in this case the stack can compute
1378 	 * the csum.
1379 	 */
1380 	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1381 		return;
1382 
1383 	/* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check
1384 	 * it in the driver; the hardware does not do it for us.
1385 	 * Since the L3L4P bit was set we assume a valid IHL value (>= 5),
1386 	 * so the total length of the IPv4 header is IHL*4 bytes.
1387 	 * The UDP_0 bit *may* be set if the *inner* header is UDP.
1388 	 */
1389 	if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
1390 	    (ipv4_tunnel)) {
1391 		skb->transport_header = skb->mac_header +
1392 					sizeof(struct ethhdr) +
1393 					(ip_hdr(skb)->ihl * 4);
1394 
1395 		/* Add 4 bytes for VLAN tagged packets */
1396 		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1397 					  skb->protocol == htons(ETH_P_8021AD))
1398 					  ? VLAN_HLEN : 0;
1399 
1400 		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1401 		    (udp_hdr(skb)->check != 0)) {
1402 			rx_udp_csum = udp_csum(skb);
1403 			iph = ip_hdr(skb);
1404 			csum = csum_tcpudp_magic(
1405 					iph->saddr, iph->daddr,
1406 					(skb->len - skb_transport_offset(skb)),
1407 					IPPROTO_UDP, rx_udp_csum);
1408 
1409 			if (udp_hdr(skb)->check != csum)
1410 				goto checksum_fail;
1411 
1412 		} /* else it's GRE and so there is no outer UDP header */
1413 	}
1414 
1415 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1416 	skb->csum_level = ipv4_tunnel || ipv6_tunnel;
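	/* For tunnelled traffic csum_level is set to 1, indicating (per the
	 * CHECKSUM_UNNECESSARY documentation in skbuff.h) that one
	 * additional, inner checksum was also verified.
	 */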
1417 
1418 	return;
1419 
1420 checksum_fail:
1421 	vsi->back->hw_csum_rx_error++;
1422 }
1423 
1424 /**
1425  * i40e_ptype_to_htype - get a hash type
1426  * @ptype: the ptype value from the descriptor
1427  *
1428  * Returns a hash type to be used by skb_set_hash
1429  **/
1430 static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
1431 {
1432 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1433 
1434 	if (!decoded.known)
1435 		return PKT_HASH_TYPE_NONE;
1436 
1437 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1438 	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1439 		return PKT_HASH_TYPE_L4;
1440 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1441 		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1442 		return PKT_HASH_TYPE_L3;
1443 	else
1444 		return PKT_HASH_TYPE_L2;
1445 }
1446 
1447 /**
1448  * i40e_rx_hash - set the hash value in the skb
1449  * @ring: descriptor ring
1450  * @rx_desc: specific descriptor
 * @skb: skb currently being received and modified
 * @rx_ptype: the packet type decoded by hardware
1451  **/
1452 static inline void i40e_rx_hash(struct i40e_ring *ring,
1453 				union i40e_rx_desc *rx_desc,
1454 				struct sk_buff *skb,
1455 				u8 rx_ptype)
1456 {
1457 	u32 hash;
1458 	const __le64 rss_mask  =
1459 		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1460 			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1461 
1462 	if (!(ring->netdev->features & NETIF_F_RXHASH))
1463 		return;
1464 
1465 	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
1466 		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1467 		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
1468 	}
1469 }
1470 
1471 /**
1472  * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1473  * @rx_ring:  rx ring to clean
1474  * @budget:   how many cleans we're allowed
1475  *
1476  * Returns the number of packets cleaned
1477  **/
1478 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
1479 {
1480 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1481 	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1482 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1483 	const int current_node = numa_mem_id();
1484 	struct i40e_vsi *vsi = rx_ring->vsi;
1485 	u16 i = rx_ring->next_to_clean;
1486 	union i40e_rx_desc *rx_desc;
1487 	u32 rx_error, rx_status;
1488 	u8 rx_ptype;
1489 	u64 qword;
1490 
1491 	if (budget <= 0)
1492 		return 0;
1493 
1494 	do {
1495 		struct i40e_rx_buffer *rx_bi;
1496 		struct sk_buff *skb;
1497 		u16 vlan_tag;
1498 		/* return some buffers to hardware, one at a time is too slow */
1499 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1500 			i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
1501 			cleaned_count = 0;
1502 		}
1503 
1504 		i = rx_ring->next_to_clean;
1505 		rx_desc = I40E_RX_DESC(rx_ring, i);
1506 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1507 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1508 			I40E_RXD_QW1_STATUS_SHIFT;
1509 
1510 		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1511 			break;
1512 
1513 		/* This memory barrier is needed to keep us from reading
1514 		 * any other fields out of the rx_desc until we know the
1515 		 * DD bit is set.
1516 		 */
1517 		dma_rmb();
1518 		if (i40e_rx_is_programming_status(qword)) {
1519 			i40e_clean_programming_status(rx_ring, rx_desc);
1520 			I40E_RX_INCREMENT(rx_ring, i);
1521 			continue;
1522 		}
1523 		rx_bi = &rx_ring->rx_bi[i];
1524 		skb = rx_bi->skb;
1525 		if (likely(!skb)) {
1526 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1527 							rx_ring->rx_hdr_len);
1528 			if (!skb) {
1529 				rx_ring->rx_stats.alloc_buff_failed++;
1530 				break;
1531 			}
1532 
1533 			/* initialize queue mapping */
1534 			skb_record_rx_queue(skb, rx_ring->queue_index);
1535 			/* we are reusing so sync this buffer for CPU use */
1536 			dma_sync_single_range_for_cpu(rx_ring->dev,
1537 						      rx_bi->dma,
1538 						      0,
1539 						      rx_ring->rx_hdr_len,
1540 						      DMA_FROM_DEVICE);
1541 		}
1542 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1543 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1544 		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1545 				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1546 		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1547 			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1548 
1549 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1550 			   I40E_RXD_QW1_ERROR_SHIFT;
1551 		rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1552 		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1553 
1554 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1555 			   I40E_RXD_QW1_PTYPE_SHIFT;
1556 		prefetch(rx_bi->page);
1557 		rx_bi->skb = NULL;
1558 		cleaned_count++;
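		/* If the hardware did a header split (SPH) or flagged a
		 * header buffer overflow (HBO), copy the header bytes out of
		 * the separate header buffer; otherwise, for a fresh skb,
		 * pull the start of the packet out of the page buffer.
		 */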
1559 		if (rx_hbo || rx_sph) {
1560 			int len;
1561 
1562 			if (rx_hbo)
1563 				len = I40E_RX_HDR_SIZE;
1564 			else
1565 				len = rx_header_len;
1566 			memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1567 		} else if (skb->len == 0) {
1568 			int len;
1569 
1570 			len = (rx_packet_len > skb_headlen(skb) ?
1571 				skb_headlen(skb) : rx_packet_len);
1572 			memcpy(__skb_put(skb, len),
1573 			       rx_bi->page + rx_bi->page_offset,
1574 			       len);
1575 			rx_bi->page_offset += len;
1576 			rx_packet_len -= len;
1577 		}
1578 
1579 		/* Get the rest of the data if this was a header split */
1580 		if (rx_packet_len) {
1581 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1582 					   rx_bi->page,
1583 					   rx_bi->page_offset,
1584 					   rx_packet_len);
1585 
1586 			skb->len += rx_packet_len;
1587 			skb->data_len += rx_packet_len;
1588 			skb->truesize += rx_packet_len;
1589 
1590 			if ((page_count(rx_bi->page) == 1) &&
1591 			    (page_to_nid(rx_bi->page) == current_node))
1592 				get_page(rx_bi->page);
1593 			else
1594 				rx_bi->page = NULL;
1595 
1596 			dma_unmap_page(rx_ring->dev,
1597 				       rx_bi->page_dma,
1598 				       PAGE_SIZE / 2,
1599 				       DMA_FROM_DEVICE);
1600 			rx_bi->page_dma = 0;
1601 		}
1602 		I40E_RX_INCREMENT(rx_ring, i);
1603 
1604 		if (unlikely(
1605 		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1606 			struct i40e_rx_buffer *next_buffer;
1607 
1608 			next_buffer = &rx_ring->rx_bi[i];
1609 			next_buffer->skb = skb;
1610 			rx_ring->rx_stats.non_eop_descs++;
1611 			continue;
1612 		}
1613 
1614 		/* ERR_MASK will only have valid bits if EOP set */
1615 		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1616 			dev_kfree_skb_any(skb);
1617 			continue;
1618 		}
1619 
1620 		i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1621 
1622 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1623 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1624 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1625 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1626 			rx_ring->last_rx_timestamp = jiffies;
1627 		}
1628 
1629 		/* probably a little skewed due to removing CRC */
1630 		total_rx_bytes += skb->len;
1631 		total_rx_packets++;
1632 
1633 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1634 
1635 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1636 
1637 		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1638 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1639 			 : 0;
1640 #ifdef I40E_FCOE
1641 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1642 			dev_kfree_skb_any(skb);
1643 			continue;
1644 		}
1645 #endif
1646 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1647 
1648 		rx_desc->wb.qword1.status_error_len = 0;
1649 
1650 	} while (likely(total_rx_packets < budget));
1651 
1652 	u64_stats_update_begin(&rx_ring->syncp);
1653 	rx_ring->stats.packets += total_rx_packets;
1654 	rx_ring->stats.bytes += total_rx_bytes;
1655 	u64_stats_update_end(&rx_ring->syncp);
1656 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1657 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1658 
1659 	return total_rx_packets;
1660 }
1661 
1662 /**
1663  * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1664  * @rx_ring:  rx ring to clean
1665  * @budget:   how many cleans we're allowed
1666  *
1667  * Returns number of packets cleaned
1668  **/
1669 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1670 {
1671 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1672 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1673 	struct i40e_vsi *vsi = rx_ring->vsi;
1674 	union i40e_rx_desc *rx_desc;
1675 	u32 rx_error, rx_status;
1676 	u16 rx_packet_len;
1677 	u8 rx_ptype;
1678 	u64 qword;
1679 	u16 i;
1680 
1681 	do {
1682 		struct i40e_rx_buffer *rx_bi;
1683 		struct sk_buff *skb;
1684 		u16 vlan_tag;
1685 		/* return some buffers to hardware, one at a time is too slow */
1686 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1687 			i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
1688 			cleaned_count = 0;
1689 		}
1690 
1691 		i = rx_ring->next_to_clean;
1692 		rx_desc = I40E_RX_DESC(rx_ring, i);
1693 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1694 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1695 			I40E_RXD_QW1_STATUS_SHIFT;
1696 
1697 		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1698 			break;
1699 
1700 		/* This memory barrier is needed to keep us from reading
1701 		 * any other fields out of the rx_desc until we know the
1702 		 * DD bit is set.
1703 		 */
1704 		dma_rmb();
1705 
1706 		if (i40e_rx_is_programming_status(qword)) {
1707 			i40e_clean_programming_status(rx_ring, rx_desc);
1708 			I40E_RX_INCREMENT(rx_ring, i);
1709 			continue;
1710 		}
1711 		rx_bi = &rx_ring->rx_bi[i];
1712 		skb = rx_bi->skb;
1713 		prefetch(skb->data);
1714 
1715 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1716 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1717 
1718 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1719 			   I40E_RXD_QW1_ERROR_SHIFT;
1720 		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1721 
1722 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1723 			   I40E_RXD_QW1_PTYPE_SHIFT;
1724 		rx_bi->skb = NULL;
1725 		cleaned_count++;
1726 
1727 		/* The whole packet is in this single buffer: record its
1728 		 * length in the skb and unmap the buffer for CPU use
1729 		 */
1730 		skb_put(skb, rx_packet_len);
1731 		dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1732 				 DMA_FROM_DEVICE);
1733 		rx_bi->dma = 0;
1734 
1735 		I40E_RX_INCREMENT(rx_ring, i);
1736 
1737 		if (unlikely(
1738 		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1739 			rx_ring->rx_stats.non_eop_descs++;
1740 			continue;
1741 		}
1742 
1743 		/* ERR_MASK will only have valid bits if EOP set */
1744 		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1745 			dev_kfree_skb_any(skb);
1746 			continue;
1747 		}
1748 
1749 		i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1750 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1751 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1752 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1753 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1754 			rx_ring->last_rx_timestamp = jiffies;
1755 		}
1756 
1757 		/* probably a little skewed due to removing CRC */
1758 		total_rx_bytes += skb->len;
1759 		total_rx_packets++;
1760 
1761 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1762 
1763 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1764 
1765 		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1766 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1767 			 : 0;
1768 #ifdef I40E_FCOE
1769 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1770 			dev_kfree_skb_any(skb);
1771 			continue;
1772 		}
1773 #endif
1774 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1775 
1776 		rx_desc->wb.qword1.status_error_len = 0;
1777 	} while (likely(total_rx_packets < budget));
1778 
1779 	u64_stats_update_begin(&rx_ring->syncp);
1780 	rx_ring->stats.packets += total_rx_packets;
1781 	rx_ring->stats.bytes += total_rx_bytes;
1782 	u64_stats_update_end(&rx_ring->syncp);
1783 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1784 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1785 
1786 	return total_rx_packets;
1787 }
1788 
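/**
 * i40e_buildreg_itr - build a value for writing to the PFINT_DYN_CTLN register
 * @type: which ITR index the interval applies to (Rx, Tx, or I40E_ITR_NONE)
 * @itr: interval value to program for that ITR index
 *
 * The returned value sets INTENA and CLEARPBA and selects the ITR register
 * that @itr is written into when the caller writes it to the hardware.
 **/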
1789 static u32 i40e_buildreg_itr(const int type, const u16 itr)
1790 {
1791 	u32 val;
1792 
1793 	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1794 	      I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1795 	      (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1796 	      (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1797 
1798 	return val;
1799 }
1800 
1801 /* a small macro to shorten up some long lines */
1802 #define INTREG I40E_PFINT_DYN_CTLN
1803 
1804 /**
1805  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1806  * @vsi: the VSI we care about
1807  * @q_vector: q_vector for which itr is being updated and interrupt enabled
1808  *
1809  **/
1810 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1811 					  struct i40e_q_vector *q_vector)
1812 {
1813 	struct i40e_hw *hw = &vsi->back->hw;
1814 	bool rx = false, tx = false;
1815 	u32 rxval, txval;
1816 	int vector;
1817 
1818 	vector = (q_vector->v_idx + vsi->base_vector);
1819 
1820 	/* avoid the dynamic calculation if we are in countdown mode OR if
1821 	 * dynamic ITR is disabled for both Rx and Tx
1822 	 */
1823 	rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1824 
1825 	if (q_vector->itr_countdown > 0 ||
1826 	    (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
1827 	     !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
1828 		goto enable_int;
1829 	}
1830 
1831 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
1832 		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1833 		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
1834 	}
1835 
1836 	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
1837 		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1838 		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
1839 	}
1840 
1841 	if (rx || tx) {
1842 		/* get the higher of the two ITR adjustments and
1843 		 * use the same value for both ITR registers
1844 		 * when in adaptive mode (Rx and/or Tx)
1845 		 */
1846 		u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1847 
1848 		q_vector->tx.itr = q_vector->rx.itr = itr;
1849 		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1850 		tx = true;
1851 		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1852 		rx = true;
1853 	}
1854 
1855 	/* only need to enable the interrupt once, but need
1856 	 * to possibly update both ITR values
1857 	 */
1858 	if (rx) {
1859 		/* set the INTENA_MSK_MASK so that this first write
1860 		 * won't actually enable the interrupt, instead just
1861 		 * updating the ITR (it's bit 31 PF and VF)
1862 		 * updating the ITR (it's bit 31 on both PF and VF)
1863 		rxval |= BIT(31);
1864 		/* don't check _DOWN because interrupt isn't being enabled */
1865 		wr32(hw, INTREG(vector - 1), rxval);
1866 	}
1867 
1868 enable_int:
1869 	if (!test_bit(__I40E_DOWN, &vsi->state))
1870 		wr32(hw, INTREG(vector - 1), txval);
1871 
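	/* itr_countdown holds off the dynamic ITR calculation at the top of
	 * this function; run it down here and restart it once it expires.
	 */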
1872 	if (q_vector->itr_countdown)
1873 		q_vector->itr_countdown--;
1874 	else
1875 		q_vector->itr_countdown = ITR_COUNTDOWN_START;
1876 }
1877 
1878 /**
1879  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1880  * @napi: napi struct with our devices info in it
1881  * @budget: amount of work driver is allowed to do this pass, in packets
1882  *
1883  * This function will clean all queues associated with a q_vector.
1884  *
1885  * Returns the amount of work done
1886  **/
1887 int i40e_napi_poll(struct napi_struct *napi, int budget)
1888 {
1889 	struct i40e_q_vector *q_vector =
1890 			       container_of(napi, struct i40e_q_vector, napi);
1891 	struct i40e_vsi *vsi = q_vector->vsi;
1892 	struct i40e_ring *ring;
1893 	bool clean_complete = true;
1894 	bool arm_wb = false;
1895 	int budget_per_ring;
1896 	int work_done = 0;
1897 
1898 	if (test_bit(__I40E_DOWN, &vsi->state)) {
1899 		napi_complete(napi);
1900 		return 0;
1901 	}
1902 
1903 	/* Clear hung_detected bit */
1904 	clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
1905 	/* Since the actual Tx work is minimal, we can give the Tx a larger
1906 	 * budget and be more aggressive about cleaning up the Tx descriptors.
1907 	 */
1908 	i40e_for_each_ring(ring, q_vector->tx) {
1909 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1910 		arm_wb = arm_wb || ring->arm_wb;
1911 		ring->arm_wb = false;
1912 	}
1913 
1914 	/* Handle case where we are called by netpoll with a budget of 0 */
1915 	if (budget <= 0)
1916 		goto tx_only;
1917 
1918 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1919 	 * allow the budget to go below 1 because that would exit polling early.
1920 	 */
1921 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1922 
1923 	i40e_for_each_ring(ring, q_vector->rx) {
1924 		int cleaned;
1925 
1926 		if (ring_is_ps_enabled(ring))
1927 			cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1928 		else
1929 			cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1930 
1931 		work_done += cleaned;
1932 		/* if we didn't clean as many as budgeted, we must be done */
1933 		clean_complete &= (budget_per_ring != cleaned);
1934 	}
1935 
1936 	/* If work not completed, return budget and polling will return */
1937 	if (!clean_complete) {
1938 tx_only:
1939 		if (arm_wb) {
1940 			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
1941 			i40e_force_wb(vsi, q_vector);
1942 		}
1943 		return budget;
1944 	}
1945 
1946 	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
1947 		q_vector->arm_wb_state = false;
1948 
1949 	/* Work is done so exit the polling mode and re-enable the interrupt */
1950 	napi_complete_done(napi, work_done);
1951 	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1952 		i40e_update_enable_itr(vsi, q_vector);
1953 	} else { /* Legacy mode */
1954 		struct i40e_hw *hw = &vsi->back->hw;
1955 		/* We re-enable the queue 0 cause, but
1956 		 * don't worry about dynamic_enable
1957 		 * because we left it on for the other
1958 		 * possible interrupts during napi
1959 		 */
1960 		u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
1961 			   I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1962 
1963 		wr32(hw, I40E_QINT_RQCTL(0), qval);
1964 		qval = rd32(hw, I40E_QINT_TQCTL(0)) |
1965 		       I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1966 		wr32(hw, I40E_QINT_TQCTL(0), qval);
1967 		i40e_irq_dynamic_enable_icr0(vsi->back);
1968 	}
1969 	return 0;
1970 }
1971 
1972 /**
1973  * i40e_atr - Add a Flow Director ATR filter
1974  * @tx_ring:  ring to add programming descriptor to
1975  * @skb:      send buffer
1976  * @tx_flags: send tx flags
1977  * @protocol: wire protocol
1978  **/
1979 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1980 		     u32 tx_flags, __be16 protocol)
1981 {
1982 	struct i40e_filter_program_desc *fdir_desc;
1983 	struct i40e_pf *pf = tx_ring->vsi->back;
1984 	union {
1985 		unsigned char *network;
1986 		struct iphdr *ipv4;
1987 		struct ipv6hdr *ipv6;
1988 	} hdr;
1989 	struct tcphdr *th;
1990 	unsigned int hlen;
1991 	u32 flex_ptype, dtype_cmd;
1992 	u16 i;
1993 
1994 	/* make sure ATR is enabled */
1995 	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1996 		return;
1997 
1998 	if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1999 		return;
2000 
2001 	/* if sampling is disabled do nothing */
2002 	if (!tx_ring->atr_sample_rate)
2003 		return;
2004 
2005 	if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2006 		return;
2007 
2008 	if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) {
2009 		/* snag network header to get L4 type and address */
2010 		hdr.network = skb_network_header(skb);
2011 
2012 		/* Currently only IPv4/IPv6 with TCP is supported.
2013 		 * Access ihl as a u8 to avoid unaligned access on ia64.
2014 		 */
2015 		if (tx_flags & I40E_TX_FLAGS_IPV4)
2016 			hlen = (hdr.network[0] & 0x0F) << 2;
2017 		else if (protocol == htons(ETH_P_IPV6))
2018 			hlen = sizeof(struct ipv6hdr);
2019 		else
2020 			return;
2021 	} else {
2022 		hdr.network = skb_inner_network_header(skb);
2023 		hlen = skb_inner_network_header_len(skb);
2024 	}
2025 
2026 	/* Currently only IPv4/IPv6 with TCP is supported
2027 	 * Note: tx_flags gets modified to reflect inner protocols in
2028 	 * tx_enable_csum function if encap is enabled.
2029 	 */
2030 	if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
2031 	    (hdr.ipv4->protocol != IPPROTO_TCP))
2032 		return;
2033 	else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
2034 		 (hdr.ipv6->nexthdr != IPPROTO_TCP))
2035 		return;
2036 
2037 	th = (struct tcphdr *)(hdr.network + hlen);
2038 
2039 	/* Due to lack of space, no more new filters can be programmed */
2040 	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2041 		return;
2042 	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
2043 		/* HW ATR eviction will take care of removing filters on FIN
2044 		 * and RST packets.
2045 		 */
2046 		if (th->fin || th->rst)
2047 			return;
2048 	}
2049 
2050 	tx_ring->atr_count++;
2051 
2052 	/* sample on all syn/fin/rst packets or once every atr sample rate */
2053 	if (!th->fin &&
2054 	    !th->syn &&
2055 	    !th->rst &&
2056 	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
2057 		return;
2058 
2059 	tx_ring->atr_count = 0;
2060 
2061 	/* grab the next descriptor */
2062 	i = tx_ring->next_to_use;
2063 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2064 
2065 	i++;
2066 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2067 
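	/* Build the two halves of the filter programming descriptor: queue
	 * index, packet type and destination VSI go into the QW0 flex/ptype
	 * word; the add/remove command, destination, FD status and counter
	 * index go into the QW1 dtype/cmd word.
	 */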
2068 	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2069 		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
2070 	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
2071 		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2072 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2073 		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2074 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2075 
2076 	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2077 
2078 	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2079 
2080 	dtype_cmd |= (th->fin || th->rst) ?
2081 		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2082 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2083 		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2084 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2085 
2086 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2087 		     I40E_TXD_FLTR_QW1_DEST_SHIFT;
2088 
2089 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2090 		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2091 
2092 	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2093 	if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
2094 		dtype_cmd |=
2095 			((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2096 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2097 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2098 	else
2099 		dtype_cmd |=
2100 			((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2101 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2102 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2103 
2104 	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
2105 		dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2106 
2107 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2108 	fdir_desc->rsvd = cpu_to_le32(0);
2109 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2110 	fdir_desc->fd_id = cpu_to_le32(0);
2111 }
2112 
2113 /**
2114  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2115  * @skb:     send buffer
2116  * @tx_ring: ring to send buffer on
2117  * @flags:   the tx flags to be set
2118  *
2119  * Checks the skb and sets up the generic transmit flags related to VLAN
2120  * tagging for the HW, such as VLAN and DCB priority.
2121  *
2122  * Returns an error code to indicate the frame should be dropped on error,
2123  * otherwise returns 0 to indicate the flags have been set properly.
2124  **/
2125 #ifdef I40E_FCOE
2126 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2127 				      struct i40e_ring *tx_ring,
2128 				      u32 *flags)
2129 #else
2130 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2131 					     struct i40e_ring *tx_ring,
2132 					     u32 *flags)
2133 #endif
2134 {
2135 	__be16 protocol = skb->protocol;
2136 	u32  tx_flags = 0;
2137 
2138 	if (protocol == htons(ETH_P_8021Q) &&
2139 	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2140 		/* When HW VLAN acceleration is turned off by the user the
2141 		 * stack sets the protocol to 8021q so that the driver
2142 		 * can take any steps required to support the SW only
2143 		 * VLAN handling.  In our case the driver doesn't need
2144 		 * to take any further steps so just set the protocol
2145 		 * to the encapsulated ethertype.
2146 		 */
2147 		skb->protocol = vlan_get_protocol(skb);
2148 		goto out;
2149 	}
2150 
2151 	/* if we have a HW VLAN tag being added, default to the HW one */
2152 	if (skb_vlan_tag_present(skb)) {
2153 		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2154 		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2155 	/* else if it is a SW VLAN, check the next protocol and store the tag */
2156 	} else if (protocol == htons(ETH_P_8021Q)) {
2157 		struct vlan_hdr *vhdr, _vhdr;
2158 
2159 		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2160 		if (!vhdr)
2161 			return -EINVAL;
2162 
2163 		protocol = vhdr->h_vlan_encapsulated_proto;
2164 		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2165 		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2166 	}
2167 
2168 	if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2169 		goto out;
2170 
2171 	/* Insert 802.1p priority into VLAN header */
2172 	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2173 	    (skb->priority != TC_PRIO_CONTROL)) {
2174 		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2175 		tx_flags |= (skb->priority & 0x7) <<
2176 				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2177 		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2178 			struct vlan_ethhdr *vhdr;
2179 			int rc;
2180 
2181 			rc = skb_cow_head(skb, 0);
2182 			if (rc < 0)
2183 				return rc;
2184 			vhdr = (struct vlan_ethhdr *)skb->data;
2185 			vhdr->h_vlan_TCI = htons(tx_flags >>
2186 						 I40E_TX_FLAGS_VLAN_SHIFT);
2187 		} else {
2188 			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2189 		}
2190 	}
2191 
2192 out:
2193 	*flags = tx_flags;
2194 	return 0;
2195 }
2196 
2197 /**
2198  * i40e_tso - set up the tso context descriptor
2199  * @tx_ring:  ptr to the ring to send
2200  * @skb:      ptr to the skb we're sending
2201  * @hdr_len:  ptr to the size of the packet header
2202  * @cd_type_cmd_tso_mss: Quad Word 1
2203  *
2204  * Returns 0 if no TSO is needed, 1 if TSO has been set up, or a negative error
2205  **/
2206 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2207 		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
2208 {
2209 	u32 cd_cmd, cd_tso_len, cd_mss;
2210 	struct ipv6hdr *ipv6h;
2211 	struct tcphdr *tcph;
2212 	struct iphdr *iph;
2213 	u32 l4len;
2214 	int err;
2215 
2216 	if (!skb_is_gso(skb))
2217 		return 0;
2218 
2219 	err = skb_cow_head(skb, 0);
2220 	if (err < 0)
2221 		return err;
2222 
2223 	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
2224 	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
2225 
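	/* Zero the L3 length field and seed the TCP checksum with the
	 * pseudo-header sum so the hardware can finish the checksum for
	 * each segment it produces.
	 */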
2226 	if (iph->version == 4) {
2227 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2228 		iph->tot_len = 0;
2229 		iph->check = 0;
2230 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
2231 						 0, IPPROTO_TCP, 0);
2232 	} else if (ipv6h->version == 6) {
2233 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2234 		ipv6h->payload_len = 0;
2235 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
2236 					       0, IPPROTO_TCP, 0);
2237 	}
2238 
2239 	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
2240 	*hdr_len = (skb->encapsulation
2241 		    ? (skb_inner_transport_header(skb) - skb->data)
2242 		    : skb_transport_offset(skb)) + l4len;
2243 
2244 	/* find the field values */
2245 	cd_cmd = I40E_TX_CTX_DESC_TSO;
2246 	cd_tso_len = skb->len - *hdr_len;
2247 	cd_mss = skb_shinfo(skb)->gso_size;
2248 	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2249 				((u64)cd_tso_len <<
2250 				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2251 				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2252 	return 1;
2253 }
2254 
2255 /**
2256  * i40e_tsyn - set up the tsyn context descriptor
2257  * @tx_ring:  ptr to the ring to send
2258  * @skb:      ptr to the skb we're sending
2259  * @tx_flags: the collected send information
2260  * @cd_type_cmd_tso_mss: Quad Word 1
2261  *
2262  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2263  **/
2264 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2265 		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2266 {
2267 	struct i40e_pf *pf;
2268 
2269 	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2270 		return 0;
2271 
2272 	/* Tx timestamps cannot be sampled when doing TSO */
2273 	if (tx_flags & I40E_TX_FLAGS_TSO)
2274 		return 0;
2275 
2276 	/* only timestamp the outbound packet if the user has requested it and
2277 	 * we are not already transmitting a packet to be timestamped
2278 	 */
2279 	pf = i40e_netdev_to_pf(tx_ring->netdev);
2280 	if (!(pf->flags & I40E_FLAG_PTP))
2281 		return 0;
2282 
2283 	if (pf->ptp_tx &&
2284 	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2285 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2286 		pf->ptp_tx_skb = skb_get(skb);
2287 	} else {
2288 		return 0;
2289 	}
2290 
2291 	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2292 				I40E_TXD_CTX_QW1_CMD_SHIFT;
2293 
2294 	return 1;
2295 }
2296 
2297 /**
2298  * i40e_tx_enable_csum - Enable Tx checksum offloads
2299  * @skb: send buffer
2300  * @tx_flags: pointer to Tx flags currently set
2301  * @td_cmd: Tx descriptor command bits to set
2302  * @td_offset: Tx descriptor header offsets to set
2303  * @tx_ring: Tx descriptor ring
2304  * @cd_tunneling: ptr to context desc bits
2305  **/
2306 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2307 				u32 *td_cmd, u32 *td_offset,
2308 				struct i40e_ring *tx_ring,
2309 				u32 *cd_tunneling)
2310 {
2311 	struct ipv6hdr *this_ipv6_hdr;
2312 	unsigned int this_tcp_hdrlen;
2313 	struct iphdr *this_ip_hdr;
2314 	u32 network_hdr_len;
2315 	u8 l4_hdr = 0;
2316 	struct udphdr *oudph = NULL;
2317 	struct iphdr *oiph = NULL;
2318 	u32 l4_tunnel = 0;
2319 
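	/* For encapsulated frames, describe the outer IP header and tunnel
	 * type in the context descriptor, then point the checksum logic
	 * below at the inner headers.
	 */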
2320 	if (skb->encapsulation) {
2321 		switch (ip_hdr(skb)->protocol) {
2322 		case IPPROTO_UDP:
2323 			oudph = udp_hdr(skb);
2324 			oiph = ip_hdr(skb);
2325 			l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
2326 			*tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2327 			break;
2328 		case IPPROTO_GRE:
2329 			l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING;
2330 			break;
2331 		default:
2332 			return;
2333 		}
2334 		network_hdr_len = skb_inner_network_header_len(skb);
2335 		this_ip_hdr = inner_ip_hdr(skb);
2336 		this_ipv6_hdr = inner_ipv6_hdr(skb);
2337 		this_tcp_hdrlen = inner_tcp_hdrlen(skb);
2338 
2339 		if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2340 			if (*tx_flags & I40E_TX_FLAGS_TSO) {
2341 				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
2342 				ip_hdr(skb)->check = 0;
2343 			} else {
2344 				*cd_tunneling |=
2345 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2346 			}
2347 		} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2348 			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
2349 			if (*tx_flags & I40E_TX_FLAGS_TSO)
2350 				ip_hdr(skb)->check = 0;
2351 		}
2352 
2353 		/* Now set the ctx descriptor fields */
2354 		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
2355 				   I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT      |
2356 				   l4_tunnel                             |
2357 				   ((skb_inner_network_offset(skb) -
2358 					skb_transport_offset(skb)) >> 1) <<
2359 				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2360 		if (this_ip_hdr->version == 6) {
2361 			*tx_flags &= ~I40E_TX_FLAGS_IPV4;
2362 			*tx_flags |= I40E_TX_FLAGS_IPV6;
2363 		}
2364 		if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
2365 		    (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING)        &&
2366 		    (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
2367 			oudph->check = ~csum_tcpudp_magic(oiph->saddr,
2368 					oiph->daddr,
2369 					(skb->len - skb_transport_offset(skb)),
2370 					IPPROTO_UDP, 0);
2371 			*cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2372 		}
2373 	} else {
2374 		network_hdr_len = skb_network_header_len(skb);
2375 		this_ip_hdr = ip_hdr(skb);
2376 		this_ipv6_hdr = ipv6_hdr(skb);
2377 		this_tcp_hdrlen = tcp_hdrlen(skb);
2378 	}
2379 
2380 	/* Enable IP checksum offloads */
2381 	if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2382 		l4_hdr = this_ip_hdr->protocol;
2383 		/* the stack computes the IP header already, the only time we
2384 		 * need the hardware to recompute it is in the case of TSO.
2385 		 */
2386 		if (*tx_flags & I40E_TX_FLAGS_TSO) {
2387 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
2388 			this_ip_hdr->check = 0;
2389 		} else {
2390 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
2391 		}
2392 		/* Now set the td_offset for IP header length */
2393 		*td_offset = (network_hdr_len >> 2) <<
2394 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2395 	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2396 		l4_hdr = this_ipv6_hdr->nexthdr;
2397 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2398 		/* Now set the td_offset for IP header length */
2399 		*td_offset = (network_hdr_len >> 2) <<
2400 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2401 	}
2402 	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
2403 	*td_offset |= (skb_network_offset(skb) >> 1) <<
2404 		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
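	/* For example (illustrative values only): an untagged Ethernet +
	 * IPv4 + TCP frame with no options gives MACLEN = 14 bytes = 7 words,
	 * IPLEN = 20 bytes = 5 dwords, and the TCP case below sets
	 * L4LEN = 20 bytes = 5 dwords.
	 */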
2405 
2406 	/* Enable L4 checksum offloads */
2407 	switch (l4_hdr) {
2408 	case IPPROTO_TCP:
2409 		/* enable checksum offloads */
2410 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2411 		*td_offset |= (this_tcp_hdrlen >> 2) <<
2412 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2413 		break;
2414 	case IPPROTO_SCTP:
2415 		/* enable SCTP checksum offload */
2416 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2417 		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
2418 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2419 		break;
2420 	case IPPROTO_UDP:
2421 		/* enable UDP checksum offload */
2422 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2423 		*td_offset |= (sizeof(struct udphdr) >> 2) <<
2424 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2425 		break;
2426 	default:
2427 		break;
2428 	}
2429 }
2430 
2431 /**
2432  * i40e_create_tx_ctx - Build the Tx context descriptor
2433  * @tx_ring:  ring to create the descriptor on
2434  * @cd_type_cmd_tso_mss: Quad Word 1
2435  * @cd_tunneling: Quad Word 0 - bits 0-31
2436  * @cd_l2tag2: Quad Word 0 - bits 32-63
2437  **/
2438 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2439 			       const u64 cd_type_cmd_tso_mss,
2440 			       const u32 cd_tunneling, const u32 cd_l2tag2)
2441 {
2442 	struct i40e_tx_context_desc *context_desc;
2443 	int i = tx_ring->next_to_use;
2444 
2445 	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2446 	    !cd_tunneling && !cd_l2tag2)
2447 		return;
2448 
2449 	/* grab the next descriptor */
2450 	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2451 
2452 	i++;
2453 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2454 
2455 	/* cpu_to_le32 and assign to struct fields */
2456 	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2457 	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2458 	context_desc->rsvd = cpu_to_le16(0);
2459 	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2460 }
2461 
2462 /**
2463  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2464  * @tx_ring: the ring to be checked
2465  * @size:    the size buffer we want to assure is available
2466  *
2467  * Returns -EBUSY if a stop is needed, else 0
2468  **/
2469 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2470 {
2471 	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2472 	/* Memory barrier before checking head and tail */
2473 	smp_mb();
2474 
2475 	/* Check again in case another CPU has just made room available. */
2476 	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2477 		return -EBUSY;
2478 
2479 	/* A reprieve! - use start_queue because it doesn't call schedule */
2480 	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2481 	++tx_ring->tx_stats.restart_queue;
2482 	return 0;
2483 }
2484 
2485 /**
2486  * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2487  * @tx_ring: the ring to be checked
2488  * @size:    the size buffer we want to assure is available
2489  *
2490  * Returns 0 if stop is not needed
2491  **/
2492 #ifdef I40E_FCOE
2493 inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2494 #else
2495 static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2496 #endif
2497 {
2498 	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2499 		return 0;
2500 	return __i40e_maybe_stop_tx(tx_ring, size);
2501 }
2502 
2503 /**
2504  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2505  * @skb:      send buffer
2506  * @tx_flags: collected send information
2507  *
2508  * Note: Our HW can't scatter-gather more than 8 fragments to build
2509  * a packet on the wire and so we need to figure out the cases where we
2510  * need to linearize the skb.
2511  **/
2512 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
2513 {
2514 	struct skb_frag_struct *frag;
2515 	bool linearize = false;
2516 	unsigned int size = 0;
2517 	u16 num_frags;
2518 	u16 gso_segs;
2519 
2520 	num_frags = skb_shinfo(skb)->nr_frags;
2521 	gso_segs = skb_shinfo(skb)->gso_segs;
2522 
2523 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
2524 		u16 j = 0;
2525 
2526 		if (num_frags < (I40E_MAX_BUFFER_TXD))
2527 			goto linearize_chk_done;
2528 		/* try the simple math, if we have too many frags per segment */
2529 		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2530 		    I40E_MAX_BUFFER_TXD) {
2531 			linearize = true;
2532 			goto linearize_chk_done;
2533 		}
2534 		frag = &skb_shinfo(skb)->frags[0];
2535 		/* we might still have more fragments per segment */
2536 		do {
2537 			size += skb_frag_size(frag);
2538 			frag++; j++;
2539 			if ((size >= skb_shinfo(skb)->gso_size) &&
2540 			    (j < I40E_MAX_BUFFER_TXD)) {
2541 				size = (size % skb_shinfo(skb)->gso_size);
2542 				j = (size) ? 1 : 0;
2543 			}
2544 			if (j == I40E_MAX_BUFFER_TXD) {
2545 				linearize = true;
2546 				break;
2547 			}
2548 			num_frags--;
2549 		} while (num_frags);
2550 	} else {
2551 		if (num_frags >= I40E_MAX_BUFFER_TXD)
2552 			linearize = true;
2553 	}
2554 
2555 linearize_chk_done:
2556 	return linearize;
2557 }
2558 
2559 /**
2560  * i40e_tx_map - Build the Tx descriptor
2561  * @tx_ring:  ring to send buffer on
2562  * @skb:      send buffer
2563  * @first:    first buffer info buffer to use
2564  * @tx_flags: collected send information
2565  * @hdr_len:  size of the packet header
2566  * @td_cmd:   the command field in the descriptor
2567  * @td_offset: offset for checksum or crc
2568  **/
2569 #ifdef I40E_FCOE
2570 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2571 			struct i40e_tx_buffer *first, u32 tx_flags,
2572 			const u8 hdr_len, u32 td_cmd, u32 td_offset)
2573 #else
2574 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2575 			       struct i40e_tx_buffer *first, u32 tx_flags,
2576 			       const u8 hdr_len, u32 td_cmd, u32 td_offset)
2577 #endif
2578 {
2579 	unsigned int data_len = skb->data_len;
2580 	unsigned int size = skb_headlen(skb);
2581 	struct skb_frag_struct *frag;
2582 	struct i40e_tx_buffer *tx_bi;
2583 	struct i40e_tx_desc *tx_desc;
2584 	u16 i = tx_ring->next_to_use;
2585 	u32 td_tag = 0;
2586 	dma_addr_t dma;
2587 	u16 gso_segs;
2588 	u16 desc_count = 0;
2589 	bool tail_bump = true;
2590 	bool do_rs = false;
2591 
2592 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2593 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2594 		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2595 			 I40E_TX_FLAGS_VLAN_SHIFT;
2596 	}
2597 
2598 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2599 		gso_segs = skb_shinfo(skb)->gso_segs;
2600 	else
2601 		gso_segs = 1;
2602 
2603 	/* total on-wire bytes: the payload plus one copy of the headers per segment */
2604 	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2605 	first->gso_segs = gso_segs;
2606 	first->skb = skb;
2607 	first->tx_flags = tx_flags;
2608 
2609 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2610 
2611 	tx_desc = I40E_TX_DESC(tx_ring, i);
2612 	tx_bi = first;
2613 
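	/* Walk the linear data first and then each page fragment, splitting
	 * any piece larger than I40E_MAX_DATA_PER_TXD across multiple
	 * descriptors.
	 */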
2614 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2615 		if (dma_mapping_error(tx_ring->dev, dma))
2616 			goto dma_error;
2617 
2618 		/* record length, and DMA address */
2619 		dma_unmap_len_set(tx_bi, len, size);
2620 		dma_unmap_addr_set(tx_bi, dma, dma);
2621 
2622 		tx_desc->buffer_addr = cpu_to_le64(dma);
2623 
2624 		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2625 			tx_desc->cmd_type_offset_bsz =
2626 				build_ctob(td_cmd, td_offset,
2627 					   I40E_MAX_DATA_PER_TXD, td_tag);
2628 
2629 			tx_desc++;
2630 			i++;
2631 			desc_count++;
2632 
2633 			if (i == tx_ring->count) {
2634 				tx_desc = I40E_TX_DESC(tx_ring, 0);
2635 				i = 0;
2636 			}
2637 
2638 			dma += I40E_MAX_DATA_PER_TXD;
2639 			size -= I40E_MAX_DATA_PER_TXD;
2640 
2641 			tx_desc->buffer_addr = cpu_to_le64(dma);
2642 		}
2643 
2644 		if (likely(!data_len))
2645 			break;
2646 
2647 		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2648 							  size, td_tag);
2649 
2650 		tx_desc++;
2651 		i++;
2652 		desc_count++;
2653 
2654 		if (i == tx_ring->count) {
2655 			tx_desc = I40E_TX_DESC(tx_ring, 0);
2656 			i = 0;
2657 		}
2658 
2659 		size = skb_frag_size(frag);
2660 		data_len -= size;
2661 
2662 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2663 				       DMA_TO_DEVICE);
2664 
2665 		tx_bi = &tx_ring->tx_bi[i];
2666 	}
2667 
2668 	/* set next_to_watch value indicating a packet is present */
2669 	first->next_to_watch = tx_desc;
2670 
2671 	i++;
2672 	if (i == tx_ring->count)
2673 		i = 0;
2674 
2675 	tx_ring->next_to_use = i;
2676 
2677 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2678 						 tx_ring->queue_index),
2679 						 first->bytecount);
2680 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2681 
2682 	/* Algorithm to optimize tail and RS bit setting:
2683 	 * if xmit_more is supported
2684 	 *	if xmit_more is true
2685 	 *		do not update tail and do not mark RS bit.
2686 	 *	if xmit_more is false and last xmit_more was false
2687 	 *		if every packet spanned less than 4 desc
2688 	 *			then set RS bit on 4th packet and update tail
2689 	 *			on every packet
2690 	 *		else
2691 	 *			update tail and set RS bit on every packet.
2692 	 *	if xmit_more is false and last_xmit_more was true
2693 	 *		update tail and set RS bit.
2694 	 *
2695 	 * Optimization: wmb to be issued only in case of tail update.
2696 	 * Also optimize the Descriptor WB path for RS bit with the same
2697 	 * algorithm.
2698 	 *
2699 	 * Note: If there are less than 4 packets
2700 	 * pending and interrupts were disabled the service task will
2701 	 * trigger a force WB.
2702 	 */
2703 	if (skb->xmit_more  &&
2704 	    !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2705 						    tx_ring->queue_index))) {
2706 		tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2707 		tail_bump = false;
2708 	} else if (!skb->xmit_more &&
2709 		   !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2710 						       tx_ring->queue_index)) &&
2711 		   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
2712 		   (tx_ring->packet_stride < WB_STRIDE) &&
2713 		   (desc_count < WB_STRIDE)) {
2714 		tx_ring->packet_stride++;
2715 	} else {
2716 		tx_ring->packet_stride = 0;
2717 		tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2718 		do_rs = true;
2719 	}
2720 	if (do_rs)
2721 		tx_ring->packet_stride = 0;
2722 
2723 	tx_desc->cmd_type_offset_bsz =
2724 			build_ctob(td_cmd, td_offset, size, td_tag) |
2725 			cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
2726 						  I40E_TX_DESC_CMD_EOP) <<
2727 						  I40E_TXD_QW1_CMD_SHIFT);
2728 
2729 	/* not bumping the tail: just warm the next descriptor for the next transmit */
2730 	if (!tail_bump)
2731 		prefetchw(tx_desc + 1);
2732 
2733 	if (tail_bump) {
2734 		/* Force memory writes to complete before letting h/w
2735 		 * know there are new descriptors to fetch.  (Only
2736 		 * applicable for weak-ordered memory model archs,
2737 		 * such as IA-64).
2738 		 */
2739 		wmb();
2740 		writel(i, tx_ring->tail);
2741 	}
2742 
2743 	return;
2744 
2745 dma_error:
2746 	dev_info(tx_ring->dev, "TX DMA map failed\n");
2747 
2748 	/* clear dma mappings for failed tx_bi map */
2749 	for (;;) {
2750 		tx_bi = &tx_ring->tx_bi[i];
2751 		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2752 		if (tx_bi == first)
2753 			break;
2754 		if (i == 0)
2755 			i = tx_ring->count;
2756 		i--;
2757 	}
2758 
2759 	tx_ring->next_to_use = i;
2760 }
2761 
2762 /**
2763  * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2764  * @skb:     send buffer
2765  * @tx_ring: ring to send buffer on
2766  *
2767  * Returns the number of data descriptors needed for this skb, or 0 to indicate
2768  * that there are not enough descriptors available in this ring, since we need
2769  * at least one descriptor.
2770  **/
2771 #ifdef I40E_FCOE
2772 inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2773 				      struct i40e_ring *tx_ring)
2774 #else
2775 static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2776 					     struct i40e_ring *tx_ring)
2777 #endif
2778 {
2779 	unsigned int f;
2780 	int count = 0;
2781 
2782 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2783 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2784 	 *       + 4 desc gap to avoid the cache line where head is,
2785 	 *       + 1 desc for context descriptor,
2786 	 * otherwise try next time
2787 	 */
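	/* Illustrative example (assuming each piece fits in one descriptor):
	 * a frame with a short linear head and three page fragments needs
	 * 1 + 3 data descriptors, so i40e_maybe_stop_tx() below is asked for
	 * 4 + 4 + 1 = 9 free descriptors.
	 */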
2788 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2789 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2790 
2791 	count += TXD_USE_COUNT(skb_headlen(skb));
2792 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2793 		tx_ring->tx_stats.tx_busy++;
2794 		return 0;
2795 	}
2796 	return count;
2797 }
2798 
2799 /**
2800  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2801  * @skb:     send buffer
2802  * @tx_ring: ring to send buffer on
2803  *
2804  * Returns NETDEV_TX_OK if sent, else an error code
2805  **/
2806 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2807 					struct i40e_ring *tx_ring)
2808 {
2809 	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2810 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
2811 	struct i40e_tx_buffer *first;
2812 	u32 td_offset = 0;
2813 	u32 tx_flags = 0;
2814 	__be16 protocol;
2815 	u32 td_cmd = 0;
2816 	u8 hdr_len = 0;
2817 	int tsyn;
2818 	int tso;
2819 
2820 	/* prefetch the data, we'll need it later */
2821 	prefetch(skb->data);
2822 
2823 	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
2824 		return NETDEV_TX_BUSY;
2825 
2826 	/* prepare the xmit flags */
2827 	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2828 		goto out_drop;
2829 
2830 	/* obtain protocol of skb */
2831 	protocol = vlan_get_protocol(skb);
2832 
2833 	/* record the location of the first descriptor for this packet */
2834 	first = &tx_ring->tx_bi[tx_ring->next_to_use];
2835 
2836 	/* setup IPv4/IPv6 offloads */
2837 	if (protocol == htons(ETH_P_IP))
2838 		tx_flags |= I40E_TX_FLAGS_IPV4;
2839 	else if (protocol == htons(ETH_P_IPV6))
2840 		tx_flags |= I40E_TX_FLAGS_IPV6;
2841 
2842 	tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);
2843 
2844 	if (tso < 0)
2845 		goto out_drop;
2846 	else if (tso)
2847 		tx_flags |= I40E_TX_FLAGS_TSO;
2848 
2849 	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2850 
2851 	if (tsyn)
2852 		tx_flags |= I40E_TX_FLAGS_TSYN;
2853 
2854 	if (i40e_chk_linearize(skb, tx_flags)) {
2855 		if (skb_linearize(skb))
2856 			goto out_drop;
2857 		tx_ring->tx_stats.tx_linearize++;
2858 	}
2859 	skb_tx_timestamp(skb);
2860 
2861 	/* always enable CRC insertion offload */
2862 	td_cmd |= I40E_TX_DESC_CMD_ICRC;
2863 
2864 	/* Always offload the checksum, since it's in the data descriptor */
2865 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2866 		tx_flags |= I40E_TX_FLAGS_CSUM;
2867 
2868 		i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2869 				    tx_ring, &cd_tunneling);
2870 	}
2871 
2872 	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2873 			   cd_tunneling, cd_l2tag2);
2874 
2875 	/* Add Flow Director ATR if it's enabled.
2876 	 *
2877 	 * NOTE: this must always be directly before the data descriptor.
2878 	 */
2879 	i40e_atr(tx_ring, skb, tx_flags, protocol);
2880 
2881 	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2882 		    td_cmd, td_offset);
2883 
2884 	return NETDEV_TX_OK;
2885 
2886 out_drop:
2887 	dev_kfree_skb_any(skb);
2888 	return NETDEV_TX_OK;
2889 }
2890 
2891 /**
2892  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2893  * @skb:    send buffer
2894  * @netdev: network interface device structure
2895  *
2896  * Returns NETDEV_TX_OK if sent, else an error code
2897  **/
2898 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2899 {
2900 	struct i40e_netdev_priv *np = netdev_priv(netdev);
2901 	struct i40e_vsi *vsi = np->vsi;
2902 	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2903 
2904 	/* hardware can't handle really short frames, hardware padding works
2905 	 * beyond this point
2906 	 */
2907 	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2908 		return NETDEV_TX_OK;
2909 
2910 	return i40e_xmit_frame_ring(skb, tx_ring);
2911 }
2912