xref: /linux/drivers/net/ethernet/sfc/tx_common.c (revision 189f164e573e18d9f8876dbd3ad8fcbe11f93037)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2018 Solarflare Communications Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10 
11 #include "net_driver.h"
12 #include "efx.h"
13 #include "nic_common.h"
14 #include "tx_common.h"
15 #include <net/gso.h>
16 
efx_tx_cb_page_count(struct efx_tx_queue * tx_queue)17 static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
18 {
19 	return DIV_ROUND_UP(tx_queue->ptr_mask + 1,
20 			    PAGE_SIZE >> EFX_TX_CB_ORDER);
21 }
22 
efx_probe_tx_queue(struct efx_tx_queue * tx_queue)23 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
24 {
25 	struct efx_nic *efx = tx_queue->efx;
26 	unsigned int entries;
27 	int rc;
28 
29 	/* Create the smallest power-of-two aligned ring */
30 	entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
31 	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
32 	tx_queue->ptr_mask = entries - 1;
33 
34 	netif_dbg(efx, probe, efx->net_dev,
35 		  "creating TX queue %d size %#x mask %#x\n",
36 		  tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
37 
38 	/* Allocate software ring */
39 	tx_queue->buffer = kzalloc_objs(*tx_queue->buffer, entries);
40 	if (!tx_queue->buffer)
41 		return -ENOMEM;
42 
43 	tx_queue->cb_page = kzalloc_objs(tx_queue->cb_page[0],
44 					 efx_tx_cb_page_count(tx_queue));
45 	if (!tx_queue->cb_page) {
46 		rc = -ENOMEM;
47 		goto fail1;
48 	}
49 
50 	/* Allocate hardware ring, determine TXQ type */
51 	rc = efx_nic_probe_tx(tx_queue);
52 	if (rc)
53 		goto fail2;
54 
55 	tx_queue->channel->tx_queue_by_type[tx_queue->type] = tx_queue;
56 	return 0;
57 
58 fail2:
59 	kfree(tx_queue->cb_page);
60 	tx_queue->cb_page = NULL;
61 fail1:
62 	kfree(tx_queue->buffer);
63 	tx_queue->buffer = NULL;
64 	return rc;
65 }
66 
efx_init_tx_queue(struct efx_tx_queue * tx_queue)67 void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
68 {
69 	struct efx_nic *efx = tx_queue->efx;
70 
71 	netif_dbg(efx, drv, efx->net_dev,
72 		  "initialising TX queue %d\n", tx_queue->queue);
73 
74 	tx_queue->insert_count = 0;
75 	tx_queue->notify_count = 0;
76 	tx_queue->write_count = 0;
77 	tx_queue->packet_write_count = 0;
78 	tx_queue->old_write_count = 0;
79 	tx_queue->read_count = 0;
80 	tx_queue->old_read_count = 0;
81 	tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
82 	tx_queue->xmit_pending = false;
83 	tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
84 				  tx_queue->channel == efx_ptp_channel(efx));
85 	tx_queue->completed_timestamp_major = 0;
86 	tx_queue->completed_timestamp_minor = 0;
87 
88 	tx_queue->old_complete_packets = tx_queue->complete_packets;
89 	tx_queue->old_complete_bytes = tx_queue->complete_bytes;
90 	tx_queue->old_tso_bursts = tx_queue->tso_bursts;
91 	tx_queue->old_tso_packets = tx_queue->tso_packets;
92 
93 	tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
94 	tx_queue->tso_version = 0;
95 
96 	/* Set up TX descriptor ring */
97 	efx_nic_init_tx(tx_queue);
98 
99 	tx_queue->initialised = true;
100 }
101 
efx_fini_tx_queue(struct efx_tx_queue * tx_queue)102 void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
103 {
104 	struct efx_tx_buffer *buffer;
105 
106 	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
107 		  "shutting down TX queue %d\n", tx_queue->queue);
108 
109 	tx_queue->initialised = false;
110 
111 	if (!tx_queue->buffer)
112 		return;
113 
114 	/* Free any buffers left in the ring */
115 	while (tx_queue->read_count != tx_queue->write_count) {
116 		unsigned int xdp_pkts_compl = 0, xdp_bytes_compl = 0;
117 		unsigned int pkts_compl = 0, bytes_compl = 0;
118 		unsigned int efv_pkts_compl = 0;
119 
120 		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
121 		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
122 				   &efv_pkts_compl, &xdp_pkts_compl,
123 				   &xdp_bytes_compl);
124 
125 		++tx_queue->read_count;
126 	}
127 	tx_queue->xmit_pending = false;
128 	netdev_tx_reset_queue(tx_queue->core_txq);
129 }
130 
efx_remove_tx_queue(struct efx_tx_queue * tx_queue)131 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
132 {
133 	int i;
134 
135 	if (!tx_queue->buffer)
136 		return;
137 
138 	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
139 		  "destroying TX queue %d\n", tx_queue->queue);
140 	efx_nic_remove_tx(tx_queue);
141 
142 	if (tx_queue->cb_page) {
143 		for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
144 			efx_nic_free_buffer(tx_queue->efx,
145 					    &tx_queue->cb_page[i]);
146 		kfree(tx_queue->cb_page);
147 		tx_queue->cb_page = NULL;
148 	}
149 
150 	kfree(tx_queue->buffer);
151 	tx_queue->buffer = NULL;
152 	tx_queue->channel->tx_queue_by_type[tx_queue->type] = NULL;
153 }
154 
efx_dequeue_buffer(struct efx_tx_queue * tx_queue,struct efx_tx_buffer * buffer,unsigned int * pkts_compl,unsigned int * bytes_compl,unsigned int * efv_pkts_compl,unsigned int * xdp_pkts,unsigned int * xdp_bytes)155 void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
156 			struct efx_tx_buffer *buffer,
157 			unsigned int *pkts_compl,
158 			unsigned int *bytes_compl,
159 			unsigned int *efv_pkts_compl,
160 			unsigned int *xdp_pkts,
161 			unsigned int *xdp_bytes)
162 {
163 	if (buffer->unmap_len) {
164 		struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
165 		dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
166 
167 		if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
168 			dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
169 					 DMA_TO_DEVICE);
170 		else
171 			dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
172 				       DMA_TO_DEVICE);
173 		buffer->unmap_len = 0;
174 	}
175 
176 	if (buffer->flags & EFX_TX_BUF_SKB) {
177 		struct sk_buff *skb = (struct sk_buff *)buffer->skb;
178 
179 		if (unlikely(buffer->flags & EFX_TX_BUF_EFV)) {
180 			EFX_WARN_ON_PARANOID(!efv_pkts_compl);
181 			(*efv_pkts_compl)++;
182 		} else {
183 			EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
184 			(*pkts_compl)++;
185 			(*bytes_compl) += skb->len;
186 		}
187 
188 		if (tx_queue->timestamping &&
189 		    (tx_queue->completed_timestamp_major ||
190 		     tx_queue->completed_timestamp_minor)) {
191 			struct skb_shared_hwtstamps hwtstamp;
192 
193 			hwtstamp.hwtstamp =
194 				efx_ptp_nic_to_kernel_time(tx_queue);
195 			skb_tstamp_tx(skb, &hwtstamp);
196 
197 			tx_queue->completed_timestamp_major = 0;
198 			tx_queue->completed_timestamp_minor = 0;
199 		}
200 		dev_consume_skb_any((struct sk_buff *)buffer->skb);
201 		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
202 			   "TX queue %d transmission id %x complete\n",
203 			   tx_queue->queue, tx_queue->read_count);
204 	} else if (buffer->flags & EFX_TX_BUF_XDP) {
205 		xdp_return_frame_rx_napi(buffer->xdpf);
206 		if (xdp_pkts)
207 			(*xdp_pkts)++;
208 		if (xdp_bytes)
209 			(*xdp_bytes) += buffer->xdpf->len;
210 	}
211 
212 	buffer->len = 0;
213 	buffer->flags = 0;
214 }
215 
216 /* Remove packets from the TX queue
217  *
218  * This removes packets from the TX queue, up to and including the
219  * specified index.
220  */
efx_dequeue_buffers(struct efx_tx_queue * tx_queue,unsigned int index,unsigned int * pkts_compl,unsigned int * bytes_compl,unsigned int * efv_pkts_compl,unsigned int * xdp_pkts,unsigned int * xdp_bytes)221 static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
222 				unsigned int index,
223 				unsigned int *pkts_compl,
224 				unsigned int *bytes_compl,
225 				unsigned int *efv_pkts_compl,
226 				unsigned int *xdp_pkts,
227 				unsigned int *xdp_bytes)
228 {
229 	struct efx_nic *efx = tx_queue->efx;
230 	unsigned int stop_index, read_ptr;
231 
232 	stop_index = (index + 1) & tx_queue->ptr_mask;
233 	read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
234 
235 	while (read_ptr != stop_index) {
236 		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
237 
238 		if (!efx_tx_buffer_in_use(buffer)) {
239 			netif_err(efx, tx_err, efx->net_dev,
240 				  "TX queue %d spurious TX completion id %d\n",
241 				  tx_queue->queue, read_ptr);
242 			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
243 			return;
244 		}
245 
246 		efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl,
247 				   efv_pkts_compl, xdp_pkts, xdp_bytes);
248 
249 		++tx_queue->read_count;
250 		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
251 	}
252 }
253 
/* Record when the queue has been observed empty (consumer caught up with
 * the producer), tagging the value with EFX_EMPTY_COUNT_VALID.
 * NOTE(review): empty_read_count is presumably consumed by the xmit path
 * for its empty-queue optimisation — confirm against the datapath code.
 */
void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
{
	/* Only re-check once the consumer has reached our last snapshot
	 * of the producer (signed comparison handles counter wrap).
	 */
	if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
		tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
		if (tx_queue->read_count == tx_queue->old_write_count) {
			/* Ensure that read_count is flushed. */
			smp_mb();
			tx_queue->empty_read_count =
				tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
		}
	}
}
266 
/* Process TX completions up to and including ring position @index.
 *
 * Frees the completed buffers, credits the queue statistics, wakes the
 * core netdev queue if it was stopped and has drained below the wake
 * threshold, and updates empty-queue detection state.
 *
 * Returns the number of completed packets (normal + EFV/representor).
 */
int efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
	unsigned int xdp_pkts_compl = 0, xdp_bytes_compl = 0;
	unsigned int efv_pkts_compl = 0;
	struct efx_nic *efx = tx_queue->efx;

	EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);

	/* Release the buffers and collect this event's counts */
	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl,
			    &efv_pkts_compl, &xdp_pkts_compl, &xdp_bytes_compl);
	tx_queue->pkts_compl += pkts_compl;
	tx_queue->bytes_compl += bytes_compl;
	tx_queue->complete_xdp_packets += xdp_pkts_compl;
	tx_queue->complete_xdp_bytes += xdp_bytes_compl;

	/* More than one packet in a single event means the hardware
	 * merged completions.
	 */
	if (pkts_compl + efv_pkts_compl > 1)
		++tx_queue->merge_events;

	/* See if we need to restart the netif queue.  This memory
	 * barrier ensures that we write read_count (inside
	 * efx_dequeue_buffers()) before reading the queue status.
	 */
	smp_mb();
	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
	    likely(efx->port_enabled) &&
	    likely(netif_device_present(efx->net_dev))) {
		fill_level = efx_channel_tx_fill_level(tx_queue->channel);
		if (fill_level <= efx->txq_wake_thresh)
			netif_tx_wake_queue(tx_queue->core_txq);
	}

	efx_xmit_done_check_empty(tx_queue);

	return pkts_compl + efv_pkts_compl;
}
303 
304 /* Remove buffers put into a tx_queue for the current packet.
305  * None of the buffers must have an skb attached.
306  */
efx_enqueue_unwind(struct efx_tx_queue * tx_queue,unsigned int insert_count)307 void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
308 			unsigned int insert_count)
309 {
310 	unsigned int xdp_bytes_compl = 0;
311 	unsigned int xdp_pkts_compl = 0;
312 	unsigned int efv_pkts_compl = 0;
313 	struct efx_tx_buffer *buffer;
314 	unsigned int bytes_compl = 0;
315 	unsigned int pkts_compl = 0;
316 
317 	/* Work backwards until we hit the original insert pointer value */
318 	while (tx_queue->insert_count != insert_count) {
319 		--tx_queue->insert_count;
320 		buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
321 		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
322 				   &efv_pkts_compl, &xdp_pkts_compl,
323 				   &xdp_bytes_compl);
324 	}
325 }
326 
/* Create one or more continuation descriptors covering [dma_addr,
 * dma_addr + len), splitting at NIC-dependent DMA length limits.
 * Returns the last descriptor written.
 */
struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
				       dma_addr_t dma_addr, size_t len)
{
	const struct efx_nic_type *nic_type = tx_queue->efx->type;
	struct efx_tx_buffer *buffer;
	unsigned int dma_len;

	for (;;) {
		buffer = efx_tx_queue_get_insert_buffer(tx_queue);

		/* Clamp to the NIC's per-descriptor DMA limit, if any */
		dma_len = nic_type->tx_limit_len ?
			  nic_type->tx_limit_len(tx_queue, dma_addr, len) :
			  len;

		buffer->len = dma_len;
		buffer->dma_addr = dma_addr;
		buffer->flags = EFX_TX_BUF_CONT;

		len -= dma_len;
		dma_addr += dma_len;
		++tx_queue->insert_count;
		if (!len)
			break;
	}

	return buffer;
}
353 
efx_tx_tso_header_length(struct sk_buff * skb)354 int efx_tx_tso_header_length(struct sk_buff *skb)
355 {
356 	size_t header_len;
357 
358 	if (skb->encapsulation)
359 		header_len = skb_inner_transport_offset(skb) +
360 				(inner_tcp_hdr(skb)->doff << 2u);
361 	else
362 		header_len = skb_transport_offset(skb) +
363 				(tcp_hdr(skb)->doff << 2u);
364 	return header_len;
365 }
366 
367 /* Map all data from an SKB for DMA and create descriptors on the queue. */
efx_tx_map_data(struct efx_tx_queue * tx_queue,struct sk_buff * skb,unsigned int segment_count)368 int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
369 		    unsigned int segment_count)
370 {
371 	struct efx_nic *efx = tx_queue->efx;
372 	struct device *dma_dev = &efx->pci_dev->dev;
373 	unsigned int frag_index, nr_frags;
374 	dma_addr_t dma_addr, unmap_addr;
375 	unsigned short dma_flags;
376 	size_t len, unmap_len;
377 
378 	nr_frags = skb_shinfo(skb)->nr_frags;
379 	frag_index = 0;
380 
381 	/* Map header data. */
382 	len = skb_headlen(skb);
383 	dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
384 	dma_flags = EFX_TX_BUF_MAP_SINGLE;
385 	unmap_len = len;
386 	unmap_addr = dma_addr;
387 
388 	if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
389 		return -EIO;
390 
391 	if (segment_count) {
392 		/* For TSO we need to put the header in to a separate
393 		 * descriptor. Map this separately if necessary.
394 		 */
395 		size_t header_len = efx_tx_tso_header_length(skb);
396 
397 		if (header_len != len) {
398 			tx_queue->tso_long_headers++;
399 			efx_tx_map_chunk(tx_queue, dma_addr, header_len);
400 			len -= header_len;
401 			dma_addr += header_len;
402 		}
403 	}
404 
405 	/* Add descriptors for each fragment. */
406 	do {
407 		struct efx_tx_buffer *buffer;
408 		skb_frag_t *fragment;
409 
410 		buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
411 
412 		/* The final descriptor for a fragment is responsible for
413 		 * unmapping the whole fragment.
414 		 */
415 		buffer->flags = EFX_TX_BUF_CONT | dma_flags;
416 		buffer->unmap_len = unmap_len;
417 		buffer->dma_offset = buffer->dma_addr - unmap_addr;
418 
419 		if (frag_index >= nr_frags) {
420 			/* Store SKB details with the final buffer for
421 			 * the completion.
422 			 */
423 			buffer->skb = skb;
424 			buffer->flags = EFX_TX_BUF_SKB | dma_flags;
425 			return 0;
426 		}
427 
428 		/* Move on to the next fragment. */
429 		fragment = &skb_shinfo(skb)->frags[frag_index++];
430 		len = skb_frag_size(fragment);
431 		dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
432 					    DMA_TO_DEVICE);
433 		dma_flags = 0;
434 		unmap_len = len;
435 		unmap_addr = dma_addr;
436 
437 		if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
438 			return -EIO;
439 	} while (1);
440 }
441 
efx_tx_max_skb_descs(struct efx_nic * efx)442 unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
443 {
444 	/* Header and payload descriptor for each output segment, plus
445 	 * one for every input fragment boundary within a segment
446 	 */
447 	unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
448 
449 	/* Possibly one more per segment for option descriptors */
450 	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
451 		max_descs += EFX_TSO_MAX_SEGS;
452 
453 	/* Possibly more for PCIe page boundaries within input fragments */
454 	if (PAGE_SIZE > EFX_PAGE_SIZE)
455 		max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
456 				   DIV_ROUND_UP(GSO_LEGACY_MAX_SIZE,
457 						EFX_PAGE_SIZE));
458 
459 	return max_descs;
460 }
461 
462 /*
463  * Fallback to software TSO.
464  *
465  * This is used if we are unable to send a GSO packet through hardware TSO.
466  * This should only ever happen due to per-queue restrictions - unsupported
467  * packets should first be filtered by the feature flags.
468  *
469  * Returns 0 on success, error code otherwise.
470  */
efx_tx_tso_fallback(struct efx_tx_queue * tx_queue,struct sk_buff * skb)471 int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
472 {
473 	struct sk_buff *segments, *next;
474 
475 	segments = skb_gso_segment(skb, 0);
476 	if (IS_ERR(segments))
477 		return PTR_ERR(segments);
478 
479 	dev_consume_skb_any(skb);
480 
481 	skb_list_walk_safe(segments, skb, next) {
482 		skb_mark_not_on_list(skb);
483 		efx_enqueue_skb(tx_queue, skb);
484 	}
485 
486 	return 0;
487 }
488