1 // SPDX-License-Identifier: GPL-2.0-only 2 /**************************************************************************** 3 * Driver for Solarflare network controllers and boards 4 * Copyright 2018 Solarflare Communications Inc. 5 * Copyright 2019-2020 Xilinx Inc. 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 as published 9 * by the Free Software Foundation, incorporated herein by reference. 10 */ 11 12 #include <net/ip6_checksum.h> 13 14 #include "net_driver.h" 15 #include "tx_common.h" 16 #include "nic_common.h" 17 #include "mcdi_functions.h" 18 #include "ef100_regs.h" 19 #include "io.h" 20 #include "ef100_tx.h" 21 #include "ef100_nic.h" 22 23 int ef100_tx_probe(struct efx_tx_queue *tx_queue) 24 { 25 /* Allocate an extra descriptor for the QMDA status completion entry */ 26 return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd, 27 (tx_queue->ptr_mask + 2) * 28 sizeof(efx_oword_t), 29 GFP_KERNEL); 30 } 31 32 void ef100_tx_init(struct efx_tx_queue *tx_queue) 33 { 34 /* must be the inverse of lookup in efx_get_tx_channel */ 35 tx_queue->core_txq = 36 netdev_get_tx_queue(tx_queue->efx->net_dev, 37 tx_queue->channel->channel - 38 tx_queue->efx->tx_channel_offset); 39 40 /* This value is purely documentational; as EF100 never passes through 41 * the switch statement in tx.c:__efx_enqueue_skb(), that switch does 42 * not handle case 3. EF100's TSOv3 descriptors are generated by 43 * ef100_make_tso_desc(). 44 * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2. 45 */ 46 tx_queue->tso_version = 3; 47 if (efx_mcdi_tx_init(tx_queue)) 48 netdev_WARN(tx_queue->efx->net_dev, 49 "failed to initialise TXQ %d\n", tx_queue->queue); 50 } 51 52 static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) 53 { 54 struct efx_nic *efx = tx_queue->efx; 55 struct ef100_nic_data *nic_data; 56 struct efx_tx_buffer *buffer; 57 size_t header_len; 58 u32 mss; 59 60 nic_data = efx->nic_data; 61 62 if (!skb_is_gso_tcp(skb)) 63 return false; 64 if (!(efx->net_dev->features & NETIF_F_TSO)) 65 return false; 66 67 mss = skb_shinfo(skb)->gso_size; 68 if (unlikely(mss < 4)) { 69 WARN_ONCE(1, "MSS of %u is too small for TSO\n", mss); 70 return false; 71 } 72 73 header_len = efx_tx_tso_header_length(skb); 74 if (header_len > nic_data->tso_max_hdr_len) 75 return false; 76 77 if (skb_shinfo(skb)->gso_segs > nic_data->tso_max_payload_num_segs) { 78 /* net_dev->gso_max_segs should've caught this */ 79 WARN_ON_ONCE(1); 80 return false; 81 } 82 83 if (skb->data_len / mss > nic_data->tso_max_frames) 84 return false; 85 86 /* net_dev->gso_max_size should've caught this */ 87 if (WARN_ON_ONCE(skb->data_len > nic_data->tso_max_payload_len)) 88 return false; 89 90 /* Reserve an empty buffer for the TSO V3 descriptor. 91 * Convey the length of the header since we already know it. 92 */ 93 buffer = efx_tx_queue_get_insert_buffer(tx_queue); 94 buffer->flags = EFX_TX_BUF_TSO_V3 | EFX_TX_BUF_CONT; 95 buffer->len = header_len; 96 buffer->unmap_len = 0; 97 buffer->skb = skb; 98 ++tx_queue->insert_count; 99 return true; 100 } 101 102 static efx_oword_t *ef100_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) 103 { 104 if (likely(tx_queue->txd.addr)) 105 return ((efx_oword_t *)tx_queue->txd.addr) + index; 106 else 107 return NULL; 108 } 109 110 static void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue) 111 { 112 unsigned int write_ptr; 113 efx_dword_t reg; 114 115 tx_queue->xmit_pending = false; 116 117 if (unlikely(tx_queue->notify_count == tx_queue->write_count)) 118 return; 119 120 write_ptr = tx_queue->write_count & tx_queue->ptr_mask; 121 /* The write pointer goes into the high word */ 122 EFX_POPULATE_DWORD_1(reg, ERF_GZ_TX_RING_PIDX, write_ptr); 123 efx_writed_page(tx_queue->efx, ®, 124 ER_GZ_TX_RING_DOORBELL, tx_queue->queue); 125 tx_queue->notify_count = tx_queue->write_count; 126 } 127 128 static void ef100_tx_push_buffers(struct efx_tx_queue *tx_queue) 129 { 130 ef100_notify_tx_desc(tx_queue); 131 ++tx_queue->pushes; 132 } 133 134 static void ef100_set_tx_csum_partial(const struct sk_buff *skb, 135 struct efx_tx_buffer *buffer, efx_oword_t *txd) 136 { 137 efx_oword_t csum; 138 int csum_start; 139 140 if (!skb || skb->ip_summed != CHECKSUM_PARTIAL) 141 return; 142 143 /* skb->csum_start has the offset from head, but we need the offset 144 * from data. 145 */ 146 csum_start = skb_checksum_start_offset(skb); 147 EFX_POPULATE_OWORD_3(csum, 148 ESF_GZ_TX_SEND_CSO_PARTIAL_EN, 1, 149 ESF_GZ_TX_SEND_CSO_PARTIAL_START_W, 150 csum_start >> 1, 151 ESF_GZ_TX_SEND_CSO_PARTIAL_CSUM_W, 152 skb->csum_offset >> 1); 153 EFX_OR_OWORD(*txd, *txd, csum); 154 } 155 156 static void ef100_set_tx_hw_vlan(const struct sk_buff *skb, efx_oword_t *txd) 157 { 158 u16 vlan_tci = skb_vlan_tag_get(skb); 159 efx_oword_t vlan; 160 161 EFX_POPULATE_OWORD_2(vlan, 162 ESF_GZ_TX_SEND_VLAN_INSERT_EN, 1, 163 ESF_GZ_TX_SEND_VLAN_INSERT_TCI, vlan_tci); 164 EFX_OR_OWORD(*txd, *txd, vlan); 165 } 166 167 static void ef100_make_send_desc(struct efx_nic *efx, 168 const struct sk_buff *skb, 169 struct efx_tx_buffer *buffer, efx_oword_t *txd, 170 unsigned int segment_count) 171 { 172 /* TX send descriptor */ 173 EFX_POPULATE_OWORD_3(*txd, 174 ESF_GZ_TX_SEND_NUM_SEGS, segment_count, 175 ESF_GZ_TX_SEND_LEN, buffer->len, 176 ESF_GZ_TX_SEND_ADDR, buffer->dma_addr); 177 178 if (likely(efx->net_dev->features & NETIF_F_HW_CSUM)) 179 ef100_set_tx_csum_partial(skb, buffer, txd); 180 if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX && 181 skb && skb_vlan_tag_present(skb)) 182 ef100_set_tx_hw_vlan(skb, txd); 183 } 184 185 static void ef100_make_tso_desc(struct efx_nic *efx, 186 const struct sk_buff *skb, 187 struct efx_tx_buffer *buffer, efx_oword_t *txd, 188 unsigned int segment_count) 189 { 190 bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; 191 unsigned int len, ip_offset, tcp_offset, payload_segs; 192 u32 mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; 193 u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; 194 unsigned int outer_ip_offset, outer_l4_offset; 195 u16 vlan_tci = skb_vlan_tag_get(skb); 196 u32 mss = skb_shinfo(skb)->gso_size; 197 bool encap = skb->encapsulation; 198 bool udp_encap = false; 199 u16 vlan_enable = 0; 200 struct tcphdr *tcp; 201 bool outer_csum; 202 u32 paylen; 203 204 if (encap) { 205 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID_INNER) 206 mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; 207 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) 208 mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_NO_OP; 209 } else { 210 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) 211 mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; 212 mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_NO_OP; 213 } 214 215 if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX) 216 vlan_enable = skb_vlan_tag_present(skb); 217 218 len = skb->len - buffer->len; 219 /* We use 1 for the TSO descriptor and 1 for the header */ 220 payload_segs = segment_count - 2; 221 if (encap) { 222 outer_ip_offset = skb_network_offset(skb); 223 outer_l4_offset = skb_transport_offset(skb); 224 ip_offset = skb_inner_network_offset(skb); 225 tcp_offset = skb_inner_transport_offset(skb); 226 if (skb_shinfo(skb)->gso_type & 227 (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) 228 udp_encap = true; 229 } else { 230 ip_offset = skb_network_offset(skb); 231 tcp_offset = skb_transport_offset(skb); 232 outer_ip_offset = outer_l4_offset = 0; 233 } 234 outer_csum = skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM; 235 236 /* subtract TCP payload length from inner checksum */ 237 tcp = (void *)skb->data + tcp_offset; 238 paylen = skb->len - tcp_offset; 239 csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); 240 241 EFX_POPULATE_OWORD_19(*txd, 242 ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO, 243 ESF_GZ_TX_TSO_MSS, mss, 244 ESF_GZ_TX_TSO_HDR_NUM_SEGS, 1, 245 ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, payload_segs, 246 ESF_GZ_TX_TSO_HDR_LEN_W, buffer->len >> 1, 247 ESF_GZ_TX_TSO_PAYLOAD_LEN, len, 248 ESF_GZ_TX_TSO_CSO_OUTER_L4, outer_csum, 249 ESF_GZ_TX_TSO_CSO_INNER_L4, 1, 250 ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1, 251 ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, 252 ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, 253 ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, 254 ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, 255 ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, 256 ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial, 257 ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial, 258 ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, mangleid_outer, 259 ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, 260 ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci 261 ); 262 } 263 264 static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue, 265 const struct sk_buff *skb, 266 unsigned int segment_count, 267 struct efx_rep *efv) 268 { 269 unsigned int old_write_count = tx_queue->write_count; 270 unsigned int new_write_count = old_write_count; 271 struct efx_tx_buffer *buffer; 272 unsigned int next_desc_type; 273 unsigned int write_ptr; 274 efx_oword_t *txd; 275 unsigned int nr_descs = tx_queue->insert_count - old_write_count; 276 277 if (unlikely(nr_descs == 0)) 278 return; 279 280 if (segment_count) 281 next_desc_type = ESE_GZ_TX_DESC_TYPE_TSO; 282 else 283 next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND; 284 285 if (unlikely(efv)) { 286 /* Create TX override descriptor */ 287 write_ptr = new_write_count & tx_queue->ptr_mask; 288 txd = ef100_tx_desc(tx_queue, write_ptr); 289 ++new_write_count; 290 291 tx_queue->packet_write_count = new_write_count; 292 EFX_POPULATE_OWORD_3(*txd, 293 ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX, 294 ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport, 295 ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1); 296 nr_descs--; 297 } 298 299 /* if it's a raw write (such as XDP) then always SEND single frames */ 300 if (!skb) 301 nr_descs = 1; 302 303 do { 304 write_ptr = new_write_count & tx_queue->ptr_mask; 305 buffer = &tx_queue->buffer[write_ptr]; 306 txd = ef100_tx_desc(tx_queue, write_ptr); 307 ++new_write_count; 308 309 /* Create TX descriptor ring entry */ 310 tx_queue->packet_write_count = new_write_count; 311 312 switch (next_desc_type) { 313 case ESE_GZ_TX_DESC_TYPE_SEND: 314 ef100_make_send_desc(tx_queue->efx, skb, 315 buffer, txd, nr_descs); 316 break; 317 case ESE_GZ_TX_DESC_TYPE_TSO: 318 /* TX TSO descriptor */ 319 WARN_ON_ONCE(!(buffer->flags & EFX_TX_BUF_TSO_V3)); 320 ef100_make_tso_desc(tx_queue->efx, skb, 321 buffer, txd, nr_descs); 322 break; 323 default: 324 /* TX segment descriptor */ 325 EFX_POPULATE_OWORD_3(*txd, 326 ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEG, 327 ESF_GZ_TX_SEG_LEN, buffer->len, 328 ESF_GZ_TX_SEG_ADDR, buffer->dma_addr); 329 } 330 /* if it's a raw write (such as XDP) then always SEND */ 331 next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG : 332 ESE_GZ_TX_DESC_TYPE_SEND; 333 /* mark as an EFV buffer if applicable */ 334 if (unlikely(efv)) 335 buffer->flags |= EFX_TX_BUF_EFV; 336 337 } while (new_write_count != tx_queue->insert_count); 338 339 wmb(); /* Ensure descriptors are written before they are fetched */ 340 341 tx_queue->write_count = new_write_count; 342 343 /* The write_count above must be updated before reading 344 * channel->holdoff_doorbell to avoid a race with the 345 * completion path, so ensure these operations are not 346 * re-ordered. This also flushes the update of write_count 347 * back into the cache. 348 */ 349 smp_mb(); 350 } 351 352 void ef100_tx_write(struct efx_tx_queue *tx_queue) 353 { 354 ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL); 355 ef100_tx_push_buffers(tx_queue); 356 } 357 358 int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) 359 { 360 unsigned int tx_done = 361 EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_NUM_DESC); 362 unsigned int qlabel = 363 EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_Q_LABEL); 364 struct efx_tx_queue *tx_queue = 365 efx_channel_get_tx_queue(channel, qlabel); 366 unsigned int tx_index = (tx_queue->read_count + tx_done - 1) & 367 tx_queue->ptr_mask; 368 369 return efx_xmit_done(tx_queue, tx_index); 370 } 371 372 /* Add a socket buffer to a TX queue 373 * 374 * You must hold netif_tx_lock() to call this function. 375 * 376 * Returns 0 on success, error code otherwise. In case of an error this 377 * function will free the SKB. 378 */ 379 netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, 380 struct sk_buff *skb) 381 { 382 return __ef100_enqueue_skb(tx_queue, skb, NULL); 383 } 384 385 int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb, 386 struct efx_rep *efv) 387 { 388 unsigned int old_insert_count = tx_queue->insert_count; 389 struct efx_nic *efx = tx_queue->efx; 390 bool xmit_more = netdev_xmit_more(); 391 unsigned int fill_level; 392 unsigned int segments; 393 int rc; 394 395 if (!tx_queue->buffer || !tx_queue->ptr_mask) { 396 netif_stop_queue(efx->net_dev); 397 dev_kfree_skb_any(skb); 398 return -ENODEV; 399 } 400 401 segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0; 402 if (segments == 1) 403 segments = 0; /* Don't use TSO/GSO for a single segment. */ 404 if (segments && !ef100_tx_can_tso(tx_queue, skb)) { 405 rc = efx_tx_tso_fallback(tx_queue, skb); 406 tx_queue->tso_fallbacks++; 407 if (rc) 408 goto err; 409 else 410 return 0; 411 } 412 413 if (unlikely(efv)) { 414 struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue); 415 416 /* Drop representor packets if the queue is stopped. 417 * We currently don't assert backoff to representors so this is 418 * to make sure representor traffic can't starve the main 419 * net device. 420 * And, of course, if there are no TX descriptors left. 421 */ 422 if (netif_tx_queue_stopped(tx_queue->core_txq) || 423 unlikely(efx_tx_buffer_in_use(buffer))) { 424 atomic64_inc(&efv->stats.tx_errors); 425 rc = -ENOSPC; 426 goto err; 427 } 428 429 /* Also drop representor traffic if it could cause us to 430 * stop the queue. If we assert backoff and we haven't 431 * received traffic on the main net device recently then the 432 * TX watchdog can go off erroneously. 433 */ 434 fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); 435 fill_level += efx_tx_max_skb_descs(efx); 436 if (fill_level > efx->txq_stop_thresh) { 437 struct efx_tx_queue *txq2; 438 439 /* Refresh cached fill level and re-check */ 440 efx_for_each_channel_tx_queue(txq2, tx_queue->channel) 441 txq2->old_read_count = READ_ONCE(txq2->read_count); 442 443 fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); 444 fill_level += efx_tx_max_skb_descs(efx); 445 if (fill_level > efx->txq_stop_thresh) { 446 atomic64_inc(&efv->stats.tx_errors); 447 rc = -ENOSPC; 448 goto err; 449 } 450 } 451 452 buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV; 453 tx_queue->insert_count++; 454 } 455 456 /* Map for DMA and create descriptors */ 457 rc = efx_tx_map_data(tx_queue, skb, segments); 458 if (rc) 459 goto err; 460 ef100_tx_make_descriptors(tx_queue, skb, segments, efv); 461 462 fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); 463 if (fill_level > efx->txq_stop_thresh) { 464 struct efx_tx_queue *txq2; 465 466 /* Because of checks above, representor traffic should 467 * not be able to stop the queue. 468 */ 469 WARN_ON(efv); 470 471 netif_tx_stop_queue(tx_queue->core_txq); 472 /* Re-read after a memory barrier in case we've raced with 473 * the completion path. Otherwise there's a danger we'll never 474 * restart the queue if all completions have just happened. 475 */ 476 smp_mb(); 477 efx_for_each_channel_tx_queue(txq2, tx_queue->channel) 478 txq2->old_read_count = READ_ONCE(txq2->read_count); 479 fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); 480 if (fill_level < efx->txq_stop_thresh) 481 netif_tx_start_queue(tx_queue->core_txq); 482 } 483 484 tx_queue->xmit_pending = true; 485 486 /* If xmit_more then we don't need to push the doorbell, unless there 487 * are 256 descriptors already queued in which case we have to push to 488 * ensure we never push more than 256 at once. 489 * 490 * Always push for representor traffic, and don't account it to parent 491 * PF netdevice's BQL. 492 */ 493 if (unlikely(efv) || 494 __netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) || 495 tx_queue->write_count - tx_queue->notify_count > 255) 496 ef100_tx_push_buffers(tx_queue); 497 498 if (segments) { 499 tx_queue->tso_bursts++; 500 tx_queue->tso_packets += segments; 501 tx_queue->tx_packets += segments; 502 } else { 503 tx_queue->tx_packets++; 504 } 505 return 0; 506 507 err: 508 efx_enqueue_unwind(tx_queue, old_insert_count); 509 if (!IS_ERR_OR_NULL(skb)) 510 dev_kfree_skb_any(skb); 511 512 /* If we're not expecting another transmit and we had something to push 513 * on this queue then we need to push here to get the previous packets 514 * out. We only enter this branch from before the xmit_more handling 515 * above, so xmit_pending still refers to the old state. 516 */ 517 if (tx_queue->xmit_pending && !xmit_more) 518 ef100_tx_push_buffers(tx_queue); 519 return rc; 520 } 521