// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2018 Intel Corporation. */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>

#include "e1000_hw.h"
#include "igb.h"

static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
{
        int size = pool_present ?
                sizeof(*ring->rx_buffer_info_zc) * ring->count :
                sizeof(*ring->rx_buffer_info) * ring->count;
        void *buff_info = vmalloc(size);

        if (!buff_info)
                return -ENOMEM;

        if (pool_present) {
                vfree(ring->rx_buffer_info);
                ring->rx_buffer_info = NULL;
                ring->rx_buffer_info_zc = buff_info;
        } else {
                vfree(ring->rx_buffer_info_zc);
                ring->rx_buffer_info_zc = NULL;
                ring->rx_buffer_info = buff_info;
        }

        return 0;
}

static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
{
        struct igb_ring *tx_ring = adapter->tx_ring[qid];
        struct igb_ring *rx_ring = adapter->rx_ring[qid];
        struct e1000_hw *hw = &adapter->hw;

        set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);

        wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
        wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);

        synchronize_net();

        /* Rx/Tx share the same napi context. */
        napi_disable(&rx_ring->q_vector->napi);

        igb_clean_tx_ring(tx_ring);
        igb_clean_rx_ring(rx_ring);

        memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
        memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
}

static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
{
        struct igb_ring *tx_ring = adapter->tx_ring[qid];
        struct igb_ring *rx_ring = adapter->rx_ring[qid];

        igb_configure_tx_ring(adapter, tx_ring);
        igb_configure_rx_ring(adapter, rx_ring);

        synchronize_net();

        clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);

        /* call igb_desc_unused which always leaves
         * at least 1 descriptor unused to make sure
         * next_to_use != next_to_clean
         */
        if (rx_ring->xsk_pool)
                igb_alloc_rx_buffers_zc(rx_ring, rx_ring->xsk_pool,
                                        igb_desc_unused(rx_ring));
        else
                igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));

        /* Rx/Tx share the same napi context. */
        napi_enable(&rx_ring->q_vector->napi);
}

struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
                                   struct igb_ring *ring)
{
        int qid = ring->queue_index;
        struct xsk_buff_pool *pool;

        pool = xsk_get_pool_from_qid(adapter->netdev, qid);

        if (!igb_xdp_is_enabled(adapter))
                return NULL;

        return (pool && pool->dev) ? pool : NULL;
}

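/* Attach an AF_XDP buffer pool to Rx/Tx queue pair @qid: DMA-map the pool
 * and, if the interface is running with XDP enabled, quiesce the queue pair,
 * switch it to the zero-copy buffer-info layout, bring it back up and kick
 * NAPI so that Rx processing starts immediately.
 */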
static int igb_xsk_pool_enable(struct igb_adapter *adapter,
                               struct xsk_buff_pool *pool,
                               u16 qid)
{
        struct net_device *netdev = adapter->netdev;
        struct igb_ring *rx_ring;
        bool if_running;
        int err;

        if (qid >= adapter->num_rx_queues)
                return -EINVAL;

        if (qid >= netdev->real_num_rx_queues ||
            qid >= netdev->real_num_tx_queues)
                return -EINVAL;

        err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
        if (err)
                return err;

        rx_ring = adapter->rx_ring[qid];
        if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
        if (if_running)
                igb_txrx_ring_disable(adapter, qid);

        if (if_running) {
                err = igb_realloc_rx_buffer_info(rx_ring, true);
                if (!err) {
                        igb_txrx_ring_enable(adapter, qid);
                        /* Kick start the NAPI context so that receiving will start */
                        err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
                }

                if (err) {
                        xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
                        return err;
                }
        }

        return 0;
}

static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
{
        struct xsk_buff_pool *pool;
        struct igb_ring *rx_ring;
        bool if_running;
        int err;

        pool = xsk_get_pool_from_qid(adapter->netdev, qid);
        if (!pool)
                return -EINVAL;

        rx_ring = adapter->rx_ring[qid];
        if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
        if (if_running)
                igb_txrx_ring_disable(adapter, qid);

        xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);

        if (if_running) {
                err = igb_realloc_rx_buffer_info(rx_ring, false);
                if (err)
                        return err;

                igb_txrx_ring_enable(adapter, qid);
        }

        return 0;
}

int igb_xsk_pool_setup(struct igb_adapter *adapter,
                       struct xsk_buff_pool *pool,
                       u16 qid)
{
        return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
                igb_xsk_pool_disable(adapter, qid);
}

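/* Grab up to @count xdp_buffs from the pool in one batch and write their DMA
 * addresses into consecutive Rx descriptors, clearing each writeback length
 * field so that completion can be detected later. Returns the number of
 * buffers actually placed, which may be less than @count if the pool runs dry.
 */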
static u16 igb_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
                             union e1000_adv_rx_desc *rx_desc, u16 count)
{
        dma_addr_t dma;
        u16 buffs;
        int i;

        /* nothing to do */
        if (!count)
                return 0;

        buffs = xsk_buff_alloc_batch(pool, xdp, count);
        for (i = 0; i < buffs; i++) {
                dma = xsk_buff_xdp_get_dma(*xdp);
                rx_desc->read.pkt_addr = cpu_to_le64(dma);
                rx_desc->wb.upper.length = 0;

                rx_desc++;
                xdp++;
        }

        return buffs;
}

bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
                             struct xsk_buff_pool *xsk_pool, u16 count)
{
        u32 nb_buffs_extra = 0, nb_buffs = 0;
        union e1000_adv_rx_desc *rx_desc;
        u16 ntu = rx_ring->next_to_use;
        u16 total_count = count;
        struct xdp_buff **xdp;

        rx_desc = IGB_RX_DESC(rx_ring, ntu);
        xdp = &rx_ring->rx_buffer_info_zc[ntu];

        if (ntu + count >= rx_ring->count) {
                nb_buffs_extra = igb_fill_rx_descs(xsk_pool, xdp, rx_desc,
                                                   rx_ring->count - ntu);
                if (nb_buffs_extra != rx_ring->count - ntu) {
                        ntu += nb_buffs_extra;
                        goto exit;
                }
                rx_desc = IGB_RX_DESC(rx_ring, 0);
                xdp = rx_ring->rx_buffer_info_zc;
                ntu = 0;
                count -= nb_buffs_extra;
        }

        nb_buffs = igb_fill_rx_descs(xsk_pool, xdp, rx_desc, count);
        ntu += nb_buffs;
        if (ntu == rx_ring->count)
                ntu = 0;

        /* clear the length for the next_to_use descriptor */
        rx_desc = IGB_RX_DESC(rx_ring, ntu);
        rx_desc->wb.upper.length = 0;

exit:
        if (rx_ring->next_to_use != ntu) {
                rx_ring->next_to_use = ntu;

                /* Force memory writes to complete before letting h/w
                 * know there are new descriptors to fetch. (Only
                 * applicable for weak-ordered memory model archs,
                 * such as IA-64).
                 */
                wmb();
                writel(ntu, rx_ring->tail);
        }

        return total_count == (nb_buffs + nb_buffs_extra);
}

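/* Return every xdp_buff still held by the ring to the pool, walking the
 * zero-copy buffer array from next_to_clean up to next_to_use.
 */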
void igb_clean_rx_ring_zc(struct igb_ring *rx_ring)
{
        u16 ntc = rx_ring->next_to_clean;
        u16 ntu = rx_ring->next_to_use;

        while (ntc != ntu) {
                struct xdp_buff *xdp = rx_ring->rx_buffer_info_zc[ntc];

                xsk_buff_free(xdp);
                ntc++;
                if (ntc >= rx_ring->count)
                        ntc = 0;
        }
}

static struct sk_buff *igb_construct_skb_zc(struct igb_ring *rx_ring,
                                            struct xdp_buff *xdp,
                                            ktime_t timestamp)
{
        unsigned int totalsize = xdp->data_end - xdp->data_meta;
        unsigned int metasize = xdp->data - xdp->data_meta;
        struct sk_buff *skb;

        net_prefetch(xdp->data_meta);

        /* allocate a skb to store the frags */
        skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
        if (unlikely(!skb))
                return NULL;

        if (timestamp)
                skb_hwtstamps(skb)->hwtstamp = timestamp;

        memcpy(__skb_put(skb, totalsize), xdp->data_meta,
               ALIGN(totalsize, sizeof(long)));

        if (metasize) {
                skb_metadata_set(skb, metasize);
                __skb_pull(skb, metasize);
        }

        return skb;
}

static int igb_run_xdp_zc(struct igb_adapter *adapter, struct igb_ring *rx_ring,
                          struct xdp_buff *xdp, struct xsk_buff_pool *xsk_pool,
                          struct bpf_prog *xdp_prog)
{
        int err, result = IGB_XDP_PASS;
        u32 act;

        prefetchw(xdp->data_hard_start); /* xdp_frame write */

        act = bpf_prog_run_xdp(xdp_prog, xdp);

        if (likely(act == XDP_REDIRECT)) {
                err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
                if (!err)
                        return IGB_XDP_REDIR;

                if (xsk_uses_need_wakeup(xsk_pool) &&
                    err == -ENOBUFS)
                        result = IGB_XDP_EXIT;
                else
                        result = IGB_XDP_CONSUMED;
                goto out_failure;
        }

        switch (act) {
        case XDP_PASS:
                break;
        case XDP_TX:
                result = igb_xdp_xmit_back(adapter, xdp);
                if (result == IGB_XDP_CONSUMED)
                        goto out_failure;
                break;
        default:
                bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act);
                fallthrough;
        case XDP_ABORTED:
out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough;
        case XDP_DROP:
                result = IGB_XDP_CONSUMED;
                break;
        }

        return result;
}

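/* Zero-copy Rx poll loop: for each descriptor written back by hardware, size
 * the xdp_buff, strip an in-band timestamp if present, run the XDP program,
 * and either recycle the buffer (TX/REDIRECT/DROP) or copy the frame into an
 * skb for the regular stack. Refills the ring before returning.
 */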
int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
                        struct xsk_buff_pool *xsk_pool, const int budget)
{
        struct igb_adapter *adapter = q_vector->adapter;
        unsigned int total_bytes = 0, total_packets = 0;
        struct igb_ring *rx_ring = q_vector->rx.ring;
        u32 ntc = rx_ring->next_to_clean;
        struct bpf_prog *xdp_prog;
        unsigned int xdp_xmit = 0;
        bool failure = false;
        u16 entries_to_alloc;
        struct sk_buff *skb;

        /* xdp_prog cannot be NULL in the ZC path */
        xdp_prog = READ_ONCE(rx_ring->xdp_prog);

        while (likely(total_packets < budget)) {
                union e1000_adv_rx_desc *rx_desc;
                ktime_t timestamp = 0;
                struct xdp_buff *xdp;
                unsigned int size;
                int xdp_res = 0;

                rx_desc = IGB_RX_DESC(rx_ring, ntc);
                size = le16_to_cpu(rx_desc->wb.upper.length);
                if (!size)
                        break;

                /* This memory barrier is needed to keep us from reading
                 * any other fields out of the rx_desc until we know the
                 * descriptor has been written back
                 */
                dma_rmb();

                xdp = rx_ring->rx_buffer_info_zc[ntc];
                xsk_buff_set_size(xdp, size);
                xsk_buff_dma_sync_for_cpu(xdp);

                /* pull rx packet timestamp if available and valid */
                if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
                        int ts_hdr_len;

                        ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
                                                         xdp->data,
                                                         &timestamp);

                        xdp->data += ts_hdr_len;
                        xdp->data_meta += ts_hdr_len;
                        size -= ts_hdr_len;
                }

                xdp_res = igb_run_xdp_zc(adapter, rx_ring, xdp, xsk_pool,
                                         xdp_prog);

                if (xdp_res) {
                        if (likely(xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR))) {
                                xdp_xmit |= xdp_res;
                        } else if (xdp_res == IGB_XDP_EXIT) {
                                failure = true;
                                break;
                        } else if (xdp_res == IGB_XDP_CONSUMED) {
                                xsk_buff_free(xdp);
                        }

                        total_packets++;
                        total_bytes += size;
                        ntc++;
                        if (ntc == rx_ring->count)
                                ntc = 0;
                        continue;
                }

                skb = igb_construct_skb_zc(rx_ring, xdp, timestamp);

                /* exit if we failed to retrieve a buffer */
                if (!skb) {
                        rx_ring->rx_stats.alloc_failed++;
                        break;
                }

                xsk_buff_free(xdp);
                ntc++;
                if (ntc == rx_ring->count)
                        ntc = 0;

                if (eth_skb_pad(skb))
                        continue;

                /* probably a little skewed due to removing CRC */
                total_bytes += skb->len;

                /* populate checksum, timestamp, VLAN, and protocol */
                igb_process_skb_fields(rx_ring, rx_desc, skb);

                napi_gro_receive(&q_vector->napi, skb);

                /* update budget accounting */
                total_packets++;
        }

        rx_ring->next_to_clean = ntc;

        if (xdp_xmit)
                igb_finalize_xdp(adapter, xdp_xmit);

        igb_update_rx_stats(q_vector, total_packets, total_bytes);

        entries_to_alloc = igb_desc_unused(rx_ring);
        if (entries_to_alloc >= IGB_RX_BUFFER_WRITE)
                failure |= !igb_alloc_rx_buffers_zc(rx_ring, xsk_pool,
                                                    entries_to_alloc);

        if (xsk_uses_need_wakeup(xsk_pool)) {
                if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
                        xsk_set_rx_need_wakeup(xsk_pool);
                else
                        xsk_clear_rx_need_wakeup(xsk_pool);

                return (int)total_packets;
        }
        return failure ? budget : (int)total_packets;
}

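/* Zero-copy Tx: peek a batch of descriptors from the XSK Tx ring and post
 * them to the hardware ring under the free-descriptor budget. Returns true
 * when no further Tx work is pending for this ring.
 */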
bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
{
        unsigned int budget = igb_desc_unused(tx_ring);
        u32 cmd_type, olinfo_status, nb_pkts, i = 0;
        struct xdp_desc *descs = xsk_pool->tx_descs;
        union e1000_adv_tx_desc *tx_desc = NULL;
        struct igb_tx_buffer *tx_buffer_info;
        unsigned int total_bytes = 0;
        dma_addr_t dma;

        if (!netif_carrier_ok(tx_ring->netdev))
                return true;

        if (test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))
                return true;

        nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget);
        if (!nb_pkts)
                return true;

        while (nb_pkts-- > 0) {
                dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
                xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len);

                tx_buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
                tx_buffer_info->bytecount = descs[i].len;
                tx_buffer_info->type = IGB_TYPE_XSK;
                tx_buffer_info->xdpf = NULL;
                tx_buffer_info->gso_segs = 1;
                tx_buffer_info->time_stamp = jiffies;

                tx_desc = IGB_TX_DESC(tx_ring, tx_ring->next_to_use);
                tx_desc->read.buffer_addr = cpu_to_le64(dma);

                /* put descriptor type bits */
                cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
                           E1000_ADVTXD_DCMD_IFCS;
                olinfo_status = descs[i].len << E1000_ADVTXD_PAYLEN_SHIFT;

                /* FIXME: This sets the Report Status (RS) bit for every
                 * descriptor. One nice to have optimization would be to set it
                 * only for the last descriptor in the whole batch. See Intel
                 * ice driver for an example on how to do it.
                 */
                cmd_type |= descs[i].len | IGB_TXD_DCMD;
                tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
                tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);

                total_bytes += descs[i].len;

                i++;
                tx_ring->next_to_use++;
                tx_buffer_info->next_to_watch = tx_desc;
                if (tx_ring->next_to_use == tx_ring->count)
                        tx_ring->next_to_use = 0;
        }

        netdev_tx_sent_queue(txring_txq(tx_ring), total_bytes);
        igb_xdp_ring_update_tail(tx_ring);

        return nb_pkts < budget;
}

int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
        struct igb_adapter *adapter = netdev_priv(dev);
        struct e1000_hw *hw = &adapter->hw;
        struct igb_ring *ring;
        u32 eics = 0;

        if (test_bit(__IGB_DOWN, &adapter->state))
                return -ENETDOWN;

        if (!igb_xdp_is_enabled(adapter))
                return -EINVAL;

        if (qid >= adapter->num_tx_queues)
                return -EINVAL;

        ring = adapter->tx_ring[qid];

        if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
                return -ENETDOWN;

        if (!READ_ONCE(ring->xsk_pool))
                return -EINVAL;

        if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
                /* Cause software interrupt */
                if (adapter->flags & IGB_FLAG_HAS_MSIX) {
                        eics |= ring->q_vector->eims_value;
                        wr32(E1000_EICS, eics);
                } else {
                        wr32(E1000_ICS, E1000_ICS_RXDMT0);
                }
        }

        return 0;
}