/*
 * Huawei HiNIC PCI Express Linux driver
 * Copyright(c) 2017 Huawei Technologies Co., Ltd
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/u64_stats_sync.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/skbuff.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include <linux/cpumask.h>
#include <asm/barrier.h>

#include "hinic_common.h"
#include "hinic_hw_if.h"
#include "hinic_hw_wqe.h"
#include "hinic_hw_wq.h"
#include "hinic_hw_qp.h"
#include "hinic_hw_dev.h"
#include "hinic_rx.h"
#include "hinic_dev.h"

#define RX_IRQ_NO_PENDING		0
#define RX_IRQ_NO_COALESC		0
#define RX_IRQ_NO_LLI_TIMER		0
#define RX_IRQ_NO_CREDIT		0
#define RX_IRQ_NO_RESEND_TIMER		0

/**
 * hinic_rxq_clean_stats - Clean the statistics of the specific queue
 * @rxq: Logical Rx Queue
 **/
void hinic_rxq_clean_stats(struct hinic_rxq *rxq)
{
	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;

	u64_stats_update_begin(&rxq_stats->syncp);
	rxq_stats->pkts = 0;
	rxq_stats->bytes = 0;
	u64_stats_update_end(&rxq_stats->syncp);
}

/**
 * hinic_rxq_get_stats - get statistics of Rx Queue
 * @rxq: Logical Rx Queue
 * @stats: return updated stats here
 **/
void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)
{
	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
	unsigned int start;

	u64_stats_update_begin(&stats->syncp);
	do {
		start = u64_stats_fetch_begin(&rxq_stats->syncp);
		stats->pkts = rxq_stats->pkts;
		stats->bytes = rxq_stats->bytes;
	} while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
	u64_stats_update_end(&stats->syncp);
}

/**
 * rxq_stats_init - Initialize the statistics of the specific queue
 * @rxq: Logical Rx Queue
 **/
static void rxq_stats_init(struct hinic_rxq *rxq)
{
	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;

	u64_stats_init(&rxq_stats->syncp);
	hinic_rxq_clean_stats(rxq);
}

/**
 * rx_csum - set the checksum status of the skb based on the Rx CQE
 * @rxq: rx queue
 * @cons_idx: consumer index of the completed wqe
 * @skb: received skb
 **/
static void rx_csum(struct hinic_rxq *rxq, u16 cons_idx,
		    struct sk_buff *skb)
{
	struct net_device *netdev = rxq->netdev;
	struct hinic_rq_cqe *cqe;
	struct hinic_rq *rq;
	u32 csum_err;
	u32 status;

	rq = rxq->rq;
	cqe = rq->cqe[cons_idx];
	status = be32_to_cpu(cqe->status);
	csum_err = HINIC_RQ_CQE_STATUS_GET(status, CSUM_ERR);

	if (!(netdev->features & NETIF_F_RXCSUM))
		return;

	if (!csum_err)
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	else
		skb->ip_summed = CHECKSUM_NONE;
}

/**
 * rx_alloc_skb - allocate skb and map it to dma address
 * @rxq: rx queue
 * @dma_addr: returned dma address for the skb
 *
 * Return skb
 **/
static struct sk_buff *rx_alloc_skb(struct hinic_rxq *rxq,
				    dma_addr_t *dma_addr)
{
	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
	struct hinic_hwdev *hwdev = nic_dev->hwdev;
	struct hinic_hwif *hwif = hwdev->hwif;
	struct pci_dev *pdev = hwif->pdev;
	struct sk_buff *skb;
	dma_addr_t addr;
	int err;

	skb = netdev_alloc_skb_ip_align(rxq->netdev, rxq->rq->buf_sz);
	if (!skb) {
		netdev_err(rxq->netdev, "Failed to allocate Rx SKB\n");
		return NULL;
	}

	addr = dma_map_single(&pdev->dev, skb->data, rxq->rq->buf_sz,
			      DMA_FROM_DEVICE);
	err = dma_mapping_error(&pdev->dev, addr);
	if (err) {
		dev_err(&pdev->dev, "Failed to map Rx DMA, err = %d\n", err);
		goto err_rx_map;
	}

	*dma_addr = addr;
	return skb;

err_rx_map:
	dev_kfree_skb_any(skb);
	return NULL;
}

/**
 * rx_unmap_skb - unmap the dma address of the skb
 * @rxq: rx queue
 * @dma_addr: dma address of the skb
 **/
static void rx_unmap_skb(struct hinic_rxq *rxq, dma_addr_t dma_addr)
{
	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
	struct hinic_hwdev *hwdev = nic_dev->hwdev;
	struct hinic_hwif *hwif = hwdev->hwif;
	struct pci_dev *pdev = hwif->pdev;

	dma_unmap_single(&pdev->dev, dma_addr, rxq->rq->buf_sz,
			 DMA_FROM_DEVICE);
}

/**
 * rx_free_skb - unmap and free skb
 * @rxq: rx queue
 * @skb: skb to free
 * @dma_addr: dma address of the skb
 **/
static void rx_free_skb(struct hinic_rxq *rxq, struct sk_buff *skb,
			dma_addr_t dma_addr)
{
	rx_unmap_skb(rxq, dma_addr);
	dev_kfree_skb_any(skb);
}

/**
 * rx_alloc_pkts - allocate pkts in rx queue
 * @rxq: rx queue
 *
 * Return number of skbs allocated
 **/
static int rx_alloc_pkts(struct hinic_rxq *rxq)
{
	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
	struct hinic_rq_wqe *rq_wqe;
	unsigned int free_wqebbs;
	struct hinic_sge sge;
	dma_addr_t dma_addr;
	struct sk_buff *skb;
	u16 prod_idx;
	int i;

	free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq);

	/* Limit the allocation chunks */
	if (free_wqebbs > nic_dev->rx_weight)
		free_wqebbs = nic_dev->rx_weight;

	for (i = 0; i < free_wqebbs; i++) {
		skb = rx_alloc_skb(rxq, &dma_addr);
		if (!skb) {
			netdev_err(rxq->netdev, "Failed to alloc Rx skb\n");
			goto skb_out;
		}

		hinic_set_sge(&sge, dma_addr, skb->len);

		rq_wqe = hinic_rq_get_wqe(rxq->rq, HINIC_RQ_WQE_SIZE,
					  &prod_idx);
		if (!rq_wqe) {
			rx_free_skb(rxq, skb, dma_addr);
			goto skb_out;
		}

		hinic_rq_prepare_wqe(rxq->rq, prod_idx, rq_wqe, &sge);

		hinic_rq_write_wqe(rxq->rq, prod_idx, rq_wqe, skb);
	}

skb_out:
	if (i) {
		wmb();	/* write all the wqes before updating PI */

		hinic_rq_update(rxq->rq, prod_idx);
		tasklet_schedule(&rxq->rx_task);
	}

	return i;
}

/**
 * free_all_rx_skbs - free all skbs in rx queue
 * @rxq: rx queue
 **/
static void free_all_rx_skbs(struct hinic_rxq *rxq)
{
	struct hinic_rq *rq = rxq->rq;
	struct hinic_hw_wqe *hw_wqe;
	struct hinic_sge sge;
	u16 ci;

	while ((hw_wqe = hinic_read_wqe(rq->wq, HINIC_RQ_WQE_SIZE, &ci))) {
		if (IS_ERR(hw_wqe))
			break;

		hinic_rq_get_sge(rq, &hw_wqe->rq_wqe, ci, &sge);

		hinic_put_wqe(rq->wq, HINIC_RQ_WQE_SIZE);

		rx_free_skb(rxq, rq->saved_skb[ci], hinic_sge_to_dma(&sge));
	}
}

/**
 * rx_alloc_task - tasklet for refilling the rx queue with new skbs
 * @data: rx queue
 **/
static void rx_alloc_task(unsigned long data)
{
	struct hinic_rxq *rxq = (struct hinic_rxq *)data;

	(void)rx_alloc_pkts(rxq);
}

/**
 * rx_recv_jumbo_pkt - Rx handler for jumbo pkt
 * @rxq: rx queue
 * @head_skb: the first skb in the list
 * @left_pkt_len: remaining size of the pkt, excluding the head skb
 * @ci: consumer index
 *
 * Return number of wqes used for the rest of the pkt
 **/
static int rx_recv_jumbo_pkt(struct hinic_rxq *rxq, struct sk_buff *head_skb,
			     unsigned int left_pkt_len, u16 ci)
{
	struct sk_buff *skb, *curr_skb = head_skb;
	struct hinic_rq_wqe *rq_wqe;
	unsigned int curr_len;
	struct hinic_sge sge;
	int num_wqes = 0;

	while (left_pkt_len > 0) {
		rq_wqe = hinic_rq_read_next_wqe(rxq->rq, HINIC_RQ_WQE_SIZE,
						&skb, &ci);

		num_wqes++;

		hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);

		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));

		prefetch(skb->data);

		curr_len = (left_pkt_len > HINIC_RX_BUF_SZ) ? HINIC_RX_BUF_SZ :
			    left_pkt_len;

		left_pkt_len -= curr_len;

		__skb_put(skb, curr_len);

		if (curr_skb == head_skb)
			skb_shinfo(head_skb)->frag_list = skb;
		else
			curr_skb->next = skb;

		head_skb->len += skb->len;
		head_skb->data_len += skb->len;
		head_skb->truesize += skb->truesize;

		curr_skb = skb;
	}

	return num_wqes;
}

/**
 * rxq_recv - Rx handler
 * @rxq: rx queue
 * @budget: maximum pkts to process
 *
 * Return number of pkts received
 **/
static int rxq_recv(struct hinic_rxq *rxq, int budget)
{
	struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq);
	u64 pkt_len = 0, rx_bytes = 0;
	struct hinic_rq_wqe *rq_wqe;
	int num_wqes, pkts = 0;
	struct hinic_sge sge;
	struct sk_buff *skb;
	u16 ci;

	while (pkts < budget) {
		num_wqes = 0;

		rq_wqe = hinic_rq_read_wqe(rxq->rq, HINIC_RQ_WQE_SIZE, &skb,
					   &ci);
		if (!rq_wqe)
			break;

		hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);

		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));

		rx_csum(rxq, ci, skb);

		prefetch(skb->data);

		pkt_len = sge.len;

		if (pkt_len <= HINIC_RX_BUF_SZ) {
			__skb_put(skb, pkt_len);
		} else {
			__skb_put(skb, HINIC_RX_BUF_SZ);
			num_wqes = rx_recv_jumbo_pkt(rxq, skb, pkt_len -
						     HINIC_RX_BUF_SZ, ci);
		}

		hinic_rq_put_wqe(rxq->rq, ci,
				 (num_wqes + 1) * HINIC_RQ_WQE_SIZE);

		skb_record_rx_queue(skb, qp->q_id);
		skb->protocol = eth_type_trans(skb, rxq->netdev);

		napi_gro_receive(&rxq->napi, skb);

		pkts++;
		rx_bytes += pkt_len;
	}

	if (pkts)
		tasklet_schedule(&rxq->rx_task);	/* rx_alloc_pkts */

	u64_stats_update_begin(&rxq->rxq_stats.syncp);
	rxq->rxq_stats.pkts += pkts;
	rxq->rxq_stats.bytes += rx_bytes;
	u64_stats_update_end(&rxq->rxq_stats.syncp);

	return pkts;
}

static int rx_poll(struct napi_struct *napi, int budget)
{
	struct hinic_rxq *rxq = container_of(napi, struct hinic_rxq, napi);
	struct hinic_rq *rq = rxq->rq;
	int pkts;

	pkts = rxq_recv(rxq, budget);
	if (pkts >= budget)
		return budget;

	napi_complete(napi);
	enable_irq(rq->irq);
	return pkts;
}

static void rx_add_napi(struct hinic_rxq *rxq)
{
	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);

	netif_napi_add(rxq->netdev, &rxq->napi, rx_poll, nic_dev->rx_weight);
	napi_enable(&rxq->napi);
}

static void rx_del_napi(struct hinic_rxq *rxq)
{
	napi_disable(&rxq->napi);
	netif_napi_del(&rxq->napi);
}

static irqreturn_t rx_irq(int irq, void *data)
{
	struct hinic_rxq *rxq = (struct hinic_rxq *)data;
	struct hinic_rq *rq = rxq->rq;
	struct hinic_dev *nic_dev;

	/* Disable the interrupt until the napi poll is completed */
	disable_irq_nosync(rq->irq);

	nic_dev = netdev_priv(rxq->netdev);
	hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry);

	napi_schedule(&rxq->napi);
	return IRQ_HANDLED;
}

static int rx_request_irq(struct hinic_rxq *rxq)
{
	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
	struct hinic_hwdev *hwdev = nic_dev->hwdev;
	struct hinic_rq *rq = rxq->rq;
	struct hinic_qp *qp;
	struct cpumask mask;
	int err;

	rx_add_napi(rxq);

	hinic_hwdev_msix_set(hwdev, rq->msix_entry,
			     RX_IRQ_NO_PENDING, RX_IRQ_NO_COALESC,
			     RX_IRQ_NO_LLI_TIMER, RX_IRQ_NO_CREDIT,
			     RX_IRQ_NO_RESEND_TIMER);

	err = request_irq(rq->irq, rx_irq, 0, rxq->irq_name, rxq);
	if (err) {
		rx_del_napi(rxq);
		return err;
	}

	qp = container_of(rq, struct hinic_qp, rq);
	cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask);
	return irq_set_affinity_hint(rq->irq, &mask);
}

static void rx_free_irq(struct hinic_rxq *rxq)
{
	struct hinic_rq *rq = rxq->rq;

	irq_set_affinity_hint(rq->irq, NULL);
	free_irq(rq->irq, rxq);
	rx_del_napi(rxq);
}

/**
 * hinic_init_rxq - Initialize the Rx Queue
 * @rxq: Logical Rx Queue
 * @rq: Hardware Rx Queue to connect the Logical queue with
 * @netdev: network device to connect the Logical queue with
 *
 * Return 0 - Success, negative - Failure
 **/
int hinic_init_rxq(struct hinic_rxq *rxq, struct hinic_rq *rq,
		   struct net_device *netdev)
{
	struct hinic_qp *qp = container_of(rq, struct hinic_qp, rq);
	int err, pkts, irqname_len;

	rxq->netdev = netdev;
	rxq->rq = rq;

	rxq_stats_init(rxq);

	irqname_len = snprintf(NULL, 0, "hinic_rxq%d", qp->q_id) + 1;
	rxq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL);
	if (!rxq->irq_name)
		return -ENOMEM;

	sprintf(rxq->irq_name, "hinic_rxq%d", qp->q_id);

	tasklet_init(&rxq->rx_task, rx_alloc_task, (unsigned long)rxq);

	pkts = rx_alloc_pkts(rxq);
	if (!pkts) {
		err = -ENOMEM;
		goto err_rx_pkts;
	}

	err = rx_request_irq(rxq);
	if (err) {
		netdev_err(netdev, "Failed to request Rx irq\n");
		goto err_req_rx_irq;
	}

	return 0;

err_req_rx_irq:
err_rx_pkts:
	tasklet_kill(&rxq->rx_task);
	free_all_rx_skbs(rxq);
	devm_kfree(&netdev->dev, rxq->irq_name);
	return err;
}

/**
 * hinic_clean_rxq - Clean the Rx Queue
 * @rxq: Logical Rx Queue
 **/
void hinic_clean_rxq(struct hinic_rxq *rxq)
{
	struct net_device *netdev = rxq->netdev;

	rx_free_irq(rxq);

	tasklet_kill(&rxq->rx_task);
	free_all_rx_skbs(rxq);
	devm_kfree(&netdev->dev, rxq->irq_name);
}