1 /* 2 * This file is provided under a dual BSD/GPLv2 license. When using or 3 * redistributing this file, you may do so under either license. 4 * 5 * GPL LICENSE SUMMARY 6 * 7 * Copyright(c) 2012 Intel Corporation. All rights reserved. 8 * Copyright (C) 2015 EMC Corporation. All Rights Reserved. 9 * 10 * This program is free software; you can redistribute it and/or modify 11 * it under the terms of version 2 of the GNU General Public License as 12 * published by the Free Software Foundation. 13 * 14 * BSD LICENSE 15 * 16 * Copyright(c) 2012 Intel Corporation. All rights reserved. 17 * Copyright (C) 2015 EMC Corporation. All Rights Reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions 21 * are met: 22 * 23 * * Redistributions of source code must retain the above copyright 24 * notice, this list of conditions and the following disclaimer. 25 * * Redistributions in binary form must reproduce the above copy 26 * notice, this list of conditions and the following disclaimer in 27 * the documentation and/or other materials provided with the 28 * distribution. 29 * * Neither the name of Intel Corporation nor the names of its 30 * contributors may be used to endorse or promote products derived 31 * from this software without specific prior written permission. 32 * 33 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 34 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 35 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 36 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 37 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 38 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 39 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 40 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 41 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 42 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 43 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 44 * 45 * PCIe NTB Transport Linux driver 46 * 47 * Contact Information: 48 * Jon Mason <jon.mason@intel.com> 49 */ 50 #include <linux/debugfs.h> 51 #include <linux/delay.h> 52 #include <linux/dmaengine.h> 53 #include <linux/dma-mapping.h> 54 #include <linux/errno.h> 55 #include <linux/export.h> 56 #include <linux/interrupt.h> 57 #include <linux/kthread.h> 58 #include <linux/module.h> 59 #include <linux/pci.h> 60 #include <linux/slab.h> 61 #include <linux/seq_file.h> 62 #include <linux/types.h> 63 #include <linux/uaccess.h> 64 #include <linux/mutex.h> 65 #include <linux/wait.h> 66 #include "linux/ntb.h" 67 #include "linux/ntb_transport.h" 68 69 #define NTB_TRANSPORT_VERSION 4 70 #define NTB_TRANSPORT_VER "4" 71 #define NTB_TRANSPORT_NAME "ntb_transport" 72 #define NTB_TRANSPORT_DESC "Software Queue-Pair Transport over NTB" 73 #define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2) 74 75 MODULE_DESCRIPTION(NTB_TRANSPORT_DESC); 76 MODULE_VERSION(NTB_TRANSPORT_VER); 77 MODULE_LICENSE("Dual BSD/GPL"); 78 MODULE_AUTHOR("Intel Corporation"); 79 80 static unsigned long max_mw_size; 81 module_param(max_mw_size, ulong, 0644); 82 MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); 83 84 static unsigned int transport_mtu = 0x10000; 85 module_param(transport_mtu, uint, 0644); 86 MODULE_PARM_DESC(transport_mtu, "Maximum size of NTB transport packets"); 87 88 static unsigned char max_num_clients; 89 module_param(max_num_clients, 
byte, 0644); 90 MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients"); 91 92 static unsigned int copy_bytes = 1024; 93 module_param(copy_bytes, uint, 0644); 94 MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA"); 95 96 static bool use_dma; 97 module_param(use_dma, bool, 0644); 98 MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); 99 100 static bool use_msi; 101 #ifdef CONFIG_NTB_MSI 102 module_param(use_msi, bool, 0644); 103 MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); 104 #endif 105 106 static bool tx_memcpy_offload; 107 module_param(tx_memcpy_offload, bool, 0644); 108 MODULE_PARM_DESC(tx_memcpy_offload, "Offload TX memcpy_toio() to a kernel thread"); 109 110 static struct dentry *nt_debugfs_dir; 111 112 /* Only two-ports NTB devices are supported */ 113 #define PIDX NTB_DEF_PEER_IDX 114 115 struct ntb_queue_entry { 116 /* ntb_queue list reference */ 117 struct list_head entry; 118 /* pointers to data to be transferred */ 119 void *cb_data; 120 void *buf; 121 unsigned int len; 122 unsigned int flags; 123 int errors; 124 unsigned int tx_index; 125 unsigned int rx_index; 126 127 struct ntb_transport_qp *qp; 128 union { 129 struct ntb_payload_header __iomem *tx_hdr; 130 struct ntb_payload_header *rx_hdr; 131 }; 132 }; 133 134 struct ntb_rx_info { 135 unsigned int entry; 136 }; 137 138 struct ntb_transport_qp { 139 struct ntb_transport_ctx *transport; 140 struct ntb_dev *ndev; 141 void *cb_data; 142 struct dma_chan *tx_dma_chan; 143 struct dma_chan *rx_dma_chan; 144 145 bool client_ready; 146 bool link_is_up; 147 bool active; 148 149 u8 qp_num; /* Only 64 QP's are allowed. 
0-63 */ 150 u64 qp_bit; 151 152 struct ntb_rx_info __iomem *rx_info; 153 struct ntb_rx_info *remote_rx_info; 154 155 void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, 156 void *data, int len); 157 struct list_head tx_free_q; 158 struct list_head tx_offl_q; 159 spinlock_t ntb_tx_free_q_lock; 160 spinlock_t ntb_tx_offl_q_lock; 161 void __iomem *tx_mw; 162 phys_addr_t tx_mw_phys; 163 size_t tx_mw_size; 164 dma_addr_t tx_mw_dma_addr; 165 unsigned int tx_index; 166 unsigned int tx_max_entry; 167 unsigned int tx_max_frame; 168 169 void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, 170 void *data, int len); 171 struct list_head rx_post_q; 172 struct list_head rx_pend_q; 173 struct list_head rx_free_q; 174 /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */ 175 spinlock_t ntb_rx_q_lock; 176 void *rx_buff; 177 unsigned int rx_index; 178 unsigned int rx_max_entry; 179 unsigned int rx_max_frame; 180 unsigned int rx_alloc_entry; 181 dma_cookie_t last_cookie; 182 struct tasklet_struct rxc_db_work; 183 184 void (*event_handler)(void *data, int status); 185 struct delayed_work link_work; 186 struct work_struct link_cleanup; 187 188 struct dentry *debugfs_dir; 189 struct dentry *debugfs_stats; 190 191 /* Stats */ 192 u64 rx_bytes; 193 u64 rx_pkts; 194 u64 rx_ring_empty; 195 u64 rx_err_no_buf; 196 u64 rx_err_oflow; 197 u64 rx_err_ver; 198 u64 rx_memcpy; 199 u64 rx_async; 200 u64 tx_bytes; 201 u64 tx_pkts; 202 u64 tx_ring_full; 203 u64 tx_err_no_buf; 204 u64 tx_memcpy; 205 u64 tx_async; 206 207 bool use_msi; 208 int msi_irq; 209 struct ntb_msi_desc msi_desc; 210 struct ntb_msi_desc peer_msi_desc; 211 212 struct task_struct *tx_offload_thread; 213 wait_queue_head_t tx_offload_wq; 214 }; 215 216 struct ntb_transport_mw { 217 phys_addr_t phys_addr; 218 resource_size_t phys_size; 219 void __iomem *vbase; 220 size_t xlat_size; 221 size_t buff_size; 222 size_t alloc_size; 223 void *alloc_addr; 224 void *virt_addr; 225 dma_addr_t dma_addr; 226 }; 227 228 struct 
ntb_transport_client_dev { 229 struct list_head entry; 230 struct ntb_transport_ctx *nt; 231 struct device dev; 232 }; 233 234 struct ntb_transport_ctx { 235 struct list_head entry; 236 struct list_head client_devs; 237 238 struct ntb_dev *ndev; 239 240 struct ntb_transport_mw *mw_vec; 241 struct ntb_transport_qp *qp_vec; 242 unsigned int mw_count; 243 unsigned int qp_count; 244 u64 qp_bitmap; 245 u64 qp_bitmap_free; 246 247 bool use_msi; 248 unsigned int msi_spad_offset; 249 u64 msi_db_mask; 250 251 bool link_is_up; 252 struct delayed_work link_work; 253 struct work_struct link_cleanup; 254 255 struct dentry *debugfs_node_dir; 256 257 /* Make sure workq of link event be executed serially */ 258 struct mutex link_event_lock; 259 }; 260 261 enum { 262 DESC_DONE_FLAG = BIT(0), 263 LINK_DOWN_FLAG = BIT(1), 264 }; 265 266 struct ntb_payload_header { 267 unsigned int ver; 268 unsigned int len; 269 unsigned int flags; 270 }; 271 272 enum { 273 VERSION = 0, 274 QP_LINKS, 275 NUM_QPS, 276 NUM_MWS, 277 MW0_SZ_HIGH, 278 MW0_SZ_LOW, 279 }; 280 281 #define dev_client_dev(__dev) \ 282 container_of((__dev), struct ntb_transport_client_dev, dev) 283 284 #define drv_client(__drv) \ 285 container_of((__drv), struct ntb_transport_client, driver) 286 287 #define QP_TO_MW(nt, qp) ((qp) % nt->mw_count) 288 #define NTB_QP_DEF_NUM_ENTRIES 100 289 #define NTB_LINK_DOWN_TIMEOUT 10 290 291 static void ntb_transport_rxc_db(unsigned long data); 292 static const struct ntb_ctx_ops ntb_transport_ops; 293 static struct ntb_client ntb_transport_client; 294 static int ntb_async_tx_submit(struct ntb_transport_qp *qp, 295 struct ntb_queue_entry *entry); 296 static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset); 297 static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset); 298 static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset); 299 static int ntb_tx_memcpy_kthread(void *data); 300 301 302 static inline bool ntb_tx_offload_enabled(struct 
ntb_transport_qp *qp) 303 { 304 return tx_memcpy_offload && qp && qp->tx_offload_thread; 305 } 306 307 static int ntb_transport_bus_match(struct device *dev, 308 const struct device_driver *drv) 309 { 310 return !strncmp(dev_name(dev), drv->name, strlen(drv->name)); 311 } 312 313 static int ntb_transport_bus_probe(struct device *dev) 314 { 315 const struct ntb_transport_client *client; 316 int rc; 317 318 get_device(dev); 319 320 client = drv_client(dev->driver); 321 rc = client->probe(dev); 322 if (rc) 323 put_device(dev); 324 325 return rc; 326 } 327 328 static void ntb_transport_bus_remove(struct device *dev) 329 { 330 const struct ntb_transport_client *client; 331 332 client = drv_client(dev->driver); 333 client->remove(dev); 334 335 put_device(dev); 336 } 337 338 static const struct bus_type ntb_transport_bus = { 339 .name = "ntb_transport", 340 .match = ntb_transport_bus_match, 341 .probe = ntb_transport_bus_probe, 342 .remove = ntb_transport_bus_remove, 343 }; 344 345 static LIST_HEAD(ntb_transport_list); 346 347 static int ntb_bus_init(struct ntb_transport_ctx *nt) 348 { 349 list_add_tail(&nt->entry, &ntb_transport_list); 350 return 0; 351 } 352 353 static void ntb_bus_remove(struct ntb_transport_ctx *nt) 354 { 355 struct ntb_transport_client_dev *client_dev, *cd; 356 357 list_for_each_entry_safe(client_dev, cd, &nt->client_devs, entry) { 358 dev_err(client_dev->dev.parent, "%s still attached to bus, removing\n", 359 dev_name(&client_dev->dev)); 360 list_del(&client_dev->entry); 361 device_unregister(&client_dev->dev); 362 } 363 364 list_del(&nt->entry); 365 } 366 367 static void ntb_transport_client_release(struct device *dev) 368 { 369 struct ntb_transport_client_dev *client_dev; 370 371 client_dev = dev_client_dev(dev); 372 kfree(client_dev); 373 } 374 375 /** 376 * ntb_transport_unregister_client_dev - Unregister NTB client device 377 * @device_name: Name of NTB client device 378 * 379 * Unregister an NTB client device with the NTB transport layer 380 
*/ 381 void ntb_transport_unregister_client_dev(char *device_name) 382 { 383 struct ntb_transport_client_dev *client, *cd; 384 struct ntb_transport_ctx *nt; 385 386 list_for_each_entry(nt, &ntb_transport_list, entry) 387 list_for_each_entry_safe(client, cd, &nt->client_devs, entry) 388 if (!strncmp(dev_name(&client->dev), device_name, 389 strlen(device_name))) { 390 list_del(&client->entry); 391 device_unregister(&client->dev); 392 } 393 } 394 EXPORT_SYMBOL_GPL(ntb_transport_unregister_client_dev); 395 396 /** 397 * ntb_transport_register_client_dev - Register NTB client device 398 * @device_name: Name of NTB client device 399 * 400 * Register an NTB client device with the NTB transport layer 401 * 402 * Returns: %0 on success or -errno code on error 403 */ 404 int ntb_transport_register_client_dev(char *device_name) 405 { 406 struct ntb_transport_client_dev *client_dev; 407 struct ntb_transport_ctx *nt; 408 int node; 409 int rc, i = 0; 410 411 if (list_empty(&ntb_transport_list)) 412 return -ENODEV; 413 414 list_for_each_entry(nt, &ntb_transport_list, entry) { 415 struct device *dev; 416 417 node = dev_to_node(&nt->ndev->dev); 418 419 client_dev = kzalloc_node(sizeof(*client_dev), 420 GFP_KERNEL, node); 421 if (!client_dev) { 422 rc = -ENOMEM; 423 goto err; 424 } 425 426 dev = &client_dev->dev; 427 428 /* setup and register client devices */ 429 dev_set_name(dev, "%s%d", device_name, i); 430 dev->bus = &ntb_transport_bus; 431 dev->release = ntb_transport_client_release; 432 dev->parent = &nt->ndev->dev; 433 434 rc = device_register(dev); 435 if (rc) { 436 put_device(dev); 437 goto err; 438 } 439 440 list_add_tail(&client_dev->entry, &nt->client_devs); 441 i++; 442 } 443 444 return 0; 445 446 err: 447 ntb_transport_unregister_client_dev(device_name); 448 449 return rc; 450 } 451 EXPORT_SYMBOL_GPL(ntb_transport_register_client_dev); 452 453 /** 454 * ntb_transport_register_client - Register NTB client driver 455 * @drv: NTB client driver to be registered 456 * 457 * 
Register an NTB client driver with the NTB transport layer 458 * 459 * RETURNS: An appropriate -ERRNO error value on error, or zero for success. 460 */ 461 int ntb_transport_register_client(struct ntb_transport_client *drv) 462 { 463 drv->driver.bus = &ntb_transport_bus; 464 465 if (list_empty(&ntb_transport_list)) 466 return -ENODEV; 467 468 return driver_register(&drv->driver); 469 } 470 EXPORT_SYMBOL_GPL(ntb_transport_register_client); 471 472 /** 473 * ntb_transport_unregister_client - Unregister NTB client driver 474 * @drv: NTB client driver to be unregistered 475 * 476 * Unregister an NTB client driver with the NTB transport layer 477 * 478 * RETURNS: An appropriate -ERRNO error value on error, or zero for success. 479 */ 480 void ntb_transport_unregister_client(struct ntb_transport_client *drv) 481 { 482 driver_unregister(&drv->driver); 483 } 484 EXPORT_SYMBOL_GPL(ntb_transport_unregister_client); 485 486 static int ntb_qp_debugfs_stats_show(struct seq_file *s, void *v) 487 { 488 struct ntb_transport_qp *qp = s->private; 489 490 if (!qp || !qp->link_is_up) 491 return 0; 492 493 seq_puts(s, "\nNTB QP stats:\n\n"); 494 495 seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes); 496 seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts); 497 seq_printf(s, "rx_memcpy - \t%llu\n", qp->rx_memcpy); 498 seq_printf(s, "rx_async - \t%llu\n", qp->rx_async); 499 seq_printf(s, "rx_ring_empty - %llu\n", qp->rx_ring_empty); 500 seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); 501 seq_printf(s, "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); 502 seq_printf(s, "rx_err_ver - \t%llu\n", qp->rx_err_ver); 503 seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff); 504 seq_printf(s, "rx_index - \t%u\n", qp->rx_index); 505 seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry); 506 seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); 507 508 seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes); 509 seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts); 510 seq_printf(s, "tx_memcpy 
- \t%llu\n", qp->tx_memcpy); 511 seq_printf(s, "tx_async - \t%llu\n", qp->tx_async); 512 seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full); 513 seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); 514 seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw); 515 seq_printf(s, "tx_index (H) - \t%u\n", qp->tx_index); 516 seq_printf(s, "RRI (T) - \t%u\n", qp->remote_rx_info->entry); 517 seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry); 518 seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp)); 519 seq_putc(s, '\n'); 520 521 seq_printf(s, "Using TX DMA - \t%s\n", qp->tx_dma_chan ? "Yes" : "No"); 522 seq_printf(s, "Using RX DMA - \t%s\n", qp->rx_dma_chan ? "Yes" : "No"); 523 seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down"); 524 seq_putc(s, '\n'); 525 526 return 0; 527 } 528 DEFINE_SHOW_ATTRIBUTE(ntb_qp_debugfs_stats); 529 530 static void ntb_list_add(spinlock_t *lock, struct list_head *entry, 531 struct list_head *list) 532 { 533 unsigned long flags; 534 535 spin_lock_irqsave(lock, flags); 536 list_add_tail(entry, list); 537 spin_unlock_irqrestore(lock, flags); 538 } 539 540 static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock, 541 struct list_head *list) 542 { 543 struct ntb_queue_entry *entry; 544 unsigned long flags; 545 546 spin_lock_irqsave(lock, flags); 547 if (list_empty(list)) { 548 entry = NULL; 549 goto out; 550 } 551 entry = list_first_entry(list, struct ntb_queue_entry, entry); 552 list_del(&entry->entry); 553 554 out: 555 spin_unlock_irqrestore(lock, flags); 556 557 return entry; 558 } 559 560 static struct ntb_queue_entry *ntb_list_mv(spinlock_t *lock, 561 struct list_head *list, 562 struct list_head *to_list) 563 { 564 struct ntb_queue_entry *entry; 565 unsigned long flags; 566 567 spin_lock_irqsave(lock, flags); 568 569 if (list_empty(list)) { 570 entry = NULL; 571 } else { 572 entry = list_first_entry(list, struct ntb_queue_entry, entry); 573 list_move_tail(&entry->entry, to_list); 574 } 575 576 
spin_unlock_irqrestore(lock, flags); 577 578 return entry; 579 } 580 581 static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, 582 unsigned int qp_num) 583 { 584 struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; 585 struct ntb_transport_mw *mw; 586 struct ntb_dev *ndev = nt->ndev; 587 struct ntb_queue_entry *entry; 588 unsigned int rx_size, num_qps_mw; 589 unsigned int mw_num, mw_count, qp_count; 590 unsigned int i; 591 int node; 592 593 mw_count = nt->mw_count; 594 qp_count = nt->qp_count; 595 596 mw_num = QP_TO_MW(nt, qp_num); 597 mw = &nt->mw_vec[mw_num]; 598 599 if (!mw->virt_addr) 600 return -ENOMEM; 601 602 if (mw_num < qp_count % mw_count) 603 num_qps_mw = qp_count / mw_count + 1; 604 else 605 num_qps_mw = qp_count / mw_count; 606 607 rx_size = (unsigned int)mw->xlat_size / num_qps_mw; 608 qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count); 609 rx_size -= sizeof(struct ntb_rx_info); 610 611 qp->remote_rx_info = qp->rx_buff + rx_size; 612 613 /* Due to housekeeping, there must be atleast 2 buffs */ 614 qp->rx_max_frame = min(transport_mtu, rx_size / 2); 615 qp->rx_max_entry = rx_size / qp->rx_max_frame; 616 qp->rx_index = 0; 617 618 /* 619 * Checking to see if we have more entries than the default. 620 * We should add additional entries if that is the case so we 621 * can be in sync with the transport frames. 
622 */ 623 node = dev_to_node(&ndev->dev); 624 for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) { 625 entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node); 626 if (!entry) 627 return -ENOMEM; 628 629 entry->qp = qp; 630 ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, 631 &qp->rx_free_q); 632 qp->rx_alloc_entry++; 633 } 634 635 qp->remote_rx_info->entry = qp->rx_max_entry - 1; 636 637 /* setup the hdr offsets with 0's */ 638 for (i = 0; i < qp->rx_max_entry; i++) { 639 void *offset = (qp->rx_buff + qp->rx_max_frame * (i + 1) - 640 sizeof(struct ntb_payload_header)); 641 memset(offset, 0, sizeof(struct ntb_payload_header)); 642 } 643 644 qp->rx_pkts = 0; 645 qp->tx_pkts = 0; 646 qp->tx_index = 0; 647 648 return 0; 649 } 650 651 static irqreturn_t ntb_transport_isr(int irq, void *dev) 652 { 653 struct ntb_transport_qp *qp = dev; 654 655 tasklet_schedule(&qp->rxc_db_work); 656 657 return IRQ_HANDLED; 658 } 659 660 static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt, 661 unsigned int qp_num) 662 { 663 struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; 664 int spad = qp_num * 2 + nt->msi_spad_offset; 665 666 if (!nt->use_msi) 667 return; 668 669 if (spad >= ntb_spad_count(nt->ndev)) 670 return; 671 672 qp->peer_msi_desc.addr_offset = 673 ntb_peer_spad_read(qp->ndev, PIDX, spad); 674 qp->peer_msi_desc.data = 675 ntb_peer_spad_read(qp->ndev, PIDX, spad + 1); 676 677 dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n", 678 qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data); 679 680 if (qp->peer_msi_desc.addr_offset) { 681 qp->use_msi = true; 682 dev_info(&qp->ndev->pdev->dev, 683 "Using MSI interrupts for QP%d\n", qp_num); 684 } 685 } 686 687 static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt, 688 unsigned int qp_num) 689 { 690 struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; 691 int spad = qp_num * 2 + nt->msi_spad_offset; 692 int rc; 693 694 if (!nt->use_msi) 695 return; 696 697 if (spad >= 
ntb_spad_count(nt->ndev)) { 698 dev_warn_once(&qp->ndev->pdev->dev, 699 "Not enough SPADS to use MSI interrupts\n"); 700 return; 701 } 702 703 ntb_spad_write(qp->ndev, spad, 0); 704 ntb_spad_write(qp->ndev, spad + 1, 0); 705 706 if (!qp->msi_irq) { 707 qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr, 708 KBUILD_MODNAME, qp, 709 &qp->msi_desc); 710 if (qp->msi_irq < 0) { 711 dev_warn(&qp->ndev->pdev->dev, 712 "Unable to allocate MSI interrupt for qp%d\n", 713 qp_num); 714 return; 715 } 716 } 717 718 rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset); 719 if (rc) 720 goto err_free_interrupt; 721 722 rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data); 723 if (rc) 724 goto err_free_interrupt; 725 726 dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n", 727 qp_num, qp->msi_irq, qp->msi_desc.addr_offset, 728 qp->msi_desc.data); 729 730 return; 731 732 err_free_interrupt: 733 devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp); 734 } 735 736 static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt) 737 { 738 int i; 739 740 dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed"); 741 742 for (i = 0; i < nt->qp_count; i++) 743 ntb_transport_setup_qp_peer_msi(nt, i); 744 } 745 746 static void ntb_transport_msi_desc_changed(void *data) 747 { 748 struct ntb_transport_ctx *nt = data; 749 int i; 750 751 dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed"); 752 753 for (i = 0; i < nt->qp_count; i++) 754 ntb_transport_setup_qp_msi(nt, i); 755 756 ntb_peer_db_set(nt->ndev, nt->msi_db_mask); 757 } 758 759 static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) 760 { 761 struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; 762 struct pci_dev *pdev = nt->ndev->pdev; 763 764 if (!mw->virt_addr) 765 return; 766 767 ntb_mw_clear_trans(nt->ndev, PIDX, num_mw); 768 dma_free_coherent(&pdev->dev, mw->alloc_size, 769 mw->alloc_addr, mw->dma_addr); 770 mw->xlat_size = 0; 771 mw->buff_size = 0; 772 mw->alloc_size = 0; 
773 mw->alloc_addr = NULL; 774 mw->virt_addr = NULL; 775 } 776 777 static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw, 778 struct device *ntb_dev, size_t align) 779 { 780 dma_addr_t dma_addr; 781 void *alloc_addr, *virt_addr; 782 int rc; 783 784 /* 785 * The buffer here is allocated against the NTB device. The reason to 786 * use dma_alloc_*() call is to allocate a large IOVA contiguous buffer 787 * backing the NTB BAR for the remote host to write to. During receive 788 * processing, the data is being copied out of the receive buffer to 789 * the kernel skbuff. When a DMA device is being used, dma_map_page() 790 * is called on the kvaddr of the receive buffer (from dma_alloc_*()) 791 * and remapped against the DMA device. It appears to be a double 792 * DMA mapping of buffers, but first is mapped to the NTB device and 793 * second is to the DMA device. DMA_ATTR_FORCE_CONTIGUOUS is necessary 794 * in order for the later dma_map_page() to not fail. 795 */ 796 alloc_addr = dma_alloc_attrs(ntb_dev, mw->alloc_size, 797 &dma_addr, GFP_KERNEL, 798 DMA_ATTR_FORCE_CONTIGUOUS); 799 if (!alloc_addr) { 800 dev_err(ntb_dev, "Unable to alloc MW buff of size %zu\n", 801 mw->alloc_size); 802 return -ENOMEM; 803 } 804 virt_addr = alloc_addr; 805 806 /* 807 * we must ensure that the memory address allocated is BAR size 808 * aligned in order for the XLAT register to take the value. This 809 * is a requirement of the hardware. It is recommended to setup CMA 810 * for BAR sizes equal or greater than 4MB. 
811 */ 812 if (!IS_ALIGNED(dma_addr, align)) { 813 if (mw->alloc_size > mw->buff_size) { 814 virt_addr = PTR_ALIGN(alloc_addr, align); 815 dma_addr = ALIGN(dma_addr, align); 816 } else { 817 rc = -ENOMEM; 818 goto err; 819 } 820 } 821 822 mw->alloc_addr = alloc_addr; 823 mw->virt_addr = virt_addr; 824 mw->dma_addr = dma_addr; 825 826 return 0; 827 828 err: 829 dma_free_coherent(ntb_dev, mw->alloc_size, alloc_addr, dma_addr); 830 831 return rc; 832 } 833 834 static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, 835 resource_size_t size) 836 { 837 struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; 838 struct pci_dev *pdev = nt->ndev->pdev; 839 size_t xlat_size, buff_size; 840 resource_size_t xlat_align; 841 resource_size_t xlat_align_size; 842 int rc; 843 844 if (!size) 845 return -EINVAL; 846 847 rc = ntb_mw_get_align(nt->ndev, PIDX, num_mw, &xlat_align, 848 &xlat_align_size, NULL); 849 if (rc) 850 return rc; 851 852 xlat_size = round_up(size, xlat_align_size); 853 buff_size = round_up(size, xlat_align); 854 855 /* No need to re-setup */ 856 if (mw->xlat_size == xlat_size) 857 return 0; 858 859 if (mw->buff_size) 860 ntb_free_mw(nt, num_mw); 861 862 /* Alloc memory for receiving data. 
Must be aligned */ 863 mw->xlat_size = xlat_size; 864 mw->buff_size = buff_size; 865 mw->alloc_size = buff_size; 866 867 rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); 868 if (rc) { 869 mw->alloc_size *= 2; 870 rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); 871 if (rc) { 872 dev_err(&pdev->dev, 873 "Unable to alloc aligned MW buff\n"); 874 mw->xlat_size = 0; 875 mw->buff_size = 0; 876 mw->alloc_size = 0; 877 return rc; 878 } 879 } 880 881 /* Notify HW the memory location of the receive buffer */ 882 rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr, 883 mw->xlat_size); 884 if (rc) { 885 dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw); 886 ntb_free_mw(nt, num_mw); 887 return -EIO; 888 } 889 890 return 0; 891 } 892 893 static void ntb_qp_link_context_reset(struct ntb_transport_qp *qp) 894 { 895 qp->link_is_up = false; 896 qp->active = false; 897 898 qp->tx_index = 0; 899 qp->rx_index = 0; 900 qp->rx_bytes = 0; 901 qp->rx_pkts = 0; 902 qp->rx_ring_empty = 0; 903 qp->rx_err_no_buf = 0; 904 qp->rx_err_oflow = 0; 905 qp->rx_err_ver = 0; 906 qp->rx_memcpy = 0; 907 qp->rx_async = 0; 908 qp->tx_bytes = 0; 909 qp->tx_pkts = 0; 910 qp->tx_ring_full = 0; 911 qp->tx_err_no_buf = 0; 912 qp->tx_memcpy = 0; 913 qp->tx_async = 0; 914 } 915 916 static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) 917 { 918 ntb_qp_link_context_reset(qp); 919 if (qp->remote_rx_info) 920 qp->remote_rx_info->entry = qp->rx_max_entry - 1; 921 } 922 923 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) 924 { 925 struct ntb_transport_ctx *nt = qp->transport; 926 struct pci_dev *pdev = nt->ndev->pdev; 927 928 dev_info(&pdev->dev, "qp %d: Link Cleanup\n", qp->qp_num); 929 930 cancel_delayed_work_sync(&qp->link_work); 931 ntb_qp_link_down_reset(qp); 932 933 if (qp->event_handler) 934 qp->event_handler(qp->cb_data, qp->link_is_up); 935 } 936 937 static void ntb_qp_link_cleanup_work(struct work_struct *work) 938 { 939 struct ntb_transport_qp *qp = 
container_of(work, 940 struct ntb_transport_qp, 941 link_cleanup); 942 struct ntb_transport_ctx *nt = qp->transport; 943 944 ntb_qp_link_cleanup(qp); 945 946 if (nt->link_is_up) 947 schedule_delayed_work(&qp->link_work, 948 msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); 949 } 950 951 static void ntb_qp_link_down(struct ntb_transport_qp *qp) 952 { 953 schedule_work(&qp->link_cleanup); 954 } 955 956 static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt) 957 { 958 struct ntb_transport_qp *qp; 959 u64 qp_bitmap_alloc; 960 unsigned int i, count; 961 962 qp_bitmap_alloc = nt->qp_bitmap & ~nt->qp_bitmap_free; 963 964 /* Pass along the info to any clients */ 965 for (i = 0; i < nt->qp_count; i++) 966 if (qp_bitmap_alloc & BIT_ULL(i)) { 967 qp = &nt->qp_vec[i]; 968 ntb_qp_link_cleanup(qp); 969 cancel_work_sync(&qp->link_cleanup); 970 cancel_delayed_work_sync(&qp->link_work); 971 } 972 973 if (!nt->link_is_up) 974 cancel_delayed_work_sync(&nt->link_work); 975 976 for (i = 0; i < nt->mw_count; i++) 977 ntb_free_mw(nt, i); 978 979 /* The scratchpad registers keep the values if the remote side 980 * goes down, blast them now to give them a sane value the next 981 * time they are accessed 982 */ 983 count = ntb_spad_count(nt->ndev); 984 for (i = 0; i < count; i++) 985 ntb_spad_write(nt->ndev, i, 0); 986 } 987 988 static void ntb_transport_link_cleanup_work(struct work_struct *work) 989 { 990 struct ntb_transport_ctx *nt = 991 container_of(work, struct ntb_transport_ctx, link_cleanup); 992 993 guard(mutex)(&nt->link_event_lock); 994 ntb_transport_link_cleanup(nt); 995 } 996 997 static void ntb_transport_event_callback(void *data) 998 { 999 struct ntb_transport_ctx *nt = data; 1000 1001 if (ntb_link_is_up(nt->ndev, NULL, NULL) == 1) 1002 schedule_delayed_work(&nt->link_work, 0); 1003 else 1004 schedule_work(&nt->link_cleanup); 1005 } 1006 1007 static void ntb_transport_link_work(struct work_struct *work) 1008 { 1009 struct ntb_transport_ctx *nt = 1010 container_of(work, 
struct ntb_transport_ctx, link_work.work); 1011 struct ntb_dev *ndev = nt->ndev; 1012 struct pci_dev *pdev = ndev->pdev; 1013 resource_size_t size; 1014 u32 val; 1015 int rc = 0, i, spad; 1016 1017 guard(mutex)(&nt->link_event_lock); 1018 1019 /* send the local info, in the opposite order of the way we read it */ 1020 1021 if (nt->use_msi) { 1022 rc = ntb_msi_setup_mws(ndev); 1023 if (rc) { 1024 dev_warn(&pdev->dev, 1025 "Failed to register MSI memory window: %d\n", 1026 rc); 1027 nt->use_msi = false; 1028 } 1029 } 1030 1031 for (i = 0; i < nt->qp_count; i++) 1032 ntb_transport_setup_qp_msi(nt, i); 1033 1034 for (i = 0; i < nt->mw_count; i++) { 1035 size = nt->mw_vec[i].phys_size; 1036 1037 if (max_mw_size && size > max_mw_size) 1038 size = max_mw_size; 1039 1040 spad = MW0_SZ_HIGH + (i * 2); 1041 ntb_peer_spad_write(ndev, PIDX, spad, upper_32_bits(size)); 1042 1043 spad = MW0_SZ_LOW + (i * 2); 1044 ntb_peer_spad_write(ndev, PIDX, spad, lower_32_bits(size)); 1045 } 1046 1047 ntb_peer_spad_write(ndev, PIDX, NUM_MWS, nt->mw_count); 1048 1049 ntb_peer_spad_write(ndev, PIDX, NUM_QPS, nt->qp_count); 1050 1051 ntb_peer_spad_write(ndev, PIDX, VERSION, NTB_TRANSPORT_VERSION); 1052 1053 /* Query the remote side for its info */ 1054 val = ntb_spad_read(ndev, VERSION); 1055 dev_dbg(&pdev->dev, "Remote version = %d\n", val); 1056 if (val != NTB_TRANSPORT_VERSION) 1057 goto out; 1058 1059 val = ntb_spad_read(ndev, NUM_QPS); 1060 dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val); 1061 if (val != nt->qp_count) 1062 goto out; 1063 1064 val = ntb_spad_read(ndev, NUM_MWS); 1065 dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val); 1066 if (val != nt->mw_count) 1067 goto out; 1068 1069 for (i = 0; i < nt->mw_count; i++) { 1070 u64 val64; 1071 1072 val = ntb_spad_read(ndev, MW0_SZ_HIGH + (i * 2)); 1073 val64 = (u64)val << 32; 1074 1075 val = ntb_spad_read(ndev, MW0_SZ_LOW + (i * 2)); 1076 val64 |= val; 1077 1078 dev_dbg(&pdev->dev, "Remote MW%d size = %#llx\n", i, val64); 
1079 1080 rc = ntb_set_mw(nt, i, val64); 1081 if (rc) 1082 goto out1; 1083 } 1084 1085 nt->link_is_up = true; 1086 1087 for (i = 0; i < nt->qp_count; i++) { 1088 struct ntb_transport_qp *qp = &nt->qp_vec[i]; 1089 1090 ntb_transport_setup_qp_mw(nt, i); 1091 ntb_transport_setup_qp_peer_msi(nt, i); 1092 1093 if (qp->client_ready) 1094 schedule_delayed_work(&qp->link_work, 0); 1095 } 1096 1097 return; 1098 1099 out1: 1100 for (i = 0; i < nt->mw_count; i++) 1101 ntb_free_mw(nt, i); 1102 1103 /* if there's an actual failure, we should just bail */ 1104 if (rc < 0) 1105 return; 1106 1107 out: 1108 if (ntb_link_is_up(ndev, NULL, NULL) == 1) 1109 schedule_delayed_work(&nt->link_work, 1110 msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); 1111 } 1112 1113 static void ntb_qp_link_work(struct work_struct *work) 1114 { 1115 struct ntb_transport_qp *qp = container_of(work, 1116 struct ntb_transport_qp, 1117 link_work.work); 1118 struct pci_dev *pdev = qp->ndev->pdev; 1119 struct ntb_transport_ctx *nt = qp->transport; 1120 int val; 1121 1122 WARN_ON(!nt->link_is_up); 1123 1124 val = ntb_spad_read(nt->ndev, QP_LINKS); 1125 1126 ntb_peer_spad_write(nt->ndev, PIDX, QP_LINKS, val | BIT(qp->qp_num)); 1127 1128 /* query remote spad for qp ready bits */ 1129 dev_dbg_ratelimited(&pdev->dev, "Remote QP link status = %x\n", val); 1130 1131 /* See if the remote side is up */ 1132 if (val & BIT(qp->qp_num)) { 1133 dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num); 1134 qp->link_is_up = true; 1135 qp->active = true; 1136 1137 if (qp->event_handler) 1138 qp->event_handler(qp->cb_data, qp->link_is_up); 1139 1140 if (qp->active) 1141 tasklet_schedule(&qp->rxc_db_work); 1142 } else if (nt->link_is_up) 1143 schedule_delayed_work(&qp->link_work, 1144 msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); 1145 } 1146 1147 static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, 1148 unsigned int qp_num) 1149 { 1150 struct ntb_transport_qp *qp; 1151 phys_addr_t mw_base; 1152 resource_size_t mw_size; 1153 
unsigned int num_qps_mw, tx_size; 1154 unsigned int mw_num, mw_count, qp_count; 1155 u64 qp_offset; 1156 1157 mw_count = nt->mw_count; 1158 qp_count = nt->qp_count; 1159 1160 mw_num = QP_TO_MW(nt, qp_num); 1161 1162 qp = &nt->qp_vec[qp_num]; 1163 qp->qp_num = qp_num; 1164 qp->transport = nt; 1165 qp->ndev = nt->ndev; 1166 qp->client_ready = false; 1167 qp->event_handler = NULL; 1168 ntb_qp_link_context_reset(qp); 1169 1170 if (mw_num < qp_count % mw_count) 1171 num_qps_mw = qp_count / mw_count + 1; 1172 else 1173 num_qps_mw = qp_count / mw_count; 1174 1175 mw_base = nt->mw_vec[mw_num].phys_addr; 1176 mw_size = nt->mw_vec[mw_num].phys_size; 1177 1178 if (max_mw_size && mw_size > max_mw_size) 1179 mw_size = max_mw_size; 1180 1181 tx_size = (unsigned int)mw_size / num_qps_mw; 1182 qp_offset = tx_size * (qp_num / mw_count); 1183 1184 qp->tx_mw_size = tx_size; 1185 qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset; 1186 if (!qp->tx_mw) 1187 return -EINVAL; 1188 1189 qp->tx_mw_phys = mw_base + qp_offset; 1190 if (!qp->tx_mw_phys) 1191 return -EINVAL; 1192 1193 tx_size -= sizeof(struct ntb_rx_info); 1194 qp->rx_info = qp->tx_mw + tx_size; 1195 1196 /* Due to housekeeping, there must be atleast 2 buffs */ 1197 qp->tx_max_frame = min(transport_mtu, tx_size / 2); 1198 qp->tx_max_entry = tx_size / qp->tx_max_frame; 1199 1200 if (nt->debugfs_node_dir) { 1201 char debugfs_name[8]; 1202 1203 snprintf(debugfs_name, sizeof(debugfs_name), "qp%d", qp_num); 1204 qp->debugfs_dir = debugfs_create_dir(debugfs_name, 1205 nt->debugfs_node_dir); 1206 1207 qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR, 1208 qp->debugfs_dir, qp, 1209 &ntb_qp_debugfs_stats_fops); 1210 } else { 1211 qp->debugfs_dir = NULL; 1212 qp->debugfs_stats = NULL; 1213 } 1214 1215 INIT_DELAYED_WORK(&qp->link_work, ntb_qp_link_work); 1216 INIT_WORK(&qp->link_cleanup, ntb_qp_link_cleanup_work); 1217 1218 spin_lock_init(&qp->ntb_rx_q_lock); 1219 spin_lock_init(&qp->ntb_tx_free_q_lock); 1220 
spin_lock_init(&qp->ntb_tx_offl_q_lock); 1221 1222 INIT_LIST_HEAD(&qp->rx_post_q); 1223 INIT_LIST_HEAD(&qp->rx_pend_q); 1224 INIT_LIST_HEAD(&qp->rx_free_q); 1225 INIT_LIST_HEAD(&qp->tx_free_q); 1226 INIT_LIST_HEAD(&qp->tx_offl_q); 1227 1228 tasklet_init(&qp->rxc_db_work, ntb_transport_rxc_db, 1229 (unsigned long)qp); 1230 1231 return 0; 1232 } 1233 1234 static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) 1235 { 1236 struct ntb_transport_ctx *nt; 1237 struct ntb_transport_mw *mw; 1238 unsigned int mw_count, qp_count, spad_count, max_mw_count_for_spads; 1239 u64 qp_bitmap; 1240 int node; 1241 int rc, i; 1242 1243 mw_count = ntb_peer_mw_count(ndev); 1244 1245 if (!ndev->ops->mw_set_trans) { 1246 dev_err(&ndev->dev, "Inbound MW based NTB API is required\n"); 1247 return -EINVAL; 1248 } 1249 1250 if (ntb_db_is_unsafe(ndev)) 1251 dev_dbg(&ndev->dev, 1252 "doorbell is unsafe, proceed anyway...\n"); 1253 if (ntb_spad_is_unsafe(ndev)) 1254 dev_dbg(&ndev->dev, 1255 "scratchpad is unsafe, proceed anyway...\n"); 1256 1257 if (ntb_peer_port_count(ndev) != NTB_DEF_PEER_CNT) 1258 dev_warn(&ndev->dev, "Multi-port NTB devices unsupported\n"); 1259 1260 node = dev_to_node(&ndev->dev); 1261 1262 nt = kzalloc_node(sizeof(*nt), GFP_KERNEL, node); 1263 if (!nt) 1264 return -ENOMEM; 1265 1266 nt->ndev = ndev; 1267 1268 /* 1269 * If we are using MSI, and have at least one extra memory window, 1270 * we will reserve the last MW for the MSI window. 
1271 */ 1272 if (use_msi && mw_count > 1) { 1273 rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed); 1274 if (!rc) { 1275 mw_count -= 1; 1276 nt->use_msi = true; 1277 } 1278 } 1279 1280 spad_count = ntb_spad_count(ndev); 1281 1282 /* Limit the MW's based on the availability of scratchpads */ 1283 1284 if (spad_count < NTB_TRANSPORT_MIN_SPADS) { 1285 nt->mw_count = 0; 1286 rc = -EINVAL; 1287 goto err; 1288 } 1289 1290 max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2; 1291 nt->mw_count = min(mw_count, max_mw_count_for_spads); 1292 1293 nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH; 1294 1295 nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec), 1296 GFP_KERNEL, node); 1297 if (!nt->mw_vec) { 1298 rc = -ENOMEM; 1299 goto err; 1300 } 1301 1302 for (i = 0; i < mw_count; i++) { 1303 mw = &nt->mw_vec[i]; 1304 1305 rc = ntb_peer_mw_get_addr(ndev, i, &mw->phys_addr, 1306 &mw->phys_size); 1307 if (rc) 1308 goto err1; 1309 1310 mw->vbase = ioremap_wc(mw->phys_addr, mw->phys_size); 1311 if (!mw->vbase) { 1312 rc = -ENOMEM; 1313 goto err1; 1314 } 1315 1316 mw->buff_size = 0; 1317 mw->xlat_size = 0; 1318 mw->virt_addr = NULL; 1319 mw->dma_addr = 0; 1320 } 1321 1322 qp_bitmap = ntb_db_valid_mask(ndev); 1323 1324 qp_count = ilog2(qp_bitmap); 1325 if (nt->use_msi) { 1326 qp_count -= 1; 1327 nt->msi_db_mask = BIT_ULL(qp_count); 1328 ntb_db_clear_mask(ndev, nt->msi_db_mask); 1329 } 1330 1331 if (max_num_clients && max_num_clients < qp_count) 1332 qp_count = max_num_clients; 1333 else if (nt->mw_count < qp_count) 1334 qp_count = nt->mw_count; 1335 1336 qp_bitmap &= BIT_ULL(qp_count) - 1; 1337 1338 nt->qp_count = qp_count; 1339 nt->qp_bitmap = qp_bitmap; 1340 nt->qp_bitmap_free = qp_bitmap; 1341 1342 nt->qp_vec = kcalloc_node(qp_count, sizeof(*nt->qp_vec), 1343 GFP_KERNEL, node); 1344 if (!nt->qp_vec) { 1345 rc = -ENOMEM; 1346 goto err1; 1347 } 1348 1349 if (nt_debugfs_dir) { 1350 nt->debugfs_node_dir = 1351 debugfs_create_dir(pci_name(ndev->pdev), 1352 
nt_debugfs_dir); 1353 } 1354 1355 for (i = 0; i < qp_count; i++) { 1356 rc = ntb_transport_init_queue(nt, i); 1357 if (rc) 1358 goto err2; 1359 } 1360 1361 mutex_init(&nt->link_event_lock); 1362 INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work); 1363 INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup_work); 1364 1365 rc = ntb_set_ctx(ndev, nt, &ntb_transport_ops); 1366 if (rc) 1367 goto err2; 1368 1369 INIT_LIST_HEAD(&nt->client_devs); 1370 rc = ntb_bus_init(nt); 1371 if (rc) 1372 goto err3; 1373 1374 nt->link_is_up = false; 1375 ntb_link_enable(ndev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); 1376 ntb_link_event(ndev); 1377 1378 return 0; 1379 1380 err3: 1381 ntb_clear_ctx(ndev); 1382 err2: 1383 kfree(nt->qp_vec); 1384 err1: 1385 while (i--) { 1386 mw = &nt->mw_vec[i]; 1387 iounmap(mw->vbase); 1388 } 1389 kfree(nt->mw_vec); 1390 err: 1391 kfree(nt); 1392 return rc; 1393 } 1394 1395 static void ntb_transport_free(struct ntb_client *self, struct ntb_dev *ndev) 1396 { 1397 struct ntb_transport_ctx *nt = ndev->ctx; 1398 struct ntb_transport_qp *qp; 1399 u64 qp_bitmap_alloc; 1400 int i; 1401 1402 ntb_transport_link_cleanup(nt); 1403 cancel_work_sync(&nt->link_cleanup); 1404 cancel_delayed_work_sync(&nt->link_work); 1405 1406 qp_bitmap_alloc = nt->qp_bitmap & ~nt->qp_bitmap_free; 1407 1408 /* verify that all the qp's are freed */ 1409 for (i = 0; i < nt->qp_count; i++) { 1410 qp = &nt->qp_vec[i]; 1411 if (qp_bitmap_alloc & BIT_ULL(i)) 1412 ntb_transport_free_queue(qp); 1413 debugfs_remove_recursive(qp->debugfs_dir); 1414 } 1415 1416 ntb_link_disable(ndev); 1417 ntb_clear_ctx(ndev); 1418 1419 ntb_bus_remove(nt); 1420 1421 for (i = nt->mw_count; i--; ) { 1422 ntb_free_mw(nt, i); 1423 iounmap(nt->mw_vec[i].vbase); 1424 } 1425 1426 kfree(nt->qp_vec); 1427 kfree(nt->mw_vec); 1428 kfree(nt); 1429 } 1430 1431 static void ntb_complete_rxc(struct ntb_transport_qp *qp) 1432 { 1433 struct ntb_queue_entry *entry; 1434 void *cb_data; 1435 unsigned int len; 1436 unsigned long 
irqflags; 1437 1438 spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags); 1439 1440 while (!list_empty(&qp->rx_post_q)) { 1441 entry = list_first_entry(&qp->rx_post_q, 1442 struct ntb_queue_entry, entry); 1443 if (!(entry->flags & DESC_DONE_FLAG)) 1444 break; 1445 1446 entry->rx_hdr->flags = 0; 1447 iowrite32(entry->rx_index, &qp->rx_info->entry); 1448 1449 cb_data = entry->cb_data; 1450 len = entry->len; 1451 1452 list_move_tail(&entry->entry, &qp->rx_free_q); 1453 1454 spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags); 1455 1456 if (qp->rx_handler && qp->client_ready) 1457 qp->rx_handler(qp, qp->cb_data, cb_data, len); 1458 1459 spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags); 1460 } 1461 1462 spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags); 1463 } 1464 1465 static void ntb_rx_copy_callback(void *data, 1466 const struct dmaengine_result *res) 1467 { 1468 struct ntb_queue_entry *entry = data; 1469 1470 /* we need to check DMA results if we are using DMA */ 1471 if (res) { 1472 enum dmaengine_tx_result dma_err = res->result; 1473 1474 switch (dma_err) { 1475 case DMA_TRANS_READ_FAILED: 1476 case DMA_TRANS_WRITE_FAILED: 1477 entry->errors++; 1478 fallthrough; 1479 case DMA_TRANS_ABORTED: 1480 { 1481 struct ntb_transport_qp *qp = entry->qp; 1482 void *offset = qp->rx_buff + qp->rx_max_frame * 1483 qp->rx_index; 1484 1485 ntb_memcpy_rx(entry, offset); 1486 qp->rx_memcpy++; 1487 return; 1488 } 1489 1490 case DMA_TRANS_NOERROR: 1491 default: 1492 break; 1493 } 1494 } 1495 1496 entry->flags |= DESC_DONE_FLAG; 1497 1498 ntb_complete_rxc(entry->qp); 1499 } 1500 1501 static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) 1502 { 1503 void *buf = entry->buf; 1504 size_t len = entry->len; 1505 1506 memcpy(buf, offset, len); 1507 1508 /* Ensure that the data is fully copied out before clearing the flag */ 1509 wmb(); 1510 1511 ntb_rx_copy_callback(entry, NULL); 1512 } 1513 1514 static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset) 1515 { 
1516 struct dma_async_tx_descriptor *txd; 1517 struct ntb_transport_qp *qp = entry->qp; 1518 struct dma_chan *chan = qp->rx_dma_chan; 1519 struct dma_device *device; 1520 size_t pay_off, buff_off, len; 1521 struct dmaengine_unmap_data *unmap; 1522 dma_cookie_t cookie; 1523 void *buf = entry->buf; 1524 1525 len = entry->len; 1526 device = chan->device; 1527 pay_off = (size_t)offset & ~PAGE_MASK; 1528 buff_off = (size_t)buf & ~PAGE_MASK; 1529 1530 if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) 1531 goto err; 1532 1533 unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT); 1534 if (!unmap) 1535 goto err; 1536 1537 unmap->len = len; 1538 unmap->addr[0] = dma_map_phys(device->dev, virt_to_phys(offset), 1539 len, DMA_TO_DEVICE, 0); 1540 if (dma_mapping_error(device->dev, unmap->addr[0])) 1541 goto err_get_unmap; 1542 1543 unmap->to_cnt = 1; 1544 1545 unmap->addr[1] = dma_map_phys(device->dev, virt_to_phys(buf), 1546 len, DMA_FROM_DEVICE, 0); 1547 if (dma_mapping_error(device->dev, unmap->addr[1])) 1548 goto err_get_unmap; 1549 1550 unmap->from_cnt = 1; 1551 1552 txd = device->device_prep_dma_memcpy(chan, unmap->addr[1], 1553 unmap->addr[0], len, 1554 DMA_PREP_INTERRUPT); 1555 if (!txd) 1556 goto err_get_unmap; 1557 1558 txd->callback_result = ntb_rx_copy_callback; 1559 txd->callback_param = entry; 1560 dma_set_unmap(txd, unmap); 1561 1562 cookie = dmaengine_submit(txd); 1563 if (dma_submit_error(cookie)) 1564 goto err_set_unmap; 1565 1566 dmaengine_unmap_put(unmap); 1567 1568 qp->last_cookie = cookie; 1569 1570 qp->rx_async++; 1571 1572 return 0; 1573 1574 err_set_unmap: 1575 dmaengine_unmap_put(unmap); 1576 err_get_unmap: 1577 dmaengine_unmap_put(unmap); 1578 err: 1579 return -ENXIO; 1580 } 1581 1582 static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset) 1583 { 1584 struct ntb_transport_qp *qp = entry->qp; 1585 struct dma_chan *chan = qp->rx_dma_chan; 1586 int res; 1587 1588 if (!chan) 1589 goto err; 1590 1591 if (entry->len < 
copy_bytes) 1592 goto err; 1593 1594 res = ntb_async_rx_submit(entry, offset); 1595 if (res < 0) 1596 goto err; 1597 1598 qp->rx_async++; 1599 return; 1600 1601 err: 1602 ntb_memcpy_rx(entry, offset); 1603 qp->rx_memcpy++; 1604 } 1605 1606 static int ntb_process_rxc(struct ntb_transport_qp *qp) 1607 { 1608 struct ntb_payload_header *hdr; 1609 struct ntb_queue_entry *entry; 1610 void *offset; 1611 1612 offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index; 1613 hdr = offset + qp->rx_max_frame - sizeof(struct ntb_payload_header); 1614 1615 dev_dbg(&qp->ndev->pdev->dev, "qp %d: RX ver %u len %d flags %x\n", 1616 qp->qp_num, hdr->ver, hdr->len, hdr->flags); 1617 1618 if (!(hdr->flags & DESC_DONE_FLAG)) { 1619 dev_dbg(&qp->ndev->pdev->dev, "done flag not set\n"); 1620 qp->rx_ring_empty++; 1621 return -EAGAIN; 1622 } 1623 1624 if (hdr->flags & LINK_DOWN_FLAG) { 1625 dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n"); 1626 ntb_qp_link_down(qp); 1627 hdr->flags = 0; 1628 return -EAGAIN; 1629 } 1630 1631 if (hdr->ver != (u32)qp->rx_pkts) { 1632 dev_dbg(&qp->ndev->pdev->dev, 1633 "version mismatch, expected %llu - got %u\n", 1634 qp->rx_pkts, hdr->ver); 1635 qp->rx_err_ver++; 1636 return -EIO; 1637 } 1638 1639 entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q); 1640 if (!entry) { 1641 dev_dbg(&qp->ndev->pdev->dev, "no receive buffer\n"); 1642 qp->rx_err_no_buf++; 1643 return -EAGAIN; 1644 } 1645 1646 entry->rx_hdr = hdr; 1647 entry->rx_index = qp->rx_index; 1648 1649 if (hdr->len > entry->len) { 1650 dev_dbg(&qp->ndev->pdev->dev, 1651 "receive buffer overflow! 
Wanted %d got %d\n", 1652 hdr->len, entry->len); 1653 qp->rx_err_oflow++; 1654 1655 entry->len = -EIO; 1656 entry->flags |= DESC_DONE_FLAG; 1657 1658 ntb_complete_rxc(qp); 1659 } else { 1660 dev_dbg(&qp->ndev->pdev->dev, 1661 "RX OK index %u ver %u size %d into buf size %d\n", 1662 qp->rx_index, hdr->ver, hdr->len, entry->len); 1663 1664 qp->rx_bytes += hdr->len; 1665 qp->rx_pkts++; 1666 1667 entry->len = hdr->len; 1668 1669 ntb_async_rx(entry, offset); 1670 } 1671 1672 qp->rx_index++; 1673 qp->rx_index %= qp->rx_max_entry; 1674 1675 return 0; 1676 } 1677 1678 static void ntb_transport_rxc_db(unsigned long data) 1679 { 1680 struct ntb_transport_qp *qp = (void *)data; 1681 int rc, i; 1682 1683 dev_dbg(&qp->ndev->pdev->dev, "%s: doorbell %d received\n", 1684 __func__, qp->qp_num); 1685 1686 /* Limit the number of packets processed in a single interrupt to 1687 * provide fairness to others 1688 */ 1689 for (i = 0; i < qp->rx_max_entry; i++) { 1690 rc = ntb_process_rxc(qp); 1691 if (rc) 1692 break; 1693 } 1694 1695 if (i && qp->rx_dma_chan) 1696 dma_async_issue_pending(qp->rx_dma_chan); 1697 1698 if (i == qp->rx_max_entry) { 1699 /* there is more work to do */ 1700 if (qp->active) 1701 tasklet_schedule(&qp->rxc_db_work); 1702 } else if (ntb_db_read(qp->ndev) & BIT_ULL(qp->qp_num)) { 1703 /* the doorbell bit is set: clear it */ 1704 ntb_db_clear(qp->ndev, BIT_ULL(qp->qp_num)); 1705 /* ntb_db_read ensures ntb_db_clear write is committed */ 1706 ntb_db_read(qp->ndev); 1707 1708 /* an interrupt may have arrived between finishing 1709 * ntb_process_rxc and clearing the doorbell bit: 1710 * there might be some more work to do. 
1711 */ 1712 if (qp->active) 1713 tasklet_schedule(&qp->rxc_db_work); 1714 } 1715 } 1716 1717 static void ntb_tx_copy_callback(void *data, 1718 const struct dmaengine_result *res) 1719 { 1720 struct ntb_queue_entry *entry = data; 1721 struct ntb_transport_qp *qp = entry->qp; 1722 struct ntb_payload_header __iomem *hdr = entry->tx_hdr; 1723 1724 /* we need to check DMA results if we are using DMA */ 1725 if (res) { 1726 enum dmaengine_tx_result dma_err = res->result; 1727 1728 switch (dma_err) { 1729 case DMA_TRANS_READ_FAILED: 1730 case DMA_TRANS_WRITE_FAILED: 1731 entry->errors++; 1732 fallthrough; 1733 case DMA_TRANS_ABORTED: 1734 { 1735 void __iomem *offset = 1736 qp->tx_mw + qp->tx_max_frame * 1737 entry->tx_index; 1738 1739 /* resubmit via CPU */ 1740 ntb_memcpy_tx(entry, offset); 1741 qp->tx_memcpy++; 1742 return; 1743 } 1744 1745 case DMA_TRANS_NOERROR: 1746 default: 1747 break; 1748 } 1749 } 1750 1751 iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); 1752 1753 /* 1754 * Make DONE flag visible before DB/MSI. WC + posted MWr may reorder 1755 * across iATU/bridge (platform-dependent). Order and flush here. 1756 */ 1757 dma_mb(); 1758 ioread32(&hdr->flags); 1759 1760 if (qp->use_msi) 1761 ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); 1762 else 1763 ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); 1764 1765 /* The entry length can only be zero if the packet is intended to be a 1766 * "link down" or similar. Since no payload is being sent in these 1767 * cases, there is nothing to add to the completion queue. 
/*
 * Kernel thread that performs TX CPU copies on behalf of a queue pair.
 *
 * @data: the struct ntb_transport_qp this thread serves
 *
 * ntb_memcpy_tx() puts entries on qp->tx_offl_q and wakes qp->tx_offload_wq;
 * this thread moves whatever is queued onto a private list and copies each
 * entry into its ring slot with ntb_memcpy_tx_on_stack().
 *
 * Return: always 0, once kthread_should_stop() reports true.
 */
static int ntb_tx_memcpy_kthread(void *data)
{
	struct ntb_transport_qp *qp = data;
	struct ntb_queue_entry *entry, *tmp;
	/* Number of entries copied between voluntary reschedule points. */
	const int resched_nr = 64;
	LIST_HEAD(local_list);
	void __iomem *offset;
	int processed = 0;

	while (!kthread_should_stop()) {
		/*
		 * Wait, with the offload queue lock taken, until a stop is
		 * requested, an entry is queued, or the 5 second timeout
		 * expires.  The lock is handed to the wait macro (presumably
		 * so it can be dropped while sleeping — see linux/wait.h).
		 */
		spin_lock_irq(&qp->ntb_tx_offl_q_lock);
		wait_event_interruptible_lock_irq_timeout(qp->tx_offload_wq,
				kthread_should_stop() ||
				!list_empty(&qp->tx_offl_q),
				qp->ntb_tx_offl_q_lock, 5*HZ);
		/* Take everything queued so the copies run without the lock. */
		list_splice_tail_init(&qp->tx_offl_q, &local_list);
		spin_unlock_irq(&qp->ntb_tx_offl_q_lock);

		list_for_each_entry_safe(entry, tmp, &local_list, entry) {
			list_del(&entry->entry);
			/* Ring slot that was assigned to this entry. */
			offset = qp->tx_mw + qp->tx_max_frame * entry->tx_index;
			ntb_memcpy_tx_on_stack(entry, offset);
			if (++processed >= resched_nr) {
				cond_resched();
				processed = 0;
			}
		}
		cond_resched();
	}

	return 0;
}
(WARN_ON_ONCE(!qp)) 1837 return; 1838 1839 if (ntb_tx_offload_enabled(qp)) { 1840 ntb_list_add(&qp->ntb_tx_offl_q_lock, &entry->entry, 1841 &qp->tx_offl_q); 1842 wake_up(&qp->tx_offload_wq); 1843 } else 1844 ntb_memcpy_tx_on_stack(entry, offset); 1845 } 1846 1847 static int ntb_async_tx_submit(struct ntb_transport_qp *qp, 1848 struct ntb_queue_entry *entry) 1849 { 1850 struct dma_async_tx_descriptor *txd; 1851 struct dma_chan *chan = qp->tx_dma_chan; 1852 struct dma_device *device; 1853 size_t len = entry->len; 1854 void *buf = entry->buf; 1855 size_t dest_off, buff_off; 1856 struct dmaengine_unmap_data *unmap; 1857 dma_addr_t dest; 1858 dma_cookie_t cookie; 1859 1860 device = chan->device; 1861 dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index; 1862 buff_off = (size_t)buf & ~PAGE_MASK; 1863 dest_off = (size_t)dest & ~PAGE_MASK; 1864 1865 if (!is_dma_copy_aligned(device, buff_off, dest_off, len)) 1866 goto err; 1867 1868 unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT); 1869 if (!unmap) 1870 goto err; 1871 1872 unmap->len = len; 1873 unmap->addr[0] = dma_map_phys(device->dev, virt_to_phys(buf), 1874 len, DMA_TO_DEVICE, 0); 1875 if (dma_mapping_error(device->dev, unmap->addr[0])) 1876 goto err_get_unmap; 1877 1878 unmap->to_cnt = 1; 1879 1880 txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len, 1881 DMA_PREP_INTERRUPT); 1882 if (!txd) 1883 goto err_get_unmap; 1884 1885 txd->callback_result = ntb_tx_copy_callback; 1886 txd->callback_param = entry; 1887 dma_set_unmap(txd, unmap); 1888 1889 cookie = dmaengine_submit(txd); 1890 if (dma_submit_error(cookie)) 1891 goto err_set_unmap; 1892 1893 dmaengine_unmap_put(unmap); 1894 1895 dma_async_issue_pending(chan); 1896 1897 return 0; 1898 err_set_unmap: 1899 dmaengine_unmap_put(unmap); 1900 err_get_unmap: 1901 dmaengine_unmap_put(unmap); 1902 err: 1903 return -ENXIO; 1904 } 1905 1906 static void ntb_async_tx(struct ntb_transport_qp *qp, 1907 struct ntb_queue_entry *entry) 1908 { 1909 
struct ntb_payload_header __iomem *hdr; 1910 struct dma_chan *chan = qp->tx_dma_chan; 1911 void __iomem *offset; 1912 int res; 1913 1914 entry->tx_index = qp->tx_index; 1915 offset = qp->tx_mw + qp->tx_max_frame * entry->tx_index; 1916 hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header); 1917 entry->tx_hdr = hdr; 1918 1919 WARN_ON_ONCE(!ntb_transport_tx_free_entry(qp)); 1920 WRITE_ONCE(qp->tx_index, (qp->tx_index + 1) % qp->tx_max_entry); 1921 1922 iowrite32(entry->len, &hdr->len); 1923 iowrite32((u32)qp->tx_pkts, &hdr->ver); 1924 1925 if (!chan) 1926 goto err; 1927 1928 if (entry->len < copy_bytes) 1929 goto err; 1930 1931 res = ntb_async_tx_submit(qp, entry); 1932 if (res < 0) 1933 goto err; 1934 1935 qp->tx_async++; 1936 return; 1937 1938 err: 1939 ntb_memcpy_tx(entry, offset); 1940 qp->tx_memcpy++; 1941 } 1942 1943 static int ntb_process_tx(struct ntb_transport_qp *qp, 1944 struct ntb_queue_entry *entry) 1945 { 1946 if (!ntb_transport_tx_free_entry(qp)) { 1947 qp->tx_ring_full++; 1948 return -EAGAIN; 1949 } 1950 1951 if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) { 1952 if (qp->tx_handler) 1953 qp->tx_handler(qp, qp->cb_data, NULL, -EIO); 1954 1955 ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, 1956 &qp->tx_free_q); 1957 return 0; 1958 } 1959 1960 ntb_async_tx(qp, entry); 1961 1962 qp->tx_pkts++; 1963 1964 return 0; 1965 } 1966 1967 static void ntb_send_link_down(struct ntb_transport_qp *qp) 1968 { 1969 struct pci_dev *pdev = qp->ndev->pdev; 1970 struct ntb_queue_entry *entry; 1971 int i, rc; 1972 1973 if (!qp->link_is_up) 1974 return; 1975 1976 dev_info(&pdev->dev, "qp %d: Send Link Down\n", qp->qp_num); 1977 1978 for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) { 1979 entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); 1980 if (entry) 1981 break; 1982 msleep(100); 1983 } 1984 1985 if (!entry) 1986 return; 1987 1988 entry->cb_data = NULL; 1989 entry->buf = NULL; 1990 entry->len = 0; 1991 entry->flags = 
LINK_DOWN_FLAG; 1992 1993 rc = ntb_process_tx(qp, entry); 1994 if (rc) 1995 dev_err(&pdev->dev, "ntb: QP%d unable to send linkdown msg\n", 1996 qp->qp_num); 1997 1998 ntb_qp_link_down_reset(qp); 1999 } 2000 2001 static bool ntb_dma_filter_fn(struct dma_chan *chan, void *node) 2002 { 2003 return dev_to_node(&chan->dev->device) == (int)(unsigned long)node; 2004 } 2005 2006 /** 2007 * ntb_transport_create_queue - Create a new NTB transport layer queue 2008 * @data: pointer for callback data 2009 * @client_dev: &struct device pointer 2010 * @handlers: pointer to various ntb queue (callback) handlers 2011 * 2012 * Create a new NTB transport layer queue and provide the queue with a callback 2013 * routine for both transmit and receive. The receive callback routine will be 2014 * used to pass up data when the transport has received it on the queue. The 2015 * transmit callback routine will be called when the transport has completed the 2016 * transmission of the data on the queue and the data is ready to be freed. 2017 * 2018 * RETURNS: pointer to newly created ntb_queue, NULL on error. 
2019 */ 2020 struct ntb_transport_qp * 2021 ntb_transport_create_queue(void *data, struct device *client_dev, 2022 const struct ntb_queue_handlers *handlers) 2023 { 2024 struct ntb_dev *ndev; 2025 struct pci_dev *pdev; 2026 struct ntb_transport_ctx *nt; 2027 struct ntb_queue_entry *entry; 2028 struct ntb_transport_qp *qp; 2029 u64 qp_bit; 2030 unsigned int free_queue; 2031 dma_cap_mask_t dma_mask; 2032 int node; 2033 int i; 2034 2035 ndev = dev_ntb(client_dev->parent); 2036 pdev = ndev->pdev; 2037 nt = ndev->ctx; 2038 2039 node = dev_to_node(&ndev->dev); 2040 2041 free_queue = ffs(nt->qp_bitmap_free); 2042 if (!free_queue) 2043 goto err; 2044 2045 /* decrement free_queue to make it zero based */ 2046 free_queue--; 2047 2048 qp = &nt->qp_vec[free_queue]; 2049 qp_bit = BIT_ULL(qp->qp_num); 2050 2051 nt->qp_bitmap_free &= ~qp_bit; 2052 2053 qp->cb_data = data; 2054 qp->rx_handler = handlers->rx_handler; 2055 qp->tx_handler = handlers->tx_handler; 2056 qp->event_handler = handlers->event_handler; 2057 2058 init_waitqueue_head(&qp->tx_offload_wq); 2059 if (tx_memcpy_offload) { 2060 qp->tx_offload_thread = kthread_run(ntb_tx_memcpy_kthread, qp, 2061 "ntb-txcpy/%s/%u", 2062 pci_name(ndev->pdev), qp->qp_num); 2063 if (IS_ERR(qp->tx_offload_thread)) { 2064 dev_warn(&nt->ndev->dev, 2065 "tx memcpy offload thread creation failed: %ld; falling back to inline copy\n", 2066 PTR_ERR(qp->tx_offload_thread)); 2067 qp->tx_offload_thread = NULL; 2068 } 2069 } else 2070 qp->tx_offload_thread = NULL; 2071 2072 dma_cap_zero(dma_mask); 2073 dma_cap_set(DMA_MEMCPY, dma_mask); 2074 2075 if (use_dma) { 2076 qp->tx_dma_chan = 2077 dma_request_channel(dma_mask, ntb_dma_filter_fn, 2078 (void *)(unsigned long)node); 2079 if (!qp->tx_dma_chan) 2080 dev_info(&pdev->dev, "Unable to allocate TX DMA channel\n"); 2081 2082 qp->rx_dma_chan = 2083 dma_request_channel(dma_mask, ntb_dma_filter_fn, 2084 (void *)(unsigned long)node); 2085 if (!qp->rx_dma_chan) 2086 dev_info(&pdev->dev, "Unable to allocate 
RX DMA channel\n"); 2087 } else { 2088 qp->tx_dma_chan = NULL; 2089 qp->rx_dma_chan = NULL; 2090 } 2091 2092 qp->tx_mw_dma_addr = 0; 2093 if (qp->tx_dma_chan) { 2094 qp->tx_mw_dma_addr = 2095 dma_map_resource(qp->tx_dma_chan->device->dev, 2096 qp->tx_mw_phys, qp->tx_mw_size, 2097 DMA_FROM_DEVICE, 0); 2098 if (dma_mapping_error(qp->tx_dma_chan->device->dev, 2099 qp->tx_mw_dma_addr)) { 2100 qp->tx_mw_dma_addr = 0; 2101 goto err1; 2102 } 2103 } 2104 2105 dev_dbg(&pdev->dev, "Using %s memcpy for TX\n", 2106 qp->tx_dma_chan ? "DMA" : "CPU"); 2107 2108 dev_dbg(&pdev->dev, "Using %s memcpy for RX\n", 2109 qp->rx_dma_chan ? "DMA" : "CPU"); 2110 2111 for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { 2112 entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node); 2113 if (!entry) 2114 goto err1; 2115 2116 entry->qp = qp; 2117 ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, 2118 &qp->rx_free_q); 2119 } 2120 qp->rx_alloc_entry = NTB_QP_DEF_NUM_ENTRIES; 2121 2122 for (i = 0; i < qp->tx_max_entry; i++) { 2123 entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node); 2124 if (!entry) 2125 goto err2; 2126 2127 entry->qp = qp; 2128 ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, 2129 &qp->tx_free_q); 2130 } 2131 2132 ntb_db_clear(qp->ndev, qp_bit); 2133 ntb_db_clear_mask(qp->ndev, qp_bit); 2134 2135 dev_info(&pdev->dev, "NTB Transport QP %d created\n", qp->qp_num); 2136 2137 return qp; 2138 2139 err2: 2140 while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) 2141 kfree(entry); 2142 err1: 2143 qp->rx_alloc_entry = 0; 2144 while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) 2145 kfree(entry); 2146 if (qp->tx_mw_dma_addr) 2147 dma_unmap_resource(qp->tx_dma_chan->device->dev, 2148 qp->tx_mw_dma_addr, qp->tx_mw_size, 2149 DMA_FROM_DEVICE, 0); 2150 if (qp->tx_dma_chan) 2151 dma_release_channel(qp->tx_dma_chan); 2152 if (qp->rx_dma_chan) 2153 dma_release_channel(qp->rx_dma_chan); 2154 nt->qp_bitmap_free |= qp_bit; 2155 err: 2156 return NULL; 2157 } 2158 
/**
 * ntb_transport_free_queue - Frees NTB transport queue
 * @qp: NTB queue to be freed; a NULL pointer is ignored
 *
 * Marks the queue inactive, stops the TX offload thread if one is running,
 * drains and releases the TX and RX DMA channels, masks the queue's doorbell,
 * kills the RX tasklet and cancels the link work.  The client callbacks are
 * then cleared, every entry left on the rx/tx lists is freed, and the queue's
 * bit is returned to the transport's free bitmap.
 */
void ntb_transport_free_queue(struct ntb_transport_qp *qp)
{
	struct pci_dev *pdev;
	struct ntb_queue_entry *entry;
	u64 qp_bit;

	if (!qp)
		return;

	pdev = qp->ndev->pdev;

	qp->active = false;

	if (qp->tx_offload_thread) {
		kthread_stop(qp->tx_offload_thread);
		qp->tx_offload_thread = NULL;
	}

	if (qp->tx_dma_chan) {
		struct dma_chan *chan = qp->tx_dma_chan;
		/* Putting the dma_chan to NULL will force any new traffic to be
		 * processed by the CPU instead of the DMA engine
		 */
		qp->tx_dma_chan = NULL;

		/* Try to be nice and wait for any queued DMA engine
		 * transactions to process before smashing it with a rock
		 *
		 * NOTE(review): in the code visible here last_cookie is only
		 * written by the RX submit path — confirm it is meaningful
		 * for the TX channel.
		 */
		dma_sync_wait(chan, qp->last_cookie);
		dmaengine_terminate_all(chan);

		/* Undo the dma_map_resource() done when the queue was created */
		dma_unmap_resource(chan->device->dev,
				   qp->tx_mw_dma_addr, qp->tx_mw_size,
				   DMA_FROM_DEVICE, 0);

		dma_release_channel(chan);
	}

	if (qp->rx_dma_chan) {
		struct dma_chan *chan = qp->rx_dma_chan;
		/* Putting the dma_chan to NULL will force any new traffic to be
		 * processed by the CPU instead of the DMA engine
		 */
		qp->rx_dma_chan = NULL;

		/* Try to be nice and wait for any queued DMA engine
		 * transactions to process before smashing it with a rock
		 */
		dma_sync_wait(chan, qp->last_cookie);
		dmaengine_terminate_all(chan);
		dma_release_channel(chan);
	}

	qp_bit = BIT_ULL(qp->qp_num);

	/* Mask the doorbell first so the tasklet is not scheduled again */
	ntb_db_set_mask(qp->ndev, qp_bit);
	tasklet_kill(&qp->rxc_db_work);

	cancel_delayed_work_sync(&qp->link_work);

	qp->cb_data = NULL;
	qp->rx_handler = NULL;
	qp->tx_handler = NULL;
	qp->event_handler = NULL;

	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
		kfree(entry);

	/* Entries still pending or posted are freed too, with a warning */
	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) {
		dev_warn(&pdev->dev, "Freeing item from non-empty rx_pend_q\n");
		kfree(entry);
	}

	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) {
		dev_warn(&pdev->dev, "Freeing item from non-empty rx_post_q\n");
		kfree(entry);
	}

	while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
		kfree(entry);

	/* Entries queued for the (now stopped) offload thread */
	while ((entry = ntb_list_rm(&qp->ntb_tx_offl_q_lock, &qp->tx_offl_q)))
		kfree(entry);

	/* Make the queue number available to ntb_transport_create_queue() */
	qp->transport->qp_bitmap_free |= qp_bit;

	dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num);
}
2265 */ 2266 void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len) 2267 { 2268 struct ntb_queue_entry *entry; 2269 void *buf; 2270 2271 if (!qp || qp->client_ready) 2272 return NULL; 2273 2274 entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q); 2275 if (!entry) 2276 return NULL; 2277 2278 buf = entry->cb_data; 2279 *len = entry->len; 2280 2281 ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); 2282 2283 return buf; 2284 } 2285 EXPORT_SYMBOL_GPL(ntb_transport_rx_remove); 2286 2287 /** 2288 * ntb_transport_rx_enqueue - Enqueue a new NTB queue entry 2289 * @qp: NTB transport layer queue the entry is to be enqueued on 2290 * @cb: per buffer pointer for callback function to use 2291 * @data: pointer to data buffer that incoming packets will be copied into 2292 * @len: length of the data buffer 2293 * 2294 * Enqueue a new receive buffer onto the transport queue into which a NTB 2295 * payload can be received into. 2296 * 2297 * RETURNS: An appropriate -ERRNO error value on error, or zero for success. 
2298 */ 2299 int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, 2300 unsigned int len) 2301 { 2302 struct ntb_queue_entry *entry; 2303 2304 if (!qp) 2305 return -EINVAL; 2306 2307 entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q); 2308 if (!entry) 2309 return -ENOMEM; 2310 2311 entry->cb_data = cb; 2312 entry->buf = data; 2313 entry->len = len; 2314 entry->flags = 0; 2315 entry->errors = 0; 2316 entry->rx_index = 0; 2317 2318 ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q); 2319 2320 if (qp->active) 2321 tasklet_schedule(&qp->rxc_db_work); 2322 2323 return 0; 2324 } 2325 EXPORT_SYMBOL_GPL(ntb_transport_rx_enqueue); 2326 2327 /** 2328 * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry 2329 * @qp: NTB transport layer queue the entry is to be enqueued on 2330 * @cb: per buffer pointer for callback function to use 2331 * @data: pointer to data buffer that will be sent 2332 * @len: length of the data buffer 2333 * 2334 * Enqueue a new transmit buffer onto the transport queue from which a NTB 2335 * payload will be transmitted. This assumes that a lock is being held to 2336 * serialize access to the qp. 2337 * 2338 * RETURNS: An appropriate -ERRNO error value on error, or zero for success. 2339 */ 2340 int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, 2341 unsigned int len) 2342 { 2343 struct ntb_queue_entry *entry; 2344 int rc; 2345 2346 if (!qp || !len) 2347 return -EINVAL; 2348 2349 /* If the qp link is down already, just ignore. 
*/ 2350 if (!qp->link_is_up) 2351 return 0; 2352 2353 entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); 2354 if (!entry) { 2355 qp->tx_err_no_buf++; 2356 return -EBUSY; 2357 } 2358 2359 entry->cb_data = cb; 2360 entry->buf = data; 2361 entry->len = len; 2362 entry->flags = 0; 2363 entry->errors = 0; 2364 entry->tx_index = 0; 2365 2366 rc = ntb_process_tx(qp, entry); 2367 if (rc) 2368 ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, 2369 &qp->tx_free_q); 2370 2371 return rc; 2372 } 2373 EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue); 2374 2375 /** 2376 * ntb_transport_link_up - Notify NTB transport of client readiness to use queue 2377 * @qp: NTB transport layer queue to be enabled 2378 * 2379 * Notify NTB transport layer of client readiness to use queue 2380 */ 2381 void ntb_transport_link_up(struct ntb_transport_qp *qp) 2382 { 2383 if (!qp) 2384 return; 2385 2386 qp->client_ready = true; 2387 2388 if (qp->transport->link_is_up) 2389 schedule_delayed_work(&qp->link_work, 0); 2390 } 2391 EXPORT_SYMBOL_GPL(ntb_transport_link_up); 2392 2393 /** 2394 * ntb_transport_link_down - Notify NTB transport to no longer enqueue data 2395 * @qp: NTB transport layer queue to be disabled 2396 * 2397 * Notify NTB transport layer of client's desire to no longer receive data on 2398 * transport queue specified. It is the client's responsibility to ensure all 2399 * entries on queue are purged or otherwise handled appropriately. 
2400 */ 2401 void ntb_transport_link_down(struct ntb_transport_qp *qp) 2402 { 2403 int val; 2404 2405 if (!qp) 2406 return; 2407 2408 qp->client_ready = false; 2409 2410 val = ntb_spad_read(qp->ndev, QP_LINKS); 2411 2412 ntb_peer_spad_write(qp->ndev, PIDX, QP_LINKS, val & ~BIT(qp->qp_num)); 2413 2414 if (qp->link_is_up) 2415 ntb_send_link_down(qp); 2416 else 2417 cancel_delayed_work_sync(&qp->link_work); 2418 } 2419 EXPORT_SYMBOL_GPL(ntb_transport_link_down); 2420 2421 /** 2422 * ntb_transport_link_query - Query transport link state 2423 * @qp: NTB transport layer queue to be queried 2424 * 2425 * Query connectivity to the remote system of the NTB transport queue 2426 * 2427 * RETURNS: true for link up or false for link down 2428 */ 2429 bool ntb_transport_link_query(struct ntb_transport_qp *qp) 2430 { 2431 if (!qp) 2432 return false; 2433 2434 return qp->link_is_up; 2435 } 2436 EXPORT_SYMBOL_GPL(ntb_transport_link_query); 2437 2438 /** 2439 * ntb_transport_qp_num - Query the qp number 2440 * @qp: NTB transport layer queue to be queried 2441 * 2442 * Query qp number of the NTB transport queue 2443 * 2444 * RETURNS: a zero based number specifying the qp number 2445 */ 2446 unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp) 2447 { 2448 if (!qp) 2449 return 0; 2450 2451 return qp->qp_num; 2452 } 2453 EXPORT_SYMBOL_GPL(ntb_transport_qp_num); 2454 2455 /** 2456 * ntb_transport_max_size - Query the max payload size of a qp 2457 * @qp: NTB transport layer queue to be queried 2458 * 2459 * Query the maximum payload size permissible on the given qp 2460 * 2461 * RETURNS: the max payload size of a qp 2462 */ 2463 unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) 2464 { 2465 unsigned int max_size; 2466 unsigned int copy_align; 2467 struct dma_chan *rx_chan, *tx_chan; 2468 2469 if (!qp) 2470 return 0; 2471 2472 rx_chan = qp->rx_dma_chan; 2473 tx_chan = qp->tx_dma_chan; 2474 2475 copy_align = max(rx_chan ? 
rx_chan->device->copy_align : 0, 2476 tx_chan ? tx_chan->device->copy_align : 0); 2477 2478 /* If DMA engine usage is possible, try to find the max size for that */ 2479 max_size = qp->tx_max_frame - sizeof(struct ntb_payload_header); 2480 max_size = round_down(max_size, 1 << copy_align); 2481 2482 return max_size; 2483 } 2484 EXPORT_SYMBOL_GPL(ntb_transport_max_size); 2485 2486 unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp) 2487 { 2488 unsigned int head = qp->tx_index; 2489 unsigned int tail = qp->remote_rx_info->entry; 2490 2491 return tail >= head ? tail - head : qp->tx_max_entry + tail - head; 2492 } 2493 EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry); 2494 2495 static void ntb_transport_doorbell_callback(void *data, int vector) 2496 { 2497 struct ntb_transport_ctx *nt = data; 2498 struct ntb_transport_qp *qp; 2499 u64 db_bits; 2500 unsigned int qp_num; 2501 2502 if (ntb_db_read(nt->ndev) & nt->msi_db_mask) { 2503 ntb_transport_msi_peer_desc_changed(nt); 2504 ntb_db_clear(nt->ndev, nt->msi_db_mask); 2505 } 2506 2507 db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & 2508 ntb_db_vector_mask(nt->ndev, vector)); 2509 2510 while (db_bits) { 2511 qp_num = __ffs(db_bits); 2512 qp = &nt->qp_vec[qp_num]; 2513 2514 if (qp->active) 2515 tasklet_schedule(&qp->rxc_db_work); 2516 2517 db_bits &= ~BIT_ULL(qp_num); 2518 } 2519 } 2520 2521 static const struct ntb_ctx_ops ntb_transport_ops = { 2522 .link_event = ntb_transport_event_callback, 2523 .db_event = ntb_transport_doorbell_callback, 2524 }; 2525 2526 static struct ntb_client ntb_transport_client = { 2527 .ops = { 2528 .probe = ntb_transport_probe, 2529 .remove = ntb_transport_free, 2530 }, 2531 }; 2532 2533 static int __init ntb_transport_init(void) 2534 { 2535 int rc; 2536 2537 pr_info("%s, version %s\n", NTB_TRANSPORT_DESC, NTB_TRANSPORT_VER); 2538 2539 if (debugfs_initialized()) 2540 nt_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); 2541 2542 rc = bus_register(&ntb_transport_bus); 2543 
if (rc) 2544 goto err_bus; 2545 2546 rc = ntb_register_client(&ntb_transport_client); 2547 if (rc) 2548 goto err_client; 2549 2550 return 0; 2551 2552 err_client: 2553 bus_unregister(&ntb_transport_bus); 2554 err_bus: 2555 debugfs_remove_recursive(nt_debugfs_dir); 2556 return rc; 2557 } 2558 module_init(ntb_transport_init); 2559 2560 static void __exit ntb_transport_exit(void) 2561 { 2562 ntb_unregister_client(&ntb_transport_client); 2563 bus_unregister(&ntb_transport_bus); 2564 debugfs_remove_recursive(nt_debugfs_dir); 2565 } 2566 module_exit(ntb_transport_exit); 2567