1 /* 2 * This file is provided under a dual BSD/GPLv2 license. When using or 3 * redistributing this file, you may do so under either license. 4 * 5 * GPL LICENSE SUMMARY 6 * 7 * Copyright(c) 2015 Intel Corporation. All rights reserved. 8 * Copyright(c) 2017 T-Platforms. All Rights Reserved. 9 * 10 * This program is free software; you can redistribute it and/or modify 11 * it under the terms of version 2 of the GNU General Public License as 12 * published by the Free Software Foundation. 13 * 14 * BSD LICENSE 15 * 16 * Copyright(c) 2015 Intel Corporation. All rights reserved. 17 * Copyright(c) 2017 T-Platforms. All Rights Reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions 21 * are met: 22 * 23 * * Redistributions of source code must retain the above copyright 24 * notice, this list of conditions and the following disclaimer. 25 * * Redistributions in binary form must reproduce the above copy 26 * notice, this list of conditions and the following disclaimer in 27 * the documentation and/or other materials provided with the 28 * distribution. 29 * * Neither the name of Intel Corporation nor the names of its 30 * contributors may be used to endorse or promote products derived 31 * from this software without specific prior written permission. 32 * 33 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 34 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 35 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 36 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 37 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 38 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 39 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 40 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 41 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 42 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 43 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 44 * 45 * PCIe NTB Perf Linux driver 46 */ 47 48 /* 49 * How to use this tool, by example. 50 * 51 * Assuming $DBG_DIR is something like: 52 * '/sys/kernel/debug/ntb_perf/0000:00:03.0' 53 * Suppose aside from local device there is at least one remote device 54 * connected to NTB with index 0. 55 *----------------------------------------------------------------------------- 56 * Eg: install driver with specified chunk/total orders and dma-enabled flag 57 * 58 * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma 59 *----------------------------------------------------------------------------- 60 * Eg: check NTB ports (index) and MW mapping information 61 * 62 * root@self# cat $DBG_DIR/info 63 *----------------------------------------------------------------------------- 64 * Eg: start performance test with peer (index 0) and get the test metrics 65 * 66 * root@self# echo 0 > $DBG_DIR/run 67 * root@self# cat $DBG_DIR/run 68 */ 69 70 #include <linux/init.h> 71 #include <linux/kernel.h> 72 #include <linux/module.h> 73 #include <linux/sched.h> 74 #include <linux/wait.h> 75 #include <linux/dma-mapping.h> 76 #include <linux/dmaengine.h> 77 #include <linux/pci.h> 78 #include <linux/ktime.h> 79 #include <linux/slab.h> 80 #include <linux/delay.h> 81 #include <linux/sizes.h> 82 #include <linux/workqueue.h> 83 #include <linux/debugfs.h> 84 #include <linux/random.h> 85 #include <linux/ntb.h> 86 87 #define DRIVER_NAME "ntb_perf" 88 #define DRIVER_VERSION "2.0" 89 90 MODULE_LICENSE("Dual BSD/GPL"); 91 MODULE_VERSION(DRIVER_VERSION); 92 MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>"); 93 MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool"); 94 95 #define MAX_THREADS_CNT 32 96 #define DEF_THREADS_CNT 1 97 #define MAX_CHUNK_SIZE SZ_1M 98 #define MAX_CHUNK_ORDER 20 /* no larger than 1M */ 99 100 #define DMA_TRIES 100 101 #define DMA_MDELAY 10 102 103 #define MSG_TRIES 500 104 #define MSG_UDELAY_LOW 1000 105 #define MSG_UDELAY_HIGH 2000 106 107 #define PERF_BUF_LEN 1024 108 109 static unsigned long max_mw_size; 110 module_param(max_mw_size, ulong, 0644); 111 MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size"); 112 113 static unsigned char chunk_order = 19; /* 512K */ 114 module_param(chunk_order, byte, 0644); 115 MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer"); 116 117 static unsigned char total_order = 30; /* 1G */ 118 module_param(total_order, byte, 0644); 119 MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer"); 120 121 static bool use_dma; /* default to 0 */ 122 module_param(use_dma, bool, 0644); 123 MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance"); 124 125 /*============================================================================== 126 * Perf driver data definition 127 *============================================================================== 128 */ 129 130 enum perf_cmd { 131 PERF_CMD_INVAL = -1,/* invalid spad command */ 132 PERF_CMD_SSIZE = 0, /* send out buffer size */ 133 PERF_CMD_RSIZE = 1, /* recv in buffer size */ 134 PERF_CMD_SXLAT = 2, /* send in buffer xlat */ 135 PERF_CMD_RXLAT = 3, /* recv out buffer xlat */ 136 PERF_CMD_CLEAR = 4, /* clear allocated memory */ 137 PERF_STS_DONE = 5, /* init is done */ 138 PERF_STS_LNKUP = 6, /* link up state flag */ 139 }; 140 141 struct perf_ctx; 142 143 struct perf_peer { 144 struct perf_ctx *perf; 145 int pidx; 146 int gidx; 147 148 /* Outbound MW params */ 149 u64 outbuf_xlat; 150 resource_size_t outbuf_size; 151 void __iomem *outbuf; 152 153 /* Inbound MW params */ 154 dma_addr_t inbuf_xlat; 155 resource_size_t inbuf_size; 156 void *inbuf; 157 158 /* NTB connection setup service */ 159 struct work_struct service; 160 unsigned long sts; 161 }; 162 #define to_peer_service(__work) \ 163 container_of(__work, struct perf_peer, service) 164 165 struct perf_thread { 166 struct perf_ctx *perf; 167 int tidx; 168 169 /* DMA-based test sync parameters */ 170 atomic_t dma_sync; 171 wait_queue_head_t dma_wait; 172 struct dma_chan *dma_chan; 173 174 /* Data source and measured statistics */ 175 void *src; 176 u64 copied; 177 ktime_t duration; 178 int status; 179 struct work_struct work; 180 }; 181 #define to_thread_work(__work) \ 182 container_of(__work, struct perf_thread, work) 183 184 struct perf_ctx { 185 struct ntb_dev *ntb; 186 187 /* Global device index and peers descriptors */ 188 int gidx; 189 int pcnt; 190 struct perf_peer *peers; 191 192 /* Performance measuring work-threads interface */ 193 unsigned long busy_flag; 194 wait_queue_head_t twait; 195 atomic_t tsync; 196 u8 tcnt; 197 struct perf_peer *test_peer; 198 struct perf_thread threads[MAX_THREADS_CNT]; 199 200 /* Scratchpad/Message IO operations */ 201 int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data); 202 int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd, 203 u64 *data); 204 205 struct dentry *dbgfs_dir; 206 }; 207 208 /* 209 * Scratchpads-base commands interface 210 */ 211 #define PERF_SPAD_CNT(_pcnt) \ 212 (3*((_pcnt) + 1)) 213 #define PERF_SPAD_CMD(_gidx) \ 214 (3*(_gidx)) 215 #define PERF_SPAD_LDATA(_gidx) \ 216 (3*(_gidx) + 1) 217 #define PERF_SPAD_HDATA(_gidx) \ 218 (3*(_gidx) + 2) 219 #define PERF_SPAD_NOTIFY(_gidx) \ 220 (BIT_ULL(_gidx)) 221 222 /* 223 * Messages-base commands interface 224 */ 225 #define PERF_MSG_CNT 3 226 #define PERF_MSG_CMD 0 227 #define PERF_MSG_LDATA 1 228 #define PERF_MSG_HDATA 2 229 230 /*============================================================================== 231 * Static data declarations 232 *============================================================================== 233 */ 234 235 static struct dentry *perf_dbgfs_topdir; 236 237 static struct workqueue_struct *perf_wq __read_mostly; 238 239 /*============================================================================== 240 * NTB cross-link commands execution service 241 *============================================================================== 242 */ 243 244 static void perf_terminate_test(struct perf_ctx *perf); 245 246 static inline bool perf_link_is_up(struct perf_peer *peer) 247 { 248 u64 link; 249 250 link = ntb_link_is_up(peer->perf->ntb, NULL, NULL); 251 return !!(link & BIT_ULL_MASK(peer->pidx)); 252 } 253 254 static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, 255 u64 data) 256 { 257 struct perf_ctx *perf = peer->perf; 258 int try; 259 u32 sts; 260 261 dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); 262 263 /* 264 * Perform predefined number of attempts before give up. 265 * We are sending the data to the port specific scratchpad, so 266 * to prevent a multi-port access race-condition. Additionally 267 * there is no need in local locking since only thread-safe 268 * service work is using this method. 269 */ 270 for (try = 0; try < MSG_TRIES; try++) { 271 if (!perf_link_is_up(peer)) 272 return -ENOLINK; 273 274 sts = ntb_peer_spad_read(perf->ntb, peer->pidx, 275 PERF_SPAD_CMD(perf->gidx)); 276 if (le32_to_cpu(sts) != PERF_CMD_INVAL) { 277 usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); 278 continue; 279 } 280 281 ntb_peer_spad_write(perf->ntb, peer->pidx, 282 PERF_SPAD_LDATA(perf->gidx), 283 cpu_to_le32(lower_32_bits(data))); 284 ntb_peer_spad_write(perf->ntb, peer->pidx, 285 PERF_SPAD_HDATA(perf->gidx), 286 cpu_to_le32(upper_32_bits(data))); 287 mmiowb(); 288 ntb_peer_spad_write(perf->ntb, peer->pidx, 289 PERF_SPAD_CMD(perf->gidx), 290 cpu_to_le32(cmd)); 291 mmiowb(); 292 ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx)); 293 294 dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n", 295 PERF_SPAD_NOTIFY(peer->gidx)); 296 297 break; 298 } 299 300 return try < MSG_TRIES ? 0 : -EAGAIN; 301 } 302 303 static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx, 304 enum perf_cmd *cmd, u64 *data) 305 { 306 struct perf_peer *peer; 307 u32 val; 308 309 ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); 310 311 /* 312 * We start scanning all over, since cleared DB may have been set 313 * by any peer. Yes, it makes peer with smaller index being 314 * serviced with greater priority, but it's convenient for spad 315 * and message code unification and simplicity. 316 */ 317 for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) { 318 peer = &perf->peers[*pidx]; 319 320 if (!perf_link_is_up(peer)) 321 continue; 322 323 val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx)); 324 val = le32_to_cpu(val); 325 if (val == PERF_CMD_INVAL) 326 continue; 327 328 *cmd = val; 329 330 val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx)); 331 *data = le32_to_cpu(val); 332 333 val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx)); 334 *data |= (u64)le32_to_cpu(val) << 32; 335 336 /* Next command can be retrieved from now */ 337 ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 338 cpu_to_le32(PERF_CMD_INVAL)); 339 340 dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); 341 342 return 0; 343 } 344 345 return -ENODATA; 346 } 347 348 static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, 349 u64 data) 350 { 351 struct perf_ctx *perf = peer->perf; 352 int try, ret; 353 u64 outbits; 354 355 dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); 356 357 /* 358 * Perform predefined number of attempts before give up. Message 359 * registers are free of race-condition problem when accessed 360 * from different ports, so we don't need splitting registers 361 * by global device index. We also won't have local locking, 362 * since the method is used from service work only. 363 */ 364 outbits = ntb_msg_outbits(perf->ntb); 365 for (try = 0; try < MSG_TRIES; try++) { 366 if (!perf_link_is_up(peer)) 367 return -ENOLINK; 368 369 ret = ntb_msg_clear_sts(perf->ntb, outbits); 370 if (ret) 371 return ret; 372 373 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA, 374 cpu_to_le32(lower_32_bits(data))); 375 376 if (ntb_msg_read_sts(perf->ntb) & outbits) { 377 usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); 378 continue; 379 } 380 381 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA, 382 cpu_to_le32(upper_32_bits(data))); 383 mmiowb(); 384 385 /* This call shall trigger peer message event */ 386 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, 387 cpu_to_le32(cmd)); 388 389 break; 390 } 391 392 return try < MSG_TRIES ? 0 : -EAGAIN; 393 } 394 395 static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx, 396 enum perf_cmd *cmd, u64 *data) 397 { 398 u64 inbits; 399 u32 val; 400 401 inbits = ntb_msg_inbits(perf->ntb); 402 403 if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3) 404 return -ENODATA; 405 406 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD); 407 *cmd = le32_to_cpu(val); 408 409 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA); 410 *data = le32_to_cpu(val); 411 412 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA); 413 *data |= (u64)le32_to_cpu(val) << 32; 414 415 /* Next command can be retrieved from now */ 416 ntb_msg_clear_sts(perf->ntb, inbits); 417 418 dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); 419 420 return 0; 421 } 422 423 static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data) 424 { 425 struct perf_ctx *perf = peer->perf; 426 427 if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT) 428 return perf->cmd_send(peer, cmd, data); 429 430 dev_err(&perf->ntb->dev, "Send invalid command\n"); 431 return -EINVAL; 432 } 433 434 static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd) 435 { 436 switch (cmd) { 437 case PERF_CMD_SSIZE: 438 case PERF_CMD_RSIZE: 439 case PERF_CMD_SXLAT: 440 case PERF_CMD_RXLAT: 441 case PERF_CMD_CLEAR: 442 break; 443 default: 444 dev_err(&peer->perf->ntb->dev, "Exec invalid command\n"); 445 return -EINVAL; 446 } 447 448 /* No need of memory barrier, since bit ops have invernal lock */ 449 set_bit(cmd, &peer->sts); 450 451 dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd); 452 453 (void)queue_work(system_highpri_wq, &peer->service); 454 455 return 0; 456 } 457 458 static int perf_cmd_recv(struct perf_ctx *perf) 459 { 460 struct perf_peer *peer; 461 int ret, pidx, cmd; 462 u64 data; 463 464 while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) { 465 peer = &perf->peers[pidx]; 466 467 switch (cmd) { 468 case PERF_CMD_SSIZE: 469 peer->inbuf_size = data; 470 return perf_cmd_exec(peer, PERF_CMD_RSIZE); 471 case PERF_CMD_SXLAT: 472 peer->outbuf_xlat = data; 473 return perf_cmd_exec(peer, PERF_CMD_RXLAT); 474 default: 475 dev_err(&perf->ntb->dev, "Recv invalid command\n"); 476 return -EINVAL; 477 } 478 } 479 480 /* Return 0 if no data left to process, otherwise an error */ 481 return ret == -ENODATA ? 0 : ret; 482 } 483 484 static void perf_link_event(void *ctx) 485 { 486 struct perf_ctx *perf = ctx; 487 struct perf_peer *peer; 488 bool lnk_up; 489 int pidx; 490 491 for (pidx = 0; pidx < perf->pcnt; pidx++) { 492 peer = &perf->peers[pidx]; 493 494 lnk_up = perf_link_is_up(peer); 495 496 if (lnk_up && 497 !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) { 498 perf_cmd_exec(peer, PERF_CMD_SSIZE); 499 } else if (!lnk_up && 500 test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) { 501 perf_cmd_exec(peer, PERF_CMD_CLEAR); 502 } 503 } 504 } 505 506 static void perf_db_event(void *ctx, int vec) 507 { 508 struct perf_ctx *perf = ctx; 509 510 dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec, 511 ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb)); 512 513 /* Just receive all available commands */ 514 (void)perf_cmd_recv(perf); 515 } 516 517 static void perf_msg_event(void *ctx) 518 { 519 struct perf_ctx *perf = ctx; 520 521 dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n", 522 ntb_msg_read_sts(perf->ntb)); 523 524 /* Messages are only sent one-by-one */ 525 (void)perf_cmd_recv(perf); 526 } 527 528 static const struct ntb_ctx_ops perf_ops = { 529 .link_event = perf_link_event, 530 .db_event = perf_db_event, 531 .msg_event = perf_msg_event 532 }; 533 534 static void perf_free_outbuf(struct perf_peer *peer) 535 { 536 (void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); 537 } 538 539 static int perf_setup_outbuf(struct perf_peer *peer) 540 { 541 struct perf_ctx *perf = peer->perf; 542 int ret; 543 544 /* Outbuf size can be unaligned due to custom max_mw_size */ 545 ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, 546 peer->outbuf_xlat, peer->outbuf_size); 547 if (ret) { 548 dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n"); 549 return ret; 550 } 551 552 /* Initialization is finally done */ 553 set_bit(PERF_STS_DONE, &peer->sts); 554 555 return 0; 556 } 557 558 static void perf_free_inbuf(struct perf_peer *peer) 559 { 560 if (!peer->inbuf) 561 return; 562 563 (void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); 564 dma_free_coherent(&peer->perf->ntb->dev, peer->inbuf_size, 565 peer->inbuf, peer->inbuf_xlat); 566 peer->inbuf = NULL; 567 } 568 569 static int perf_setup_inbuf(struct perf_peer *peer) 570 { 571 resource_size_t xlat_align, size_align, size_max; 572 struct perf_ctx *perf = peer->perf; 573 int ret; 574 575 /* Get inbound MW parameters */ 576 ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx, 577 &xlat_align, &size_align, &size_max); 578 if (ret) { 579 dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n"); 580 return ret; 581 } 582 583 if (peer->inbuf_size > size_max) { 584 dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n", 585 &peer->inbuf_size, &size_max); 586 return -EINVAL; 587 } 588 589 peer->inbuf_size = round_up(peer->inbuf_size, size_align); 590 591 perf_free_inbuf(peer); 592 593 peer->inbuf = dma_alloc_coherent(&perf->ntb->dev, peer->inbuf_size, 594 &peer->inbuf_xlat, GFP_KERNEL); 595 if (!peer->inbuf) { 596 dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n", 597 &peer->inbuf_size); 598 return -ENOMEM; 599 } 600 if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) { 601 dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n"); 602 goto err_free_inbuf; 603 } 604 605 ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, 606 peer->inbuf_xlat, peer->inbuf_size); 607 if (ret) { 608 dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n"); 609 goto err_free_inbuf; 610 } 611 612 /* 613 * We submit inbuf xlat transmission cmd for execution here to follow 614 * the code architecture, even though this method is called from service 615 * work itself so the command will be executed right after it returns. 616 */ 617 (void)perf_cmd_exec(peer, PERF_CMD_SXLAT); 618 619 return 0; 620 621 err_free_inbuf: 622 perf_free_inbuf(peer); 623 624 return ret; 625 } 626 627 static void perf_service_work(struct work_struct *work) 628 { 629 struct perf_peer *peer = to_peer_service(work); 630 631 if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts)) 632 perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size); 633 634 if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts)) 635 perf_setup_inbuf(peer); 636 637 if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts)) 638 perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat); 639 640 if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts)) 641 perf_setup_outbuf(peer); 642 643 if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) { 644 clear_bit(PERF_STS_DONE, &peer->sts); 645 if (test_bit(0, &peer->perf->busy_flag) && 646 peer == peer->perf->test_peer) { 647 dev_warn(&peer->perf->ntb->dev, 648 "Freeing while test on-fly\n"); 649 perf_terminate_test(peer->perf); 650 } 651 perf_free_outbuf(peer); 652 perf_free_inbuf(peer); 653 } 654 } 655 656 static int perf_init_service(struct perf_ctx *perf) 657 { 658 u64 mask; 659 660 if (ntb_peer_mw_count(perf->ntb) < perf->pcnt + 1) { 661 dev_err(&perf->ntb->dev, "Not enough memory windows\n"); 662 return -EINVAL; 663 } 664 665 if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) { 666 perf->cmd_send = perf_msg_cmd_send; 667 perf->cmd_recv = perf_msg_cmd_recv; 668 669 dev_dbg(&perf->ntb->dev, "Message service initialized\n"); 670 671 return 0; 672 } 673 674 dev_dbg(&perf->ntb->dev, "Message service unsupported\n"); 675 676 mask = GENMASK_ULL(perf->pcnt, 0); 677 if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) && 678 (ntb_db_valid_mask(perf->ntb) & mask) == mask) { 679 perf->cmd_send = perf_spad_cmd_send; 680 perf->cmd_recv = perf_spad_cmd_recv; 681 682 dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n"); 683 684 return 0; 685 } 686 687 dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n"); 688 689 dev_err(&perf->ntb->dev, "Command services unsupported\n"); 690 691 return -EINVAL; 692 } 693 694 static int perf_enable_service(struct perf_ctx *perf) 695 { 696 u64 mask, incmd_bit; 697 int ret, sidx, scnt; 698 699 mask = ntb_db_valid_mask(perf->ntb); 700 (void)ntb_db_set_mask(perf->ntb, mask); 701 702 ret = ntb_set_ctx(perf->ntb, perf, &perf_ops); 703 if (ret) 704 return ret; 705 706 if (perf->cmd_send == perf_msg_cmd_send) { 707 u64 inbits, outbits; 708 709 inbits = ntb_msg_inbits(perf->ntb); 710 outbits = ntb_msg_outbits(perf->ntb); 711 (void)ntb_msg_set_mask(perf->ntb, inbits | outbits); 712 713 incmd_bit = BIT_ULL(__ffs64(inbits)); 714 ret = ntb_msg_clear_mask(perf->ntb, incmd_bit); 715 716 dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit); 717 } else { 718 scnt = ntb_spad_count(perf->ntb); 719 for (sidx = 0; sidx < scnt; sidx++) 720 ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL); 721 incmd_bit = PERF_SPAD_NOTIFY(perf->gidx); 722 ret = ntb_db_clear_mask(perf->ntb, incmd_bit); 723 724 dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit); 725 } 726 if (ret) { 727 ntb_clear_ctx(perf->ntb); 728 return ret; 729 } 730 731 ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); 732 /* Might be not necessary */ 733 ntb_link_event(perf->ntb); 734 735 return 0; 736 } 737 738 static void perf_disable_service(struct perf_ctx *perf) 739 { 740 int pidx; 741 742 ntb_link_disable(perf->ntb); 743 744 if (perf->cmd_send == perf_msg_cmd_send) { 745 u64 inbits; 746 747 inbits = ntb_msg_inbits(perf->ntb); 748 (void)ntb_msg_set_mask(perf->ntb, inbits); 749 } else { 750 (void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); 751 } 752 753 ntb_clear_ctx(perf->ntb); 754 755 for (pidx = 0; pidx < perf->pcnt; pidx++) 756 perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR); 757 758 for (pidx = 0; pidx < perf->pcnt; pidx++) 759 flush_work(&perf->peers[pidx].service); 760 } 761 762 /*============================================================================== 763 * Performance measuring work-thread 764 *============================================================================== 765 */ 766 767 static void perf_dma_copy_callback(void *data) 768 { 769 struct perf_thread *pthr = data; 770 771 atomic_dec(&pthr->dma_sync); 772 wake_up(&pthr->dma_wait); 773 } 774 775 static int perf_copy_chunk(struct perf_thread *pthr, 776 void __iomem *dst, void *src, size_t len) 777 { 778 struct dma_async_tx_descriptor *tx; 779 struct dmaengine_unmap_data *unmap; 780 struct device *dma_dev; 781 int try = 0, ret = 0; 782 783 if (!use_dma) { 784 memcpy_toio(dst, src, len); 785 goto ret_check_tsync; 786 } 787 788 dma_dev = pthr->dma_chan->device->dev; 789 790 if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src), 791 offset_in_page(dst), len)) 792 return -EIO; 793 794 unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT); 795 if (!unmap) 796 return -ENOMEM; 797 798 unmap->len = len; 799 unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src), 800 offset_in_page(src), len, DMA_TO_DEVICE); 801 if (dma_mapping_error(dma_dev, unmap->addr[0])) { 802 ret = -EIO; 803 goto err_free_resource; 804 } 805 unmap->to_cnt = 1; 806 807 unmap->addr[1] = dma_map_page(dma_dev, virt_to_page(dst), 808 offset_in_page(dst), len, DMA_FROM_DEVICE); 809 if (dma_mapping_error(dma_dev, unmap->addr[1])) { 810 ret = -EIO; 811 goto err_free_resource; 812 } 813 unmap->from_cnt = 1; 814 815 do { 816 tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1], 817 unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 818 if (!tx) 819 msleep(DMA_MDELAY); 820 } while (!tx && (try++ < DMA_TRIES)); 821 822 if (!tx) { 823 ret = -EIO; 824 goto err_free_resource; 825 } 826 827 tx->callback = perf_dma_copy_callback; 828 tx->callback_param = pthr; 829 dma_set_unmap(tx, unmap); 830 831 if (dma_submit_error(dmaengine_submit(tx))) { 832 dmaengine_unmap_put(unmap); 833 goto err_free_resource; 834 } 835 836 dmaengine_unmap_put(unmap); 837 838 atomic_inc(&pthr->dma_sync); 839 dma_async_issue_pending(pthr->dma_chan); 840 841 ret_check_tsync: 842 return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR; 843 844 err_free_resource: 845 dmaengine_unmap_put(unmap); 846 847 return ret; 848 } 849 850 static bool perf_dma_filter(struct dma_chan *chan, void *data) 851 { 852 struct perf_ctx *perf = data; 853 int node; 854 855 node = dev_to_node(&perf->ntb->dev); 856 857 return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev); 858 } 859 860 static int perf_init_test(struct perf_thread *pthr) 861 { 862 struct perf_ctx *perf = pthr->perf; 863 dma_cap_mask_t dma_mask; 864 865 pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL, 866 dev_to_node(&perf->ntb->dev)); 867 if (!pthr->src) 868 return -ENOMEM; 869 870 get_random_bytes(pthr->src, perf->test_peer->outbuf_size); 871 872 if (!use_dma) 873 return 0; 874 875 dma_cap_zero(dma_mask); 876 dma_cap_set(DMA_MEMCPY, dma_mask); 877 pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf); 878 if (!pthr->dma_chan) { 879 dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n", 880 pthr->tidx); 881 atomic_dec(&perf->tsync); 882 wake_up(&perf->twait); 883 kfree(pthr->src); 884 return -ENODEV; 885 } 886 887 atomic_set(&pthr->dma_sync, 0); 888 889 return 0; 890 } 891 892 static int perf_run_test(struct perf_thread *pthr) 893 { 894 struct perf_peer *peer = pthr->perf->test_peer; 895 struct perf_ctx *perf = pthr->perf; 896 void __iomem *flt_dst, *bnd_dst; 897 u64 total_size, chunk_size; 898 void *flt_src; 899 int ret = 0; 900 901 total_size = 1ULL << total_order; 902 chunk_size = 1ULL << chunk_order; 903 chunk_size = min_t(u64, peer->outbuf_size, chunk_size); 904 905 flt_src = pthr->src; 906 bnd_dst = peer->outbuf + peer->outbuf_size; 907 flt_dst = peer->outbuf; 908 909 pthr->duration = ktime_get(); 910 911 /* Copied field is cleared on test launch stage */ 912 while (pthr->copied < total_size) { 913 ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size); 914 if (ret) { 915 dev_err(&perf->ntb->dev, "%d: Got error %d on test\n", 916 pthr->tidx, ret); 917 return ret; 918 } 919 920 pthr->copied += chunk_size; 921 922 flt_dst += chunk_size; 923 flt_src += chunk_size; 924 if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) { 925 flt_dst = peer->outbuf; 926 flt_src = pthr->src; 927 } 928 929 /* Give up CPU to give a chance for other threads to use it */ 930 schedule(); 931 } 932 933 return 0; 934 } 935 936 static int perf_sync_test(struct perf_thread *pthr) 937 { 938 struct perf_ctx *perf = pthr->perf; 939 940 if (!use_dma) 941 goto no_dma_ret; 942 943 wait_event(pthr->dma_wait, 944 (atomic_read(&pthr->dma_sync) == 0 || 945 atomic_read(&perf->tsync) < 0)); 946 947 if (atomic_read(&perf->tsync) < 0) 948 return -EINTR; 949 950 no_dma_ret: 951 pthr->duration = ktime_sub(ktime_get(), pthr->duration); 952 953 dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n", 954 pthr->tidx, pthr->copied); 955 956 dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n", 957 pthr->tidx, ktime_to_us(pthr->duration)); 958 959 dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx, 960 div64_u64(pthr->copied, ktime_to_us(pthr->duration))); 961 962 return 0; 963 } 964 965 static void perf_clear_test(struct perf_thread *pthr) 966 { 967 struct perf_ctx *perf = pthr->perf; 968 969 if (!use_dma) 970 goto no_dma_notify; 971 972 /* 973 * If test finished without errors, termination isn't needed. 974 * We call it anyway just to be sure of the transfers completion. 975 */ 976 (void)dmaengine_terminate_sync(pthr->dma_chan); 977 978 dma_release_channel(pthr->dma_chan); 979 980 no_dma_notify: 981 atomic_dec(&perf->tsync); 982 wake_up(&perf->twait); 983 kfree(pthr->src); 984 } 985 986 static void perf_thread_work(struct work_struct *work) 987 { 988 struct perf_thread *pthr = to_thread_work(work); 989 int ret; 990 991 /* 992 * Perform stages in compliance with use_dma flag value. 993 * Test status is changed only if error happened, otherwise 994 * status -ENODATA is kept while test is on-fly. Results 995 * synchronization is performed only if test fininshed 996 * without an error or interruption. 997 */ 998 ret = perf_init_test(pthr); 999 if (ret) { 1000 pthr->status = ret; 1001 return; 1002 } 1003 1004 ret = perf_run_test(pthr); 1005 if (ret) { 1006 pthr->status = ret; 1007 goto err_clear_test; 1008 } 1009 1010 pthr->status = perf_sync_test(pthr); 1011 1012 err_clear_test: 1013 perf_clear_test(pthr); 1014 } 1015 1016 static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt) 1017 { 1018 if (tcnt == 0 || tcnt > MAX_THREADS_CNT) 1019 return -EINVAL; 1020 1021 if (test_and_set_bit_lock(0, &perf->busy_flag)) 1022 return -EBUSY; 1023 1024 perf->tcnt = tcnt; 1025 1026 clear_bit_unlock(0, &perf->busy_flag); 1027 1028 return 0; 1029 } 1030 1031 static void perf_terminate_test(struct perf_ctx *perf) 1032 { 1033 int tidx; 1034 1035 atomic_set(&perf->tsync, -1); 1036 wake_up(&perf->twait); 1037 1038 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { 1039 wake_up(&perf->threads[tidx].dma_wait); 1040 cancel_work_sync(&perf->threads[tidx].work); 1041 } 1042 } 1043 1044 static int perf_submit_test(struct perf_peer *peer) 1045 { 1046 struct perf_ctx *perf = peer->perf; 1047 struct perf_thread *pthr; 1048 int tidx, ret; 1049 1050 if (!test_bit(PERF_STS_DONE, &peer->sts)) 1051 return -ENOLINK; 1052 1053 if (test_and_set_bit_lock(0, &perf->busy_flag)) 1054 return -EBUSY; 1055 1056 perf->test_peer = peer; 1057 atomic_set(&perf->tsync, perf->tcnt); 1058 1059 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { 1060 pthr = &perf->threads[tidx]; 1061 1062 pthr->status = -ENODATA; 1063 pthr->copied = 0; 1064 pthr->duration = ktime_set(0, 0); 1065 if (tidx < perf->tcnt) 1066 (void)queue_work(perf_wq, &pthr->work); 1067 } 1068 1069 ret = wait_event_interruptible(perf->twait, 1070 atomic_read(&perf->tsync) <= 0); 1071 if (ret == -ERESTARTSYS) { 1072 perf_terminate_test(perf); 1073 ret = -EINTR; 1074 } 1075 1076 clear_bit_unlock(0, &perf->busy_flag); 1077 1078 return ret; 1079 } 1080 1081 static int perf_read_stats(struct perf_ctx *perf, char *buf, 1082 size_t size, ssize_t *pos) 1083 { 1084 struct perf_thread *pthr; 1085 int tidx; 1086 1087 if (test_and_set_bit_lock(0, &perf->busy_flag)) 1088 return -EBUSY; 1089 1090 (*pos) += scnprintf(buf + *pos, size - *pos, 1091 " Peer %d test statistics:\n", perf->test_peer->pidx); 1092 1093 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { 1094 pthr = &perf->threads[tidx]; 1095 1096 if (pthr->status == -ENODATA) 1097 continue; 1098 1099 if (pthr->status) { 1100 (*pos) += scnprintf(buf + *pos, size - *pos, 1101 "%d: error status %d\n", tidx, pthr->status); 1102 continue; 1103 } 1104 1105 (*pos) += scnprintf(buf + *pos, size - *pos, 1106 "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", 1107 tidx, pthr->copied, ktime_to_us(pthr->duration), 1108 div64_u64(pthr->copied, ktime_to_us(pthr->duration))); 1109 } 1110 1111 clear_bit_unlock(0, &perf->busy_flag); 1112 1113 return 0; 1114 } 1115 1116 static void perf_init_threads(struct perf_ctx *perf) 1117 { 1118 struct perf_thread *pthr; 1119 int tidx; 1120 1121 perf->tcnt = DEF_THREADS_CNT; 1122 perf->test_peer = &perf->peers[0]; 1123 init_waitqueue_head(&perf->twait); 1124 1125 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { 1126 pthr = &perf->threads[tidx]; 1127 1128 pthr->perf = perf; 1129 pthr->tidx = tidx; 1130 pthr->status = -ENODATA; 1131 init_waitqueue_head(&pthr->dma_wait); 1132 INIT_WORK(&pthr->work, perf_thread_work); 1133 } 1134 } 1135 1136 static void perf_clear_threads(struct perf_ctx *perf) 1137 { 1138 perf_terminate_test(perf); 1139 } 1140 1141 /*============================================================================== 1142 * DebugFS nodes 1143 *============================================================================== 1144 */ 1145 1146 static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf, 1147 size_t size, loff_t *offp) 1148 { 1149 struct perf_ctx *perf = filep->private_data; 1150 struct perf_peer *peer; 1151 size_t buf_size; 1152 ssize_t pos = 0; 1153 int ret, pidx; 1154 char *buf; 1155 1156 buf_size = min_t(size_t, size, 0x1000U); 1157 1158 buf = kmalloc(buf_size, GFP_KERNEL); 1159 if (!buf) 1160 return -ENOMEM; 1161 1162 pos += scnprintf(buf + pos, buf_size - pos, 1163 " Performance measuring tool info:\n\n"); 1164 1165 pos += scnprintf(buf + pos, buf_size - pos, 1166 "Local port %d, Global index %d\n", ntb_port_number(perf->ntb), 1167 perf->gidx); 1168 pos += scnprintf(buf + pos, buf_size - pos, "Test status: "); 1169 if (test_bit(0, &perf->busy_flag)) { 1170 pos += scnprintf(buf + pos, buf_size - pos, 1171 "on-fly with port %d (%d)\n", 1172 ntb_peer_port_number(perf->ntb, perf->test_peer->pidx), 1173 perf->test_peer->pidx); 1174 } else { 1175 pos += scnprintf(buf + pos, buf_size - pos, "idle\n"); 1176 } 1177 1178 for (pidx = 0; pidx < perf->pcnt; pidx++) { 1179 peer = &perf->peers[pidx]; 1180 1181 pos += scnprintf(buf + pos, buf_size - pos, 1182 "Port %d (%d), Global index %d:\n", 1183 ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx, 1184 peer->gidx); 1185 1186 pos += scnprintf(buf + pos, buf_size - pos, 1187 "\tLink status: %s\n", 1188 test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down"); 1189 1190 pos += scnprintf(buf + pos, buf_size - pos, 1191 "\tOut buffer addr 0x%pK\n", peer->outbuf); 1192 1193 pos += scnprintf(buf + pos, buf_size - pos, 1194 "\tOut buffer size %pa\n", &peer->outbuf_size); 1195 1196 pos += scnprintf(buf + pos, buf_size - pos, 1197 "\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat); 1198 1199 if (!peer->inbuf) { 1200 pos += scnprintf(buf + pos, buf_size - pos, 1201 "\tIn buffer addr: unallocated\n"); 1202 continue; 1203 } 1204 1205 pos += scnprintf(buf + pos, buf_size - pos, 1206 "\tIn buffer addr 0x%pK\n", peer->inbuf); 1207 1208 pos += scnprintf(buf + pos, buf_size - pos, 1209 "\tIn buffer size %pa\n", &peer->inbuf_size); 1210 1211 pos += scnprintf(buf + pos, buf_size - pos, 1212 "\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat); 1213 } 1214 1215 ret = simple_read_from_buffer(ubuf, size, offp, buf, pos); 1216 kfree(buf); 1217 1218 return ret; 1219 } 1220 1221 static const struct file_operations perf_dbgfs_info = { 1222 .open = simple_open, 1223 .read = perf_dbgfs_read_info 1224 }; 1225 1226 static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf, 1227 size_t size, loff_t *offp) 1228 { 1229 struct perf_ctx *perf = filep->private_data; 1230 ssize_t ret, pos = 0; 1231 char *buf; 1232 1233 buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL); 1234 if (!buf) 1235 return -ENOMEM; 1236 1237 ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos); 1238 if (ret) 1239 goto err_free; 1240 1241 ret = simple_read_from_buffer(ubuf, size, offp, buf, pos); 1242 err_free: 1243 kfree(buf); 1244 1245 return ret; 1246 } 1247 1248 static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf, 1249 size_t size, loff_t *offp) 1250 { 1251 struct perf_ctx *perf = filep->private_data; 1252 struct perf_peer *peer; 1253 int pidx, ret; 1254 1255 ret = kstrtoint_from_user(ubuf, size, 0, &pidx); 1256 if (ret) 1257 return ret; 1258 1259 if (pidx < 0 || pidx >= perf->pcnt) 1260 return -EINVAL; 1261 1262 peer = &perf->peers[pidx]; 1263 1264 ret = perf_submit_test(peer); 1265 if (ret) 1266 return ret; 1267 1268 return size; 1269 } 1270 1271 static const struct file_operations perf_dbgfs_run = { 1272 .open = simple_open, 1273 .read = perf_dbgfs_read_run, 1274 .write = perf_dbgfs_write_run 1275 }; 1276 1277 static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf, 1278 size_t size, loff_t *offp) 1279 { 1280 struct perf_ctx *perf = filep->private_data; 1281 char buf[8]; 1282 ssize_t pos; 1283 1284 pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt); 1285 1286 return simple_read_from_buffer(ubuf, size, offp, buf, pos); 1287 } 1288 1289 static ssize_t perf_dbgfs_write_tcnt(struct file *filep, 1290 const char __user *ubuf, 1291 size_t size, loff_t *offp) 1292 { 1293 struct perf_ctx *perf = filep->private_data; 1294 int ret; 1295 u8 val; 1296 1297 ret = kstrtou8_from_user(ubuf, size, 0, &val); 1298 if (ret) 1299 return ret; 1300 1301 ret = perf_set_tcnt(perf, val); 1302 if (ret) 1303 return ret; 1304 1305 return size; 1306 } 1307 1308 static const struct file_operations perf_dbgfs_tcnt = { 1309 .open = simple_open, 1310 .read = perf_dbgfs_read_tcnt, 1311 .write = perf_dbgfs_write_tcnt 1312 }; 1313 1314 static void perf_setup_dbgfs(struct perf_ctx *perf) 1315 { 1316 struct pci_dev *pdev = perf->ntb->pdev; 1317 1318 perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir); 1319 if (!perf->dbgfs_dir) { 1320 dev_warn(&perf->ntb->dev, "DebugFS unsupported\n"); 1321 return; 1322 } 1323 1324 debugfs_create_file("info", 0600, perf->dbgfs_dir, perf, 1325 &perf_dbgfs_info); 1326 1327 debugfs_create_file("run", 0600, perf->dbgfs_dir, perf, 1328 &perf_dbgfs_run); 1329 1330 debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf, 1331 &perf_dbgfs_tcnt); 1332 1333 /* They are made read-only for test exec safety and integrity */ 1334 debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order); 1335 1336 debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order); 1337 1338 debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma); 1339 } 1340 1341 static void perf_clear_dbgfs(struct perf_ctx *perf) 1342 { 1343 debugfs_remove_recursive(perf->dbgfs_dir); 1344 } 1345 1346 /*============================================================================== 1347 * Basic driver initialization 1348 *============================================================================== 1349 */ 1350 1351 static struct perf_ctx *perf_create_data(struct ntb_dev *ntb) 1352 { 1353 struct perf_ctx *perf; 1354 1355 perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL); 1356 if (!perf) 1357 return ERR_PTR(-ENOMEM); 1358 1359 perf->pcnt = ntb_peer_port_count(ntb); 1360 perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers), 1361 GFP_KERNEL); 1362 if (!perf->peers) 1363 return ERR_PTR(-ENOMEM); 1364 1365 perf->ntb = ntb; 1366 1367 return perf; 1368 } 1369 1370 static int perf_setup_peer_mw(struct perf_peer *peer) 1371 { 1372 struct perf_ctx *perf = peer->perf; 1373 phys_addr_t phys_addr; 1374 int ret; 1375 1376 /* Get outbound MW parameters and map it */ 1377 ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, 1378 &peer->outbuf_size); 1379 if (ret) 1380 return ret; 1381 1382 peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr, 1383 peer->outbuf_size); 1384 if (!peer->outbuf) 1385 return -ENOMEM; 1386 1387 if (max_mw_size && peer->outbuf_size > max_mw_size) { 1388 peer->outbuf_size = max_mw_size; 1389 dev_warn(&peer->perf->ntb->dev, 1390 "Peer %d outbuf reduced to %pa\n", peer->pidx, 1391 &peer->outbuf_size); 1392 } 1393 1394 return 0; 1395 } 1396 1397 static int perf_init_peers(struct perf_ctx *perf) 1398 { 1399 struct perf_peer *peer; 1400 int pidx, lport, ret; 1401 1402 lport = ntb_port_number(perf->ntb); 1403 perf->gidx = -1; 1404 for (pidx = 0; pidx < perf->pcnt; pidx++) { 1405 peer = &perf->peers[pidx]; 1406 1407 peer->perf = perf; 1408 peer->pidx = pidx; 1409 if (lport < ntb_peer_port_number(perf->ntb, pidx)) { 1410 if (perf->gidx == -1) 1411 perf->gidx = pidx; 1412 peer->gidx = pidx + 1; 1413 } else { 1414 peer->gidx = pidx; 1415 } 1416 INIT_WORK(&peer->service, perf_service_work); 1417 } 1418 if (perf->gidx == -1) 1419 perf->gidx = pidx; 1420 1421 for (pidx = 0; pidx < perf->pcnt; pidx++) { 1422 ret = perf_setup_peer_mw(&perf->peers[pidx]); 1423 if (ret) 1424 return ret; 1425 } 1426 1427 dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx); 1428 1429 return 0; 1430 } 1431 1432 static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) 1433 { 1434 struct perf_ctx *perf; 1435 int ret; 1436 1437 perf = perf_create_data(ntb); 1438 if (IS_ERR(perf)) 1439 return PTR_ERR(perf); 1440 1441 ret = perf_init_peers(perf); 1442 if (ret) 1443 return ret; 1444 1445 perf_init_threads(perf); 1446 1447 ret = perf_init_service(perf); 1448 if (ret) 1449 return ret; 1450 1451 ret = perf_enable_service(perf); 1452 if (ret) 1453 return ret; 1454 1455 perf_setup_dbgfs(perf); 1456 1457 return 0; 1458 } 1459 1460 static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) 1461 { 1462 struct perf_ctx *perf = ntb->ctx; 1463 1464 perf_clear_dbgfs(perf); 1465 1466 perf_disable_service(perf); 1467 1468 perf_clear_threads(perf); 1469 } 1470 1471 static struct ntb_client perf_client = { 1472 .ops = { 1473 .probe = perf_probe, 1474 .remove = perf_remove 1475 } 1476 }; 1477 1478 static int __init perf_init(void) 1479 { 1480 int ret; 1481 1482 if (chunk_order > MAX_CHUNK_ORDER) { 1483 chunk_order = MAX_CHUNK_ORDER; 1484 pr_info("Chunk order reduced to %hhu\n", chunk_order); 1485 } 1486 1487 if (total_order < chunk_order) { 1488 total_order = chunk_order; 1489 pr_info("Total data order reduced to %hhu\n", total_order); 1490 } 1491 1492 perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0); 1493 if (!perf_wq) 1494 return -ENOMEM; 1495 1496 if (debugfs_initialized()) 1497 perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); 1498 1499 ret = ntb_register_client(&perf_client); 1500 if (ret) { 1501 debugfs_remove_recursive(perf_dbgfs_topdir); 1502 destroy_workqueue(perf_wq); 1503 } 1504 1505 return ret; 1506 } 1507 module_init(perf_init); 1508 1509 static void __exit perf_exit(void) 1510 { 1511 ntb_unregister_client(&perf_client); 1512 debugfs_remove_recursive(perf_dbgfs_topdir); 1513 destroy_workqueue(perf_wq); 1514 } 1515 module_exit(perf_exit); 1516 1517