/*
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 * Copyright(c) 2017 T-Platforms. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 * Copyright(c) 2017 T-Platforms. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe NTB Perf Linux driver
 */

/*
 * How to use this tool, by example.
 *
 * Assuming $DBG_DIR is something like:
 * '/sys/kernel/debug/ntb_perf/0000:00:03.0'
 * Suppose that, aside from the local device, there is at least one remote
 * device connected to the NTB with peer index 0.
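 *
 * Note: the number of parallel measurement threads (1..32, default 1) can
 * be tuned at run time via the 'threads_count' debugfs node, e.g. with
 * 'echo 4 > $DBG_DIR/threads_count' (the value 4 is purely illustrative).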
 *-----------------------------------------------------------------------------
 * Eg: install driver with specified chunk/total orders and dma-enabled flag
 *
 * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma
 *-----------------------------------------------------------------------------
 * Eg: check NTB ports (index) and MW mapping information
 *
 * root@self# cat $DBG_DIR/info
 *-----------------------------------------------------------------------------
 * Eg: start performance test with peer (index 0) and get the test metrics
 *
 * root@self# echo 0 > $DBG_DIR/run
 * root@self# cat $DBG_DIR/run
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/pci.h>
#include <linux/ktime.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/random.h>
#include <linux/ntb.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_VERSION		"2.0"

MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>");
MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");

#define MAX_THREADS_CNT		32
#define DEF_THREADS_CNT		1
#define MAX_CHUNK_SIZE		SZ_1M
#define MAX_CHUNK_ORDER		20 /* no larger than 1M */

#define DMA_TRIES		100
#define DMA_MDELAY		10

#define MSG_TRIES		1000
#define MSG_UDELAY_LOW		1000
#define MSG_UDELAY_HIGH		2000

#define PERF_BUF_LEN		1024

static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size");

static unsigned char chunk_order = 19; /* 512K */
module_param(chunk_order, byte, 0644);
MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer");

static unsigned char total_order = 30; /* 1G */
module_param(total_order, byte, 0644);
MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer");

static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");

/*==============================================================================
 *                        Perf driver data definition
 *==============================================================================
 */

enum perf_cmd {
	PERF_CMD_INVAL = -1,/* invalid spad command */
	PERF_CMD_SSIZE = 0, /* send out buffer size */
	PERF_CMD_RSIZE = 1, /* recv in buffer size */
	PERF_CMD_SXLAT = 2, /* send in buffer xlat */
	PERF_CMD_RXLAT = 3, /* recv out buffer xlat */
	PERF_CMD_CLEAR = 4, /* clear allocated memory */
	PERF_STS_DONE  = 5, /* init is done */
	PERF_STS_LNKUP = 6, /* link up state flag */
};

struct perf_ctx;

struct perf_peer {
	struct perf_ctx	*perf;
	int pidx;
	int gidx;

	/* Outbound MW params */
	u64 outbuf_xlat;
	resource_size_t outbuf_size;
	void __iomem *outbuf;
	phys_addr_t out_phys_addr;
	dma_addr_t dma_dst_addr;
	/* Inbound MW params */
	dma_addr_t inbuf_xlat;
	resource_size_t inbuf_size;
	void *inbuf;

	/* NTB connection setup service */
	struct work_struct service;
	unsigned long sts;
};
#define to_peer_service(__work) \
	container_of(__work, struct perf_peer, service)

struct perf_thread {
	struct perf_ctx *perf;
	int tidx;

	/* DMA-based test sync parameters */
	atomic_t dma_sync;
	wait_queue_head_t dma_wait;
	struct dma_chan *dma_chan;

	/* Data source and measured statistics */
	void *src;
	u64 copied;
	ktime_t duration;
	int status;
	struct work_struct work;
};
#define to_thread_work(__work) \
	container_of(__work, struct perf_thread, work)

struct perf_ctx {
	struct ntb_dev *ntb;

	/* Global device index and peers descriptors */
	int gidx;
	int pcnt;
	struct perf_peer *peers;

	/* Performance measuring work-threads interface */
	unsigned long busy_flag;
	wait_queue_head_t twait;
	atomic_t tsync;
	u8 tcnt;
	struct perf_peer *test_peer;
	struct perf_thread threads[MAX_THREADS_CNT];

	/* Scratchpad/Message IO operations */
	int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data);
	int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd,
			u64 *data);

	struct dentry *dbgfs_dir;
};

/*
 * Scratchpad-based commands interface
 */
#define PERF_SPAD_CNT(_pcnt) \
	(3*((_pcnt) + 1))
#define PERF_SPAD_CMD(_gidx) \
	(3*(_gidx))
#define PERF_SPAD_LDATA(_gidx) \
	(3*(_gidx) + 1)
#define PERF_SPAD_HDATA(_gidx) \
	(3*(_gidx) + 2)
#define PERF_SPAD_NOTIFY(_gidx) \
	(BIT_ULL(_gidx))

/*
 * Message-based commands interface
 */
#define PERF_MSG_CNT		3
#define PERF_MSG_CMD		0
#define PERF_MSG_LDATA		1
#define PERF_MSG_HDATA		2

/*==============================================================================
 *                        Static data declarations
 *==============================================================================
 */

static struct dentry *perf_dbgfs_topdir;

static struct workqueue_struct *perf_wq __read_mostly;

/*==============================================================================
 *                  NTB cross-link commands execution service
 *==============================================================================
 */

static void perf_terminate_test(struct perf_ctx *perf);

static inline bool perf_link_is_up(struct perf_peer *peer)
{
	u64 link;

	link = ntb_link_is_up(peer->perf->ntb, NULL, NULL);
	return !!(link & BIT_ULL_MASK(peer->pidx));
}

static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
			      u64 data)
{
	struct perf_ctx *perf = peer->perf;
	int try;
	u32 sts;

	dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);

	/*
	 * Perform a predefined number of attempts before giving up. The data
	 * is sent to a port-specific scratchpad, so a multi-port access race
	 * condition is prevented. Additionally there is no need for local
	 * locking, since only the thread-safe service work uses this method.
	 */
	for (try = 0; try < MSG_TRIES; try++) {
		if (!perf_link_is_up(peer))
			return -ENOLINK;

		sts = ntb_peer_spad_read(perf->ntb, peer->pidx,
					 PERF_SPAD_CMD(perf->gidx));
		if (sts != PERF_CMD_INVAL) {
			usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
			continue;
		}

		ntb_peer_spad_write(perf->ntb, peer->pidx,
				    PERF_SPAD_LDATA(perf->gidx),
				    lower_32_bits(data));
		ntb_peer_spad_write(perf->ntb, peer->pidx,
				    PERF_SPAD_HDATA(perf->gidx),
				    upper_32_bits(data));
		ntb_peer_spad_write(perf->ntb, peer->pidx,
				    PERF_SPAD_CMD(perf->gidx),
				    cmd);
		ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx));

		dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n",
			PERF_SPAD_NOTIFY(peer->gidx));

		break;
	}

	return try < MSG_TRIES ? 0 : -EAGAIN;
}

static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx,
			      enum perf_cmd *cmd, u64 *data)
{
	struct perf_peer *peer;
	u32 val;

	ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));

	/*
	 * We start scanning all over, since the cleared DB may have been set
	 * by any peer. Yes, it makes the peer with the smaller index be
	 * serviced with greater priority, but it's convenient for spad
	 * and message code unification and simplicity.
	 */
	for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) {
		peer = &perf->peers[*pidx];

		if (!perf_link_is_up(peer))
			continue;

		val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx));
		if (val == PERF_CMD_INVAL)
			continue;

		*cmd = val;

		val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx));
		*data = val;

		val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx));
		*data |= (u64)val << 32;

		/* Next command can be retrieved from now on */
		ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx),
			       PERF_CMD_INVAL);

		dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);

		return 0;
	}

	return -ENODATA;
}

static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
			     u64 data)
{
	struct perf_ctx *perf = peer->perf;
	int try, ret;
	u64 outbits;

	dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);

	/*
	 * Perform a predefined number of attempts before giving up. Message
	 * registers are free of race-condition problems when accessed from
	 * different ports, so there is no need to split the registers by
	 * global device index. Local locking isn't needed either, since the
	 * method is used from the service work only.
	 */
	outbits = ntb_msg_outbits(perf->ntb);
	for (try = 0; try < MSG_TRIES; try++) {
		if (!perf_link_is_up(peer))
			return -ENOLINK;

		ret = ntb_msg_clear_sts(perf->ntb, outbits);
		if (ret)
			return ret;

		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA,
				   lower_32_bits(data));

		if (ntb_msg_read_sts(perf->ntb) & outbits) {
			usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
			continue;
		}

		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA,
				   upper_32_bits(data));

		/* This call shall trigger peer message event */
		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd);

		break;
	}

	return try < MSG_TRIES ? 0 : -EAGAIN;
}

static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx,
			     enum perf_cmd *cmd, u64 *data)
{
	u64 inbits;
	u32 val;

	inbits = ntb_msg_inbits(perf->ntb);

	if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3)
		return -ENODATA;

	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD);
	*cmd = val;

	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA);
	*data = val;

	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA);
	*data |= (u64)val << 32;

	/* Next command can be retrieved from now on */
	ntb_msg_clear_sts(perf->ntb, inbits);

	dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);

	return 0;
}

static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data)
{
	struct perf_ctx *perf = peer->perf;

	if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT)
		return perf->cmd_send(peer, cmd, data);

	dev_err(&perf->ntb->dev, "Send invalid command\n");
	return -EINVAL;
}

static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd)
{
	switch (cmd) {
	case PERF_CMD_SSIZE:
	case PERF_CMD_RSIZE:
	case PERF_CMD_SXLAT:
	case PERF_CMD_RXLAT:
	case PERF_CMD_CLEAR:
		break;
	default:
		dev_err(&peer->perf->ntb->dev, "Exec invalid command\n");
		return -EINVAL;
	}

	/* No need for a memory barrier, since bit ops have an internal lock */
	set_bit(cmd, &peer->sts);

	dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd);

	(void)queue_work(system_highpri_wq, &peer->service);

	return 0;
}

static int perf_cmd_recv(struct perf_ctx *perf)
{
	struct perf_peer *peer;
	int ret, pidx, cmd;
	u64 data;

	while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) {
		peer = &perf->peers[pidx];

		switch (cmd) {
		case PERF_CMD_SSIZE:
			peer->inbuf_size = data;
			return perf_cmd_exec(peer, PERF_CMD_RSIZE);
		case PERF_CMD_SXLAT:
			peer->outbuf_xlat = data;
			return perf_cmd_exec(peer, PERF_CMD_RXLAT);
		default:
			dev_err(&perf->ntb->dev, "Recv invalid command\n");
			return -EINVAL;
		}
	}

	/* Return 0 if no data left to process, otherwise an error */
	return ret == -ENODATA ? 0 : ret;
}

static void perf_link_event(void *ctx)
{
	struct perf_ctx *perf = ctx;
	struct perf_peer *peer;
	bool lnk_up;
	int pidx;

	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		peer = &perf->peers[pidx];

		lnk_up = perf_link_is_up(peer);

		if (lnk_up &&
		    !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) {
			perf_cmd_exec(peer, PERF_CMD_SSIZE);
		} else if (!lnk_up &&
			   test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) {
			perf_cmd_exec(peer, PERF_CMD_CLEAR);
		}
	}
}

static void perf_db_event(void *ctx, int vec)
{
	struct perf_ctx *perf = ctx;

	dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec,
		ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb));

	/* Just receive all available commands */
	(void)perf_cmd_recv(perf);
}

static void perf_msg_event(void *ctx)
{
	struct perf_ctx *perf = ctx;

	dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n",
		ntb_msg_read_sts(perf->ntb));

	/* Messages are only sent one-by-one */
	(void)perf_cmd_recv(perf);
}

static const struct ntb_ctx_ops perf_ops = {
	.link_event = perf_link_event,
	.db_event = perf_db_event,
	.msg_event = perf_msg_event
};

static void perf_free_outbuf(struct perf_peer *peer)
{
	(void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
}

static int perf_setup_outbuf(struct perf_peer *peer)
{
	struct perf_ctx *perf = peer->perf;
	int ret;

	/* Outbuf size can be unaligned due to custom max_mw_size */
	ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
				    peer->outbuf_xlat, peer->outbuf_size);
	if (ret) {
		dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n");
		return ret;
	}

	/* Initialization is finally done */
	set_bit(PERF_STS_DONE, &peer->sts);

	return 0;
}

static void perf_free_inbuf(struct perf_peer *peer)
{
	if (!peer->inbuf)
		return;

	(void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
	dma_free_coherent(&peer->perf->ntb->pdev->dev, peer->inbuf_size,
			  peer->inbuf, peer->inbuf_xlat);
	peer->inbuf = NULL;
}

static int perf_setup_inbuf(struct perf_peer *peer)
{
	resource_size_t xlat_align, size_align, size_max;
	struct perf_ctx *perf = peer->perf;
	int ret;

	/* Get inbound MW parameters */
	ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx,
			       &xlat_align, &size_align, &size_max);
	if (ret) {
		dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n");
		return ret;
	}

	if (peer->inbuf_size > size_max) {
		dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n",
			&peer->inbuf_size, &size_max);
		return -EINVAL;
	}

	peer->inbuf_size = round_up(peer->inbuf_size, size_align);

	perf_free_inbuf(peer);

	peer->inbuf = dma_alloc_coherent(&perf->ntb->pdev->dev,
					 peer->inbuf_size, &peer->inbuf_xlat,
					 GFP_KERNEL);
	if (!peer->inbuf) {
		dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n",
			&peer->inbuf_size);
		return -ENOMEM;
	}
	if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) {
		dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n");
		goto err_free_inbuf;
	}

	ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
			       peer->inbuf_xlat, peer->inbuf_size);
	if (ret) {
		dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n");
		goto err_free_inbuf;
	}

	/*
	 * We submit the inbuf xlat transmission command for execution here
	 * to follow the code architecture, even though this method is called
	 * from the service work itself, so the command will be executed
	 * right after it returns.
	 */
	(void)perf_cmd_exec(peer, PERF_CMD_SXLAT);

	return 0;

err_free_inbuf:
	perf_free_inbuf(peer);

	return ret;
}

static void perf_service_work(struct work_struct *work)
{
	struct perf_peer *peer = to_peer_service(work);

	if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts))
		perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size);

	if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts))
		perf_setup_inbuf(peer);

	if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts))
		perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat);

	if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts))
		perf_setup_outbuf(peer);

	if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) {
		clear_bit(PERF_STS_DONE, &peer->sts);
		if (test_bit(0, &peer->perf->busy_flag) &&
		    peer == peer->perf->test_peer) {
			dev_warn(&peer->perf->ntb->dev,
				 "Freeing while test on-fly\n");
			perf_terminate_test(peer->perf);
		}
		perf_free_outbuf(peer);
		perf_free_inbuf(peer);
	}
}

static int perf_init_service(struct perf_ctx *perf)
{
	u64 mask;

	if (ntb_peer_mw_count(perf->ntb) < perf->pcnt + 1) {
		dev_err(&perf->ntb->dev, "Not enough memory windows\n");
		return -EINVAL;
	}

	if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) {
		perf->cmd_send = perf_msg_cmd_send;
		perf->cmd_recv = perf_msg_cmd_recv;

		dev_dbg(&perf->ntb->dev, "Message service initialized\n");

		return 0;
	}

	dev_dbg(&perf->ntb->dev, "Message service unsupported\n");

	mask = GENMASK_ULL(perf->pcnt, 0);
	if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) &&
	    (ntb_db_valid_mask(perf->ntb) & mask) == mask) {
		perf->cmd_send = perf_spad_cmd_send;
		perf->cmd_recv = perf_spad_cmd_recv;

		dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n");

		return 0;
	}

	dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n");

	dev_err(&perf->ntb->dev, "Command services unsupported\n");

	return -EINVAL;
}

static int perf_enable_service(struct perf_ctx *perf)
{
	u64 mask, incmd_bit;
	int ret, sidx, scnt;

	mask = ntb_db_valid_mask(perf->ntb);
	(void)ntb_db_set_mask(perf->ntb, mask);

	ret = ntb_set_ctx(perf->ntb, perf, &perf_ops);
	if (ret)
		return ret;

	if (perf->cmd_send == perf_msg_cmd_send) {
		u64 inbits, outbits;

		inbits = ntb_msg_inbits(perf->ntb);
		outbits = ntb_msg_outbits(perf->ntb);
		(void)ntb_msg_set_mask(perf->ntb, inbits | outbits);

		incmd_bit = BIT_ULL(__ffs64(inbits));
		ret = ntb_msg_clear_mask(perf->ntb, incmd_bit);

		dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit);
	} else {
		scnt = ntb_spad_count(perf->ntb);
		for (sidx = 0; sidx < scnt; sidx++)
			ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL);
		incmd_bit = PERF_SPAD_NOTIFY(perf->gidx);
		ret = ntb_db_clear_mask(perf->ntb, incmd_bit);

		dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit);
	}
	if (ret) {
		ntb_clear_ctx(perf->ntb);
		return ret;
	}

	ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	/* Might not be necessary */
	ntb_link_event(perf->ntb);

	return 0;
}

static void perf_disable_service(struct perf_ctx *perf)
{
	int pidx;

	if (perf->cmd_send == perf_msg_cmd_send) {
		u64 inbits;

		inbits = ntb_msg_inbits(perf->ntb);
		(void)ntb_msg_set_mask(perf->ntb, inbits);
	} else {
		(void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
	}

	ntb_clear_ctx(perf->ntb);

	for (pidx = 0; pidx < perf->pcnt; pidx++)
		perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR);

	for (pidx = 0; pidx < perf->pcnt; pidx++)
		flush_work(&perf->peers[pidx].service);

	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		struct perf_peer *peer = &perf->peers[pidx];

		ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0);
	}

	ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));

	ntb_link_disable(perf->ntb);
}

/*==============================================================================
 *                      Performance measuring work-thread
 *==============================================================================
 */

static void perf_dma_copy_callback(void *data)
{
	struct perf_thread *pthr = data;

	atomic_dec(&pthr->dma_sync);
	wake_up(&pthr->dma_wait);
}

static int perf_copy_chunk(struct perf_thread *pthr,
			   void __iomem *dst, void *src, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	struct dmaengine_unmap_data *unmap;
	struct device *dma_dev;
	int try = 0, ret = 0;
	struct perf_peer *peer = pthr->perf->test_peer;
	void __iomem *vbase;
	void __iomem *dst_vaddr;
	dma_addr_t dst_dma_addr;

	if (!use_dma) {
		memcpy_toio(dst, src, len);
		goto ret_check_tsync;
	}

	dma_dev = pthr->dma_chan->device->dev;

	if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src),
				 offset_in_page(dst), len))
		return -EIO;

	vbase = peer->outbuf;
	dst_vaddr = dst;
	dst_dma_addr = peer->dma_dst_addr + (dst_vaddr - vbase);

	unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = len;
	unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src),
				      offset_in_page(src), len, DMA_TO_DEVICE);
	if (dma_mapping_error(dma_dev, unmap->addr[0])) {
		ret = -EIO;
		goto err_free_resource;
	}
	unmap->to_cnt = 1;

	unmap->addr[1] = dst_dma_addr;
	if (dma_mapping_error(dma_dev, unmap->addr[1])) {
		ret = -EIO;
		goto err_free_resource;
	}
	unmap->from_cnt = 1;

	do {
		tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1],
			unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
		if (!tx)
			msleep(DMA_MDELAY);
	} while (!tx && (try++ < DMA_TRIES));

	if (!tx) {
		ret = -EIO;
		goto err_free_resource;
	}

	tx->callback = perf_dma_copy_callback;
	tx->callback_param = pthr;
	dma_set_unmap(tx, unmap);

	ret = dma_submit_error(dmaengine_submit(tx));
	if (ret) {
		dmaengine_unmap_put(unmap);
		goto err_free_resource;
	}

	dmaengine_unmap_put(unmap);

	atomic_inc(&pthr->dma_sync);
	dma_async_issue_pending(pthr->dma_chan);

ret_check_tsync:
	return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR;

err_free_resource:
	dmaengine_unmap_put(unmap);

	return ret;
}

static bool perf_dma_filter(struct dma_chan *chan, void *data)
{
	struct perf_ctx *perf = data;
	int node;

	node = dev_to_node(&perf->ntb->dev);

	return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev);
}

static int perf_init_test(struct perf_thread *pthr)
{
	struct perf_ctx *perf = pthr->perf;
	dma_cap_mask_t dma_mask;
	struct perf_peer *peer = pthr->perf->test_peer;

	pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL,
				 dev_to_node(&perf->ntb->dev));
	if (!pthr->src)
		return -ENOMEM;

	get_random_bytes(pthr->src, perf->test_peer->outbuf_size);

	if (!use_dma)
		return 0;

	dma_cap_zero(dma_mask);
	dma_cap_set(DMA_MEMCPY, dma_mask);
	pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf);
	if (!pthr->dma_chan) {
		dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n",
			pthr->tidx);
		goto err_free;
	}
	peer->dma_dst_addr =
		dma_map_resource(pthr->dma_chan->device->dev,
				 peer->out_phys_addr, peer->outbuf_size,
				 DMA_FROM_DEVICE, 0);
	if (dma_mapping_error(pthr->dma_chan->device->dev,
			      peer->dma_dst_addr)) {
		dev_err(pthr->dma_chan->device->dev, "%d: Failed to map DMA addr\n",
			pthr->tidx);
		peer->dma_dst_addr = 0;
		dma_release_channel(pthr->dma_chan);
		goto err_free;
	}
	dev_dbg(pthr->dma_chan->device->dev, "%d: Map MMIO %pa to DMA addr %pad\n",
		pthr->tidx,
		&peer->out_phys_addr,
		&peer->dma_dst_addr);

	atomic_set(&pthr->dma_sync, 0);
	return 0;

err_free:
	atomic_dec(&perf->tsync);
	wake_up(&perf->twait);
	kfree(pthr->src);
	return -ENODEV;
}

static int perf_run_test(struct perf_thread *pthr)
{
	struct perf_peer *peer = pthr->perf->test_peer;
	struct perf_ctx *perf = pthr->perf;
	void __iomem *flt_dst, *bnd_dst;
	u64 total_size, chunk_size;
	void *flt_src;
	int ret = 0;

	total_size = 1ULL << total_order;
	chunk_size = 1ULL << chunk_order;
	chunk_size = min_t(u64, peer->outbuf_size, chunk_size);

	flt_src = pthr->src;
	bnd_dst = peer->outbuf + peer->outbuf_size;
	flt_dst = peer->outbuf;

	pthr->duration = ktime_get();

	/* The copied field is cleared at the test launch stage */
	while (pthr->copied < total_size) {
		ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size);
		if (ret) {
			dev_err(&perf->ntb->dev, "%d: Got error %d on test\n",
				pthr->tidx, ret);
			return ret;
		}

		pthr->copied += chunk_size;

		flt_dst += chunk_size;
		flt_src += chunk_size;
		if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) {
			flt_dst = peer->outbuf;
			flt_src = pthr->src;
		}

		/* Give up the CPU to give other threads a chance to use it */
		schedule();
	}

	return 0;
}

static int perf_sync_test(struct perf_thread *pthr)
{
	struct perf_ctx *perf = pthr->perf;

	if (!use_dma)
		goto no_dma_ret;

	wait_event(pthr->dma_wait,
		   (atomic_read(&pthr->dma_sync) == 0 ||
		    atomic_read(&perf->tsync) < 0));

	if (atomic_read(&perf->tsync) < 0)
		return -EINTR;

no_dma_ret:
	pthr->duration = ktime_sub(ktime_get(), pthr->duration);

	dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n",
		pthr->tidx, pthr->copied);

	dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n",
		pthr->tidx, ktime_to_us(pthr->duration));

	dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx,
		div64_u64(pthr->copied, ktime_to_us(pthr->duration)));

	return 0;
}

static void perf_clear_test(struct perf_thread *pthr)
{
	struct perf_ctx *perf = pthr->perf;

	if (!use_dma)
		goto no_dma_notify;

	/*
	 * If the test finished without errors, termination isn't needed.
	 * We call it anyway just to be sure the transfers have completed.
	 */
	(void)dmaengine_terminate_sync(pthr->dma_chan);
	if (pthr->perf->test_peer->dma_dst_addr)
		dma_unmap_resource(pthr->dma_chan->device->dev,
				   pthr->perf->test_peer->dma_dst_addr,
				   pthr->perf->test_peer->outbuf_size,
				   DMA_FROM_DEVICE, 0);

	dma_release_channel(pthr->dma_chan);

no_dma_notify:
	atomic_dec(&perf->tsync);
	wake_up(&perf->twait);
	kfree(pthr->src);
}

static void perf_thread_work(struct work_struct *work)
{
	struct perf_thread *pthr = to_thread_work(work);
	int ret;

	/*
	 * Perform the stages in compliance with the use_dma flag value.
	 * The test status is changed only if an error happened; otherwise
	 * the -ENODATA status is kept while the test is on-fly. Results
	 * synchronization is performed only if the test finished without
	 * an error or interruption.
	 */
	ret = perf_init_test(pthr);
	if (ret) {
		pthr->status = ret;
		return;
	}

	ret = perf_run_test(pthr);
	if (ret) {
		pthr->status = ret;
		goto err_clear_test;
	}

	pthr->status = perf_sync_test(pthr);

err_clear_test:
	perf_clear_test(pthr);
}

static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt)
{
	if (tcnt == 0 || tcnt > MAX_THREADS_CNT)
		return -EINVAL;

	if (test_and_set_bit_lock(0, &perf->busy_flag))
		return -EBUSY;

	perf->tcnt = tcnt;

	clear_bit_unlock(0, &perf->busy_flag);

	return 0;
}

static void perf_terminate_test(struct perf_ctx *perf)
{
	int tidx;

	atomic_set(&perf->tsync, -1);
	wake_up(&perf->twait);

	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
		wake_up(&perf->threads[tidx].dma_wait);
		cancel_work_sync(&perf->threads[tidx].work);
	}
}

static int perf_submit_test(struct perf_peer *peer)
{
	struct perf_ctx *perf = peer->perf;
	struct perf_thread *pthr;
	int tidx, ret;

	if (!test_bit(PERF_STS_DONE, &peer->sts))
		return -ENOLINK;

	if (test_and_set_bit_lock(0, &perf->busy_flag))
		return -EBUSY;

	perf->test_peer = peer;
	atomic_set(&perf->tsync, perf->tcnt);

	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
		pthr = &perf->threads[tidx];

		pthr->status = -ENODATA;
		pthr->copied = 0;
		pthr->duration = ktime_set(0, 0);
		if (tidx < perf->tcnt)
			(void)queue_work(perf_wq, &pthr->work);
	}

	ret = wait_event_interruptible(perf->twait,
				       atomic_read(&perf->tsync) <= 0);
	if (ret == -ERESTARTSYS) {
		perf_terminate_test(perf);
		ret = -EINTR;
	}

	clear_bit_unlock(0, &perf->busy_flag);

	return ret;
}

static int perf_read_stats(struct perf_ctx *perf, char *buf,
			   size_t size, ssize_t *pos)
{
	struct perf_thread *pthr;
	int tidx;

	if (test_and_set_bit_lock(0, &perf->busy_flag))
		return -EBUSY;

	(*pos) += scnprintf(buf + *pos, size - *pos,
		" Peer %d test statistics:\n", perf->test_peer->pidx);

	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
		pthr = &perf->threads[tidx];

		if (pthr->status == -ENODATA)
			continue;

		if (pthr->status) {
			(*pos) += scnprintf(buf + *pos, size - *pos,
				"%d: error status %d\n", tidx, pthr->status);
			continue;
		}

		(*pos) += scnprintf(buf + *pos, size - *pos,
			"%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
			tidx, pthr->copied, ktime_to_us(pthr->duration),
			div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
	}

	clear_bit_unlock(0, &perf->busy_flag);

	return 0;
}

static void perf_init_threads(struct perf_ctx *perf)
{
	struct perf_thread *pthr;
	int tidx;

	perf->tcnt = DEF_THREADS_CNT;
	perf->test_peer = &perf->peers[0];
	init_waitqueue_head(&perf->twait);

	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
		pthr = &perf->threads[tidx];

		pthr->perf = perf;
		pthr->tidx = tidx;
		pthr->status = -ENODATA;
		init_waitqueue_head(&pthr->dma_wait);
		INIT_WORK(&pthr->work, perf_thread_work);
	}
}

static void perf_clear_threads(struct perf_ctx *perf)
{
	perf_terminate_test(perf);
}

/*==============================================================================
 *                               DebugFS nodes
 *==============================================================================
 */

static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf,
				    size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	struct perf_peer *peer;
	size_t buf_size;
	ssize_t pos = 0;
	int ret, pidx;
	char *buf;

	buf_size = min_t(size_t, size, 0x1000U);

	buf = kmalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	pos += scnprintf(buf + pos, buf_size - pos,
		" Performance measuring tool info:\n\n");

	pos += scnprintf(buf + pos, buf_size - pos,
		"Local port %d, Global index %d\n", ntb_port_number(perf->ntb),
		perf->gidx);
	pos += scnprintf(buf + pos, buf_size - pos, "Test status: ");
	if (test_bit(0, &perf->busy_flag)) {
		pos += scnprintf(buf + pos, buf_size - pos,
			"on-fly with port %d (%d)\n",
			ntb_peer_port_number(perf->ntb, perf->test_peer->pidx),
			perf->test_peer->pidx);
	} else {
		pos += scnprintf(buf + pos, buf_size - pos, "idle\n");
	}

	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		peer = &perf->peers[pidx];

		pos += scnprintf(buf + pos, buf_size - pos,
			"Port %d (%d), Global index %d:\n",
			ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx,
			peer->gidx);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tLink status: %s\n",
			test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down");

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tOut buffer addr 0x%pK\n", peer->outbuf);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tOut buff phys addr %pa[p]\n", &peer->out_phys_addr);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tOut buffer size %pa\n", &peer->outbuf_size);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat);

		if (!peer->inbuf) {
			pos += scnprintf(buf + pos, buf_size - pos,
				"\tIn buffer addr: unallocated\n");
			continue;
		}

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tIn buffer addr 0x%pK\n", peer->inbuf);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tIn buffer size %pa\n", &peer->inbuf_size);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat);
	}

	ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
	kfree(buf);

	return ret;
}

static const struct file_operations perf_dbgfs_info = {
	.open = simple_open,
	.read = perf_dbgfs_read_info
};

static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf,
				   size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	ssize_t ret, pos = 0;
	char *buf;

	buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos);
	if (ret)
		goto err_free;

	ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
err_free:
	kfree(buf);

	return ret;
}

static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf,
				    size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	struct perf_peer *peer;
	int pidx, ret;

	ret = kstrtoint_from_user(ubuf, size, 0, &pidx);
	if (ret)
		return ret;

	if (pidx < 0 || pidx >= perf->pcnt)
		return -EINVAL;

	peer = &perf->peers[pidx];

	ret = perf_submit_test(peer);
	if (ret)
		return ret;

	return size;
}

static const struct file_operations perf_dbgfs_run = {
	.open = simple_open,
	.read = perf_dbgfs_read_run,
	.write = perf_dbgfs_write_run
};

static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf,
				    size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	char buf[8];
	ssize_t pos;

	pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt);

	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
}

static ssize_t perf_dbgfs_write_tcnt(struct file *filep,
				     const char __user *ubuf,
				     size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	int ret;
	u8 val;

	ret = kstrtou8_from_user(ubuf, size, 0, &val);
	if (ret)
		return ret;

	ret = perf_set_tcnt(perf, val);
	if (ret)
		return ret;

	return size;
}

static const struct file_operations perf_dbgfs_tcnt = {
	.open = simple_open,
	.read = perf_dbgfs_read_tcnt,
	.write = perf_dbgfs_write_tcnt
};

static void perf_setup_dbgfs(struct perf_ctx *perf)
{
	struct pci_dev *pdev = perf->ntb->pdev;

	perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir);
	if (!perf->dbgfs_dir) {
		dev_warn(&perf->ntb->dev, "DebugFS unsupported\n");
		return;
	}

	debugfs_create_file("info", 0600, perf->dbgfs_dir, perf,
			    &perf_dbgfs_info);

	debugfs_create_file("run", 0600, perf->dbgfs_dir, perf,
			    &perf_dbgfs_run);

	debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf,
			    &perf_dbgfs_tcnt);

	/* They are made read-only for test exec safety and integrity */
	debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order);

	debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order);

	debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma);
}

static void perf_clear_dbgfs(struct perf_ctx *perf)
{
	debugfs_remove_recursive(perf->dbgfs_dir);
}

/*==============================================================================
 *                        Basic driver initialization
 *==============================================================================
 */

static struct perf_ctx *perf_create_data(struct ntb_dev *ntb)
{
	struct perf_ctx *perf;

	perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL);
	if (!perf)
		return ERR_PTR(-ENOMEM);

	perf->pcnt = ntb_peer_port_count(ntb);
	perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers),
				   GFP_KERNEL);
	if (!perf->peers)
		return ERR_PTR(-ENOMEM);

	perf->ntb = ntb;

	return perf;
}

static int perf_setup_peer_mw(struct perf_peer *peer)
{
	struct perf_ctx *perf = peer->perf;
	phys_addr_t phys_addr;
	int ret;

	/* Get outbound MW parameters and map it */
	ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr,
				   &peer->outbuf_size);
	if (ret)
		return ret;

	peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr,
				       peer->outbuf_size);
	if (!peer->outbuf)
		return -ENOMEM;

	peer->out_phys_addr = phys_addr;

	if (max_mw_size && peer->outbuf_size > max_mw_size) {
		peer->outbuf_size = max_mw_size;
		dev_warn(&peer->perf->ntb->dev,
			 "Peer %d outbuf reduced to %pa\n", peer->pidx,
			 &peer->outbuf_size);
	}

	return 0;
}

static int perf_init_peers(struct perf_ctx *perf)
{
	struct perf_peer *peer;
	int pidx, lport, ret;

	lport = ntb_port_number(perf->ntb);
	perf->gidx = -1;
	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		peer = &perf->peers[pidx];

		peer->perf = perf;
		peer->pidx = pidx;
		if (lport < ntb_peer_port_number(perf->ntb, pidx)) {
			if (perf->gidx == -1)
				perf->gidx = pidx;
			peer->gidx = pidx + 1;
		} else {
			peer->gidx = pidx;
		}
		INIT_WORK(&peer->service, perf_service_work);
	}
	if (perf->gidx == -1)
		perf->gidx = pidx;

	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		ret = perf_setup_peer_mw(&perf->peers[pidx]);
		if (ret)
			return ret;
	}

	dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx);

	return 0;
}

static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf;
	int ret;

	perf = perf_create_data(ntb);
	if (IS_ERR(perf))
		return PTR_ERR(perf);

	ret = perf_init_peers(perf);
	if (ret)
		return ret;

	perf_init_threads(perf);

	ret = perf_init_service(perf);
	if (ret)
		return ret;

	ret = perf_enable_service(perf);
	if (ret)
		return ret;

	perf_setup_dbgfs(perf);

	return 0;
}

static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf = ntb->ctx;

	perf_clear_dbgfs(perf);

	perf_disable_service(perf);

	perf_clear_threads(perf);
}

static struct ntb_client perf_client = {
	.ops = {
		.probe = perf_probe,
		.remove = perf_remove
	}
};

static int __init perf_init(void)
{
	int ret;

	if (chunk_order > MAX_CHUNK_ORDER) {
		chunk_order = MAX_CHUNK_ORDER;
		pr_info("Chunk order reduced to %hhu\n", chunk_order);
	}

	if (total_order < chunk_order) {
		total_order = chunk_order;
		pr_info("Total data order reduced to %hhu\n", total_order);
	}

	perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0);
	if (!perf_wq)
		return -ENOMEM;

	if (debugfs_initialized())
		perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);

	ret = ntb_register_client(&perf_client);
	if (ret) {
		debugfs_remove_recursive(perf_dbgfs_topdir);
		destroy_workqueue(perf_wq);
	}

	return ret;
}
module_init(perf_init);

static void __exit perf_exit(void)
{
	ntb_unregister_client(&perf_client);
	debugfs_remove_recursive(perf_dbgfs_topdir);
	destroy_workqueue(perf_wq);
}
module_exit(perf_exit);