1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 /* 3 * Copyright(c) 2015-2018 Intel Corporation. 4 */ 5 6 #include <linux/delay.h> 7 #include "hfi.h" 8 #include "qp.h" 9 #include "trace.h" 10 11 #define SC(name) SEND_CTXT_##name 12 /* 13 * Send Context functions 14 */ 15 static void sc_wait_for_packet_egress(struct send_context *sc, int pause); 16 17 /* 18 * Set the CM reset bit and wait for it to clear. Use the provided 19 * sendctrl register. This routine has no locking. 20 */ 21 void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl) 22 { 23 write_csr(dd, SEND_CTRL, sendctrl | SEND_CTRL_CM_RESET_SMASK); 24 while (1) { 25 udelay(1); 26 sendctrl = read_csr(dd, SEND_CTRL); 27 if ((sendctrl & SEND_CTRL_CM_RESET_SMASK) == 0) 28 break; 29 } 30 } 31 32 /* global control of PIO send */ 33 void pio_send_control(struct hfi1_devdata *dd, int op) 34 { 35 u64 reg, mask; 36 unsigned long flags; 37 int write = 1; /* write sendctrl back */ 38 int flush = 0; /* re-read sendctrl to make sure it is flushed */ 39 int i; 40 41 spin_lock_irqsave(&dd->sendctrl_lock, flags); 42 43 reg = read_csr(dd, SEND_CTRL); 44 switch (op) { 45 case PSC_GLOBAL_ENABLE: 46 reg |= SEND_CTRL_SEND_ENABLE_SMASK; 47 fallthrough; 48 case PSC_DATA_VL_ENABLE: 49 mask = 0; 50 for (i = 0; i < ARRAY_SIZE(dd->vld); i++) 51 if (!dd->vld[i].mtu) 52 mask |= BIT_ULL(i); 53 /* Disallow sending on VLs not enabled */ 54 mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) << 55 SEND_CTRL_UNSUPPORTED_VL_SHIFT; 56 reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask; 57 break; 58 case PSC_GLOBAL_DISABLE: 59 reg &= ~SEND_CTRL_SEND_ENABLE_SMASK; 60 break; 61 case PSC_GLOBAL_VLARB_ENABLE: 62 reg |= SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 63 break; 64 case PSC_GLOBAL_VLARB_DISABLE: 65 reg &= ~SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 66 break; 67 case PSC_CM_RESET: 68 __cm_reset(dd, reg); 69 write = 0; /* CSR already written (and flushed) */ 70 break; 71 case PSC_DATA_VL_DISABLE: 72 reg |= SEND_CTRL_UNSUPPORTED_VL_SMASK; 73 flush = 1; 74 break; 75 default: 76 dd_dev_err(dd, "%s: invalid control %d\n", __func__, op); 77 break; 78 } 79 80 if (write) { 81 write_csr(dd, SEND_CTRL, reg); 82 if (flush) 83 (void)read_csr(dd, SEND_CTRL); /* flush write */ 84 } 85 86 spin_unlock_irqrestore(&dd->sendctrl_lock, flags); 87 } 88 89 /* number of send context memory pools */ 90 #define NUM_SC_POOLS 2 91 92 /* Send Context Size (SCS) wildcards */ 93 #define SCS_POOL_0 -1 94 #define SCS_POOL_1 -2 95 96 /* Send Context Count (SCC) wildcards */ 97 #define SCC_PER_VL -1 98 #define SCC_PER_CPU -2 99 #define SCC_PER_KRCVQ -3 100 101 /* Send Context Size (SCS) constants */ 102 #define SCS_ACK_CREDITS 32 103 #define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */ 104 105 #define PIO_THRESHOLD_CEILING 4096 106 107 #define PIO_WAIT_BATCH_SIZE 5 108 109 /* default send context sizes */ 110 static struct sc_config_sizes sc_config_sizes[SC_MAX] = { 111 [SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 112 .count = SCC_PER_VL }, /* one per NUMA */ 113 [SC_ACK] = { .size = SCS_ACK_CREDITS, 114 .count = SCC_PER_KRCVQ }, 115 [SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 116 .count = SCC_PER_CPU }, /* one per CPU */ 117 [SC_VL15] = { .size = SCS_VL15_CREDITS, 118 .count = 1 }, 119 120 }; 121 122 /* send context memory pool configuration */ 123 struct mem_pool_config { 124 int centipercent; /* % of memory, in 100ths of 1% */ 125 int absolute_blocks; /* absolute block count */ 126 }; 127 128 /* default memory pool configuration: 100% in pool 0 */ 129 static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = { 130 /* centi%, abs blocks */ 131 { 10000, -1 }, /* pool 0 */ 132 { 0, -1 }, /* pool 1 */ 133 }; 134 135 /* memory pool information, used when calculating final sizes */ 136 struct mem_pool_info { 137 int centipercent; /* 138 * 100th of 1% of memory to use, -1 if blocks 139 * already set 140 */ 141 int count; /* count of contexts in the pool */ 142 int blocks; /* block size of the pool */ 143 int size; /* context size, in blocks */ 144 }; 145 146 /* 147 * Convert a pool wildcard to a valid pool index. The wildcards 148 * start at -1 and increase negatively. Map them as: 149 * -1 => 0 150 * -2 => 1 151 * etc. 152 * 153 * Return -1 on non-wildcard input, otherwise convert to a pool number. 154 */ 155 static int wildcard_to_pool(int wc) 156 { 157 if (wc >= 0) 158 return -1; /* non-wildcard */ 159 return -wc - 1; 160 } 161 162 static const char *sc_type_names[SC_MAX] = { 163 "kernel", 164 "ack", 165 "user", 166 "vl15" 167 }; 168 169 static const char *sc_type_name(int index) 170 { 171 if (index < 0 || index >= SC_MAX) 172 return "unknown"; 173 return sc_type_names[index]; 174 } 175 176 /* 177 * Read the send context memory pool configuration and send context 178 * size configuration. Replace any wildcards and come up with final 179 * counts and sizes for the send context types. 180 */ 181 int init_sc_pools_and_sizes(struct hfi1_devdata *dd) 182 { 183 struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; 184 int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; 185 int total_contexts = 0; 186 int fixed_blocks; 187 int pool_blocks; 188 int used_blocks; 189 int cp_total; /* centipercent total */ 190 int ab_total; /* absolute block total */ 191 int extra; 192 int i; 193 194 /* 195 * When SDMA is enabled, kernel context pio packet size is capped by 196 * "piothreshold". Reduce pio buffer allocation for kernel context by 197 * setting it to a fixed size. The allocation allows 3-deep buffering 198 * of the largest pio packets plus up to 128 bytes header, sufficient 199 * to maintain verbs performance. 200 * 201 * When SDMA is disabled, keep the default pooling allocation. 202 */ 203 if (HFI1_CAP_IS_KSET(SDMA)) { 204 u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ? 205 piothreshold : PIO_THRESHOLD_CEILING; 206 sc_config_sizes[SC_KERNEL].size = 207 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE; 208 } 209 210 /* 211 * Step 0: 212 * - copy the centipercents/absolute sizes from the pool config 213 * - sanity check these values 214 * - add up centipercents, then later check for full value 215 * - add up absolute blocks, then later check for over-commit 216 */ 217 cp_total = 0; 218 ab_total = 0; 219 for (i = 0; i < NUM_SC_POOLS; i++) { 220 int cp = sc_mem_pool_config[i].centipercent; 221 int ab = sc_mem_pool_config[i].absolute_blocks; 222 223 /* 224 * A negative value is "unused" or "invalid". Both *can* 225 * be valid, but centipercent wins, so check that first 226 */ 227 if (cp >= 0) { /* centipercent valid */ 228 cp_total += cp; 229 } else if (ab >= 0) { /* absolute blocks valid */ 230 ab_total += ab; 231 } else { /* neither valid */ 232 dd_dev_err( 233 dd, 234 "Send context memory pool %d: both the block count and centipercent are invalid\n", 235 i); 236 return -EINVAL; 237 } 238 239 mem_pool_info[i].centipercent = cp; 240 mem_pool_info[i].blocks = ab; 241 } 242 243 /* do not use both % and absolute blocks for different pools */ 244 if (cp_total != 0 && ab_total != 0) { 245 dd_dev_err( 246 dd, 247 "All send context memory pools must be described as either centipercent or blocks, no mixing between pools\n"); 248 return -EINVAL; 249 } 250 251 /* if any percentages are present, they must add up to 100% x 100 */ 252 if (cp_total != 0 && cp_total != 10000) { 253 dd_dev_err( 254 dd, 255 "Send context memory pool centipercent is %d, expecting 10000\n", 256 cp_total); 257 return -EINVAL; 258 } 259 260 /* the absolute pool total cannot be more than the mem total */ 261 if (ab_total > total_blocks) { 262 dd_dev_err( 263 dd, 264 "Send context memory pool absolute block count %d is larger than the memory size %d\n", 265 ab_total, total_blocks); 266 return -EINVAL; 267 } 268 269 /* 270 * Step 2: 271 * - copy from the context size config 272 * - replace context type wildcard counts with real values 273 * - add up non-memory pool block sizes 274 * - add up memory pool user counts 275 */ 276 fixed_blocks = 0; 277 for (i = 0; i < SC_MAX; i++) { 278 int count = sc_config_sizes[i].count; 279 int size = sc_config_sizes[i].size; 280 int pool; 281 282 /* 283 * Sanity check count: Either a positive value or 284 * one of the expected wildcards is valid. The positive 285 * value is checked later when we compare against total 286 * memory available. 287 */ 288 if (i == SC_ACK) { 289 count = dd->n_krcv_queues; 290 } else if (i == SC_KERNEL) { 291 count = INIT_SC_PER_VL * num_vls; 292 } else if (count == SCC_PER_CPU) { 293 count = dd->num_rcv_contexts - dd->n_krcv_queues; 294 } else if (count < 0) { 295 dd_dev_err( 296 dd, 297 "%s send context invalid count wildcard %d\n", 298 sc_type_name(i), count); 299 return -EINVAL; 300 } 301 if (total_contexts + count > chip_send_contexts(dd)) 302 count = chip_send_contexts(dd) - total_contexts; 303 304 total_contexts += count; 305 306 /* 307 * Sanity check pool: The conversion will return a pool 308 * number or -1 if a fixed (non-negative) value. The fixed 309 * value is checked later when we compare against 310 * total memory available. 311 */ 312 pool = wildcard_to_pool(size); 313 if (pool == -1) { /* non-wildcard */ 314 fixed_blocks += size * count; 315 } else if (pool < NUM_SC_POOLS) { /* valid wildcard */ 316 mem_pool_info[pool].count += count; 317 } else { /* invalid wildcard */ 318 dd_dev_err( 319 dd, 320 "%s send context invalid pool wildcard %d\n", 321 sc_type_name(i), size); 322 return -EINVAL; 323 } 324 325 dd->sc_sizes[i].count = count; 326 dd->sc_sizes[i].size = size; 327 } 328 if (fixed_blocks > total_blocks) { 329 dd_dev_err( 330 dd, 331 "Send context fixed block count, %u, larger than total block count %u\n", 332 fixed_blocks, total_blocks); 333 return -EINVAL; 334 } 335 336 /* step 3: calculate the blocks in the pools, and pool context sizes */ 337 pool_blocks = total_blocks - fixed_blocks; 338 if (ab_total > pool_blocks) { 339 dd_dev_err( 340 dd, 341 "Send context fixed pool sizes, %u, larger than pool block count %u\n", 342 ab_total, pool_blocks); 343 return -EINVAL; 344 } 345 /* subtract off the fixed pool blocks */ 346 pool_blocks -= ab_total; 347 348 for (i = 0; i < NUM_SC_POOLS; i++) { 349 struct mem_pool_info *pi = &mem_pool_info[i]; 350 351 /* % beats absolute blocks */ 352 if (pi->centipercent >= 0) 353 pi->blocks = (pool_blocks * pi->centipercent) / 10000; 354 355 if (pi->blocks == 0 && pi->count != 0) { 356 dd_dev_err( 357 dd, 358 "Send context memory pool %d has %u contexts, but no blocks\n", 359 i, pi->count); 360 return -EINVAL; 361 } 362 if (pi->count == 0) { 363 /* warn about wasted blocks */ 364 if (pi->blocks != 0) 365 dd_dev_err( 366 dd, 367 "Send context memory pool %d has %u blocks, but zero contexts\n", 368 i, pi->blocks); 369 pi->size = 0; 370 } else { 371 pi->size = pi->blocks / pi->count; 372 } 373 } 374 375 /* step 4: fill in the context type sizes from the pool sizes */ 376 used_blocks = 0; 377 for (i = 0; i < SC_MAX; i++) { 378 if (dd->sc_sizes[i].size < 0) { 379 unsigned pool = wildcard_to_pool(dd->sc_sizes[i].size); 380 381 WARN_ON_ONCE(pool >= NUM_SC_POOLS); 382 dd->sc_sizes[i].size = mem_pool_info[pool].size; 383 } 384 /* make sure we are not larger than what is allowed by the HW */ 385 #define PIO_MAX_BLOCKS 1024 386 if (dd->sc_sizes[i].size > PIO_MAX_BLOCKS) 387 dd->sc_sizes[i].size = PIO_MAX_BLOCKS; 388 389 /* calculate our total usage */ 390 used_blocks += dd->sc_sizes[i].size * dd->sc_sizes[i].count; 391 } 392 extra = total_blocks - used_blocks; 393 if (extra != 0) 394 dd_dev_info(dd, "unused send context blocks: %d\n", extra); 395 396 return total_contexts; 397 } 398 399 int init_send_contexts(struct hfi1_devdata *dd) 400 { 401 u16 base; 402 int ret, i, j, context; 403 404 ret = init_credit_return(dd); 405 if (ret) 406 return ret; 407 408 dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8), 409 GFP_KERNEL); 410 dd->send_contexts = kcalloc(dd->num_send_contexts, 411 sizeof(struct send_context_info), 412 GFP_KERNEL); 413 if (!dd->send_contexts || !dd->hw_to_sw) { 414 kfree(dd->hw_to_sw); 415 kfree(dd->send_contexts); 416 free_credit_return(dd); 417 return -ENOMEM; 418 } 419 420 /* hardware context map starts with invalid send context indices */ 421 for (i = 0; i < TXE_NUM_CONTEXTS; i++) 422 dd->hw_to_sw[i] = INVALID_SCI; 423 424 /* 425 * All send contexts have their credit sizes. Allocate credits 426 * for each context one after another from the global space. 427 */ 428 context = 0; 429 base = 1; 430 for (i = 0; i < SC_MAX; i++) { 431 struct sc_config_sizes *scs = &dd->sc_sizes[i]; 432 433 for (j = 0; j < scs->count; j++) { 434 struct send_context_info *sci = 435 &dd->send_contexts[context]; 436 sci->type = i; 437 sci->base = base; 438 sci->credits = scs->size; 439 440 context++; 441 base += scs->size; 442 } 443 } 444 445 return 0; 446 } 447 448 /* 449 * Allocate a software index and hardware context of the given type. 450 * 451 * Must be called with dd->sc_lock held. 452 */ 453 static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, 454 u32 *hw_context) 455 { 456 struct send_context_info *sci; 457 u32 index; 458 u32 context; 459 460 for (index = 0, sci = &dd->send_contexts[0]; 461 index < dd->num_send_contexts; index++, sci++) { 462 if (sci->type == type && sci->allocated == 0) { 463 sci->allocated = 1; 464 /* use a 1:1 mapping, but make them non-equal */ 465 context = chip_send_contexts(dd) - index - 1; 466 dd->hw_to_sw[context] = index; 467 *sw_index = index; 468 *hw_context = context; 469 return 0; /* success */ 470 } 471 } 472 dd_dev_err(dd, "Unable to locate a free type %d send context\n", type); 473 return -ENOSPC; 474 } 475 476 /* 477 * Free the send context given by its software index. 478 * 479 * Must be called with dd->sc_lock held. 480 */ 481 static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context) 482 { 483 struct send_context_info *sci; 484 485 sci = &dd->send_contexts[sw_index]; 486 if (!sci->allocated) { 487 dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n", 488 __func__, sw_index, hw_context); 489 } 490 sci->allocated = 0; 491 dd->hw_to_sw[hw_context] = INVALID_SCI; 492 } 493 494 /* return the base context of a context in a group */ 495 static inline u32 group_context(u32 context, u32 group) 496 { 497 return (context >> group) << group; 498 } 499 500 /* return the size of a group */ 501 static inline u32 group_size(u32 group) 502 { 503 return 1 << group; 504 } 505 506 /* 507 * Obtain the credit return addresses, kernel virtual and bus, for the 508 * given sc. 509 * 510 * To understand this routine: 511 * o va and dma are arrays of struct credit_return. One for each physical 512 * send context, per NUMA. 513 * o Each send context always looks in its relative location in a struct 514 * credit_return for its credit return. 515 * o Each send context in a group must have its return address CSR programmed 516 * with the same value. Use the address of the first send context in the 517 * group. 518 */ 519 static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) 520 { 521 u32 gc = group_context(sc->hw_context, sc->group); 522 u32 index = sc->hw_context & 0x7; 523 524 sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; 525 *dma = (unsigned long) 526 &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; 527 } 528 529 /* 530 * Work queue function triggered in error interrupt routine for 531 * kernel contexts. 532 */ 533 static void sc_halted(struct work_struct *work) 534 { 535 struct send_context *sc; 536 537 sc = container_of(work, struct send_context, halt_work); 538 sc_restart(sc); 539 } 540 541 /* 542 * Calculate PIO block threshold for this send context using the given MTU. 543 * Trigger a return when one MTU plus optional header of credits remain. 544 * 545 * Parameter mtu is in bytes. 546 * Parameter hdrqentsize is in DWORDs. 547 * 548 * Return value is what to write into the CSR: trigger return when 549 * unreturned credits pass this count. 550 */ 551 u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize) 552 { 553 u32 release_credits; 554 u32 threshold; 555 556 /* add in the header size, then divide by the PIO block size */ 557 mtu += hdrqentsize << 2; 558 release_credits = DIV_ROUND_UP(mtu, PIO_BLOCK_SIZE); 559 560 /* check against this context's credits */ 561 if (sc->credits <= release_credits) 562 threshold = 1; 563 else 564 threshold = sc->credits - release_credits; 565 566 return threshold; 567 } 568 569 /* 570 * Calculate credit threshold in terms of percent of the allocated credits. 571 * Trigger when unreturned credits equal or exceed the percentage of the whole. 572 * 573 * Return value is what to write into the CSR: trigger return when 574 * unreturned credits pass this count. 575 */ 576 u32 sc_percent_to_threshold(struct send_context *sc, u32 percent) 577 { 578 return (sc->credits * percent) / 100; 579 } 580 581 /* 582 * Set the credit return threshold. 583 */ 584 void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) 585 { 586 unsigned long flags; 587 u32 old_threshold; 588 int force_return = 0; 589 590 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 591 592 old_threshold = (sc->credit_ctrl >> 593 SC(CREDIT_CTRL_THRESHOLD_SHIFT)) 594 & SC(CREDIT_CTRL_THRESHOLD_MASK); 595 596 if (new_threshold != old_threshold) { 597 sc->credit_ctrl = 598 (sc->credit_ctrl 599 & ~SC(CREDIT_CTRL_THRESHOLD_SMASK)) 600 | ((new_threshold 601 & SC(CREDIT_CTRL_THRESHOLD_MASK)) 602 << SC(CREDIT_CTRL_THRESHOLD_SHIFT)); 603 write_kctxt_csr(sc->dd, sc->hw_context, 604 SC(CREDIT_CTRL), sc->credit_ctrl); 605 606 /* force a credit return on change to avoid a possible stall */ 607 force_return = 1; 608 } 609 610 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 611 612 if (force_return) 613 sc_return_credits(sc); 614 } 615 616 /* 617 * set_pio_integrity 618 * 619 * Set the CHECK_ENABLE register for the send context 'sc'. 620 */ 621 void set_pio_integrity(struct send_context *sc) 622 { 623 struct hfi1_devdata *dd = sc->dd; 624 u32 hw_context = sc->hw_context; 625 int type = sc->type; 626 627 write_kctxt_csr(dd, hw_context, 628 SC(CHECK_ENABLE), 629 hfi1_pkt_default_send_ctxt_mask(dd, type)); 630 } 631 632 static u32 get_buffers_allocated(struct send_context *sc) 633 { 634 int cpu; 635 u32 ret = 0; 636 637 for_each_possible_cpu(cpu) 638 ret += *per_cpu_ptr(sc->buffers_allocated, cpu); 639 return ret; 640 } 641 642 static void reset_buffers_allocated(struct send_context *sc) 643 { 644 int cpu; 645 646 for_each_possible_cpu(cpu) 647 (*per_cpu_ptr(sc->buffers_allocated, cpu)) = 0; 648 } 649 650 /* 651 * Allocate a NUMA relative send context structure of the given type along 652 * with a HW context. 653 */ 654 struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, 655 uint hdrqentsize, int numa) 656 { 657 struct send_context_info *sci; 658 struct send_context *sc = NULL; 659 dma_addr_t dma; 660 unsigned long flags; 661 u64 reg; 662 u32 thresh; 663 u32 sw_index; 664 u32 hw_context; 665 int ret; 666 u8 opval, opmask; 667 668 /* do not allocate while frozen */ 669 if (dd->flags & HFI1_FROZEN) 670 return NULL; 671 672 sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa); 673 if (!sc) 674 return NULL; 675 676 sc->buffers_allocated = alloc_percpu(u32); 677 if (!sc->buffers_allocated) { 678 kfree(sc); 679 dd_dev_err(dd, 680 "Cannot allocate buffers_allocated per cpu counters\n" 681 ); 682 return NULL; 683 } 684 685 spin_lock_irqsave(&dd->sc_lock, flags); 686 ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); 687 if (ret) { 688 spin_unlock_irqrestore(&dd->sc_lock, flags); 689 free_percpu(sc->buffers_allocated); 690 kfree(sc); 691 return NULL; 692 } 693 694 sci = &dd->send_contexts[sw_index]; 695 sci->sc = sc; 696 697 sc->dd = dd; 698 sc->node = numa; 699 sc->type = type; 700 spin_lock_init(&sc->alloc_lock); 701 spin_lock_init(&sc->release_lock); 702 spin_lock_init(&sc->credit_ctrl_lock); 703 seqlock_init(&sc->waitlock); 704 INIT_LIST_HEAD(&sc->piowait); 705 INIT_WORK(&sc->halt_work, sc_halted); 706 init_waitqueue_head(&sc->halt_wait); 707 708 /* grouping is always single context for now */ 709 sc->group = 0; 710 711 sc->sw_index = sw_index; 712 sc->hw_context = hw_context; 713 cr_group_addresses(sc, &dma); 714 sc->credits = sci->credits; 715 sc->size = sc->credits * PIO_BLOCK_SIZE; 716 717 /* PIO Send Memory Address details */ 718 #define PIO_ADDR_CONTEXT_MASK 0xfful 719 #define PIO_ADDR_CONTEXT_SHIFT 16 720 sc->base_addr = dd->piobase + ((hw_context & PIO_ADDR_CONTEXT_MASK) 721 << PIO_ADDR_CONTEXT_SHIFT); 722 723 /* set base and credits */ 724 reg = ((sci->credits & SC(CTRL_CTXT_DEPTH_MASK)) 725 << SC(CTRL_CTXT_DEPTH_SHIFT)) 726 | ((sci->base & SC(CTRL_CTXT_BASE_MASK)) 727 << SC(CTRL_CTXT_BASE_SHIFT)); 728 write_kctxt_csr(dd, hw_context, SC(CTRL), reg); 729 730 set_pio_integrity(sc); 731 732 /* unmask all errors */ 733 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), (u64)-1); 734 735 /* set the default partition key */ 736 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 737 (SC(CHECK_PARTITION_KEY_VALUE_MASK) & 738 DEFAULT_PKEY) << 739 SC(CHECK_PARTITION_KEY_VALUE_SHIFT)); 740 741 /* per context type checks */ 742 if (type == SC_USER) { 743 opval = USER_OPCODE_CHECK_VAL; 744 opmask = USER_OPCODE_CHECK_MASK; 745 } else { 746 opval = OPCODE_CHECK_VAL_DISABLED; 747 opmask = OPCODE_CHECK_MASK_DISABLED; 748 } 749 750 /* set the send context check opcode mask and value */ 751 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 752 ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) | 753 ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); 754 755 /* set up credit return */ 756 reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); 757 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg); 758 759 /* 760 * Calculate the initial credit return threshold. 761 * 762 * For Ack contexts, set a threshold for half the credits. 763 * For User contexts use the given percentage. This has been 764 * sanitized on driver start-up. 765 * For Kernel contexts, use the default MTU plus a header 766 * or half the credits, whichever is smaller. This should 767 * work for both the 3-deep buffering allocation and the 768 * pooling allocation. 769 */ 770 if (type == SC_ACK) { 771 thresh = sc_percent_to_threshold(sc, 50); 772 } else if (type == SC_USER) { 773 thresh = sc_percent_to_threshold(sc, 774 user_credit_return_threshold); 775 } else { /* kernel */ 776 thresh = min(sc_percent_to_threshold(sc, 50), 777 sc_mtu_to_threshold(sc, hfi1_max_mtu, 778 hdrqentsize)); 779 } 780 reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT); 781 /* add in early return */ 782 if (type == SC_USER && HFI1_CAP_IS_USET(EARLY_CREDIT_RETURN)) 783 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK); 784 else if (HFI1_CAP_IS_KSET(EARLY_CREDIT_RETURN)) /* kernel, ack */ 785 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK); 786 787 /* set up write-through credit_ctrl */ 788 sc->credit_ctrl = reg; 789 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), reg); 790 791 /* User send contexts should not allow sending on VL15 */ 792 if (type == SC_USER) { 793 reg = 1ULL << 15; 794 write_kctxt_csr(dd, hw_context, SC(CHECK_VL), reg); 795 } 796 797 spin_unlock_irqrestore(&dd->sc_lock, flags); 798 799 /* 800 * Allocate shadow ring to track outstanding PIO buffers _after_ 801 * unlocking. We don't know the size until the lock is held and 802 * we can't allocate while the lock is held. No one is using 803 * the context yet, so allocate it now. 804 * 805 * User contexts do not get a shadow ring. 806 */ 807 if (type != SC_USER) { 808 /* 809 * Size the shadow ring 1 larger than the number of credits 810 * so head == tail can mean empty. 811 */ 812 sc->sr_size = sci->credits + 1; 813 sc->sr = kcalloc_node(sc->sr_size, 814 sizeof(union pio_shadow_ring), 815 GFP_KERNEL, numa); 816 if (!sc->sr) { 817 sc_free(sc); 818 return NULL; 819 } 820 } 821 822 hfi1_cdbg(PIO, 823 "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u", 824 sw_index, 825 hw_context, 826 sc_type_name(type), 827 sc->group, 828 sc->credits, 829 sc->credit_ctrl, 830 thresh); 831 832 return sc; 833 } 834 835 /* free a per-NUMA send context structure */ 836 void sc_free(struct send_context *sc) 837 { 838 struct hfi1_devdata *dd; 839 unsigned long flags; 840 u32 sw_index; 841 u32 hw_context; 842 843 if (!sc) 844 return; 845 846 sc->flags |= SCF_IN_FREE; /* ensure no restarts */ 847 dd = sc->dd; 848 if (!list_empty(&sc->piowait)) 849 dd_dev_err(dd, "piowait list not empty!\n"); 850 sw_index = sc->sw_index; 851 hw_context = sc->hw_context; 852 sc_disable(sc); /* make sure the HW is disabled */ 853 flush_work(&sc->halt_work); 854 855 spin_lock_irqsave(&dd->sc_lock, flags); 856 dd->send_contexts[sw_index].sc = NULL; 857 858 /* clear/disable all registers set in sc_alloc */ 859 write_kctxt_csr(dd, hw_context, SC(CTRL), 0); 860 write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), 0); 861 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), 0); 862 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 0); 863 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 0); 864 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), 0); 865 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), 0); 866 867 /* release the index and context for re-use */ 868 sc_hw_free(dd, sw_index, hw_context); 869 spin_unlock_irqrestore(&dd->sc_lock, flags); 870 871 kfree(sc->sr); 872 free_percpu(sc->buffers_allocated); 873 kfree(sc); 874 } 875 876 /* disable the context */ 877 void sc_disable(struct send_context *sc) 878 { 879 u64 reg; 880 struct pio_buf *pbuf; 881 LIST_HEAD(wake_list); 882 883 if (!sc) 884 return; 885 886 /* do all steps, even if already disabled */ 887 spin_lock_irq(&sc->alloc_lock); 888 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL)); 889 reg &= ~SC(CTRL_CTXT_ENABLE_SMASK); 890 sc->flags &= ~SCF_ENABLED; 891 sc_wait_for_packet_egress(sc, 1); 892 write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg); 893 894 /* 895 * Flush any waiters. Once the context is disabled, 896 * credit return interrupts are stopped (although there 897 * could be one in-process when the context is disabled). 898 * Wait one microsecond for any lingering interrupts, then 899 * proceed with the flush. 900 */ 901 udelay(1); 902 spin_lock(&sc->release_lock); 903 if (sc->sr) { /* this context has a shadow ring */ 904 while (sc->sr_tail != sc->sr_head) { 905 pbuf = &sc->sr[sc->sr_tail].pbuf; 906 if (pbuf->cb) 907 (*pbuf->cb)(pbuf->arg, PRC_SC_DISABLE); 908 sc->sr_tail++; 909 if (sc->sr_tail >= sc->sr_size) 910 sc->sr_tail = 0; 911 } 912 } 913 spin_unlock(&sc->release_lock); 914 915 write_seqlock(&sc->waitlock); 916 list_splice_init(&sc->piowait, &wake_list); 917 write_sequnlock(&sc->waitlock); 918 while (!list_empty(&wake_list)) { 919 struct iowait *wait; 920 struct rvt_qp *qp; 921 struct hfi1_qp_priv *priv; 922 923 wait = list_first_entry(&wake_list, struct iowait, list); 924 qp = iowait_to_qp(wait); 925 priv = qp->priv; 926 list_del_init(&priv->s_iowait.list); 927 priv->s_iowait.lock = NULL; 928 hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 929 } 930 931 spin_unlock_irq(&sc->alloc_lock); 932 } 933 934 /* return SendEgressCtxtStatus.PacketOccupancy */ 935 static u64 packet_occupancy(u64 reg) 936 { 937 return (reg & 938 SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK) 939 >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT; 940 } 941 942 /* is egress halted on the context? */ 943 static bool egress_halted(u64 reg) 944 { 945 return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK); 946 } 947 948 /* is the send context halted? */ 949 static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context) 950 { 951 return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) & 952 SC(STATUS_CTXT_HALTED_SMASK)); 953 } 954 955 /** 956 * sc_wait_for_packet_egress - wait for packet 957 * @sc: valid send context 958 * @pause: wait for credit return 959 * 960 * Wait for packet egress, optionally pause for credit return 961 * 962 * Egress halt and Context halt are not necessarily the same thing, so 963 * check for both. 964 * 965 * NOTE: The context halt bit may not be set immediately. Because of this, 966 * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW 967 * context bit to determine if the context is halted. 968 */ 969 static void sc_wait_for_packet_egress(struct send_context *sc, int pause) 970 { 971 struct hfi1_devdata *dd = sc->dd; 972 u64 reg = 0; 973 u64 reg_prev; 974 u32 loop = 0; 975 976 while (1) { 977 reg_prev = reg; 978 reg = read_csr(dd, sc->hw_context * 8 + 979 SEND_EGRESS_CTXT_STATUS); 980 /* done if any halt bits, SW or HW are set */ 981 if (sc->flags & SCF_HALTED || 982 is_sc_halted(dd, sc->hw_context) || egress_halted(reg)) 983 break; 984 reg = packet_occupancy(reg); 985 if (reg == 0) 986 break; 987 /* counter is reset if occupancy count changes */ 988 if (reg != reg_prev) 989 loop = 0; 990 if (loop > 50000) { 991 /* timed out - bounce the link */ 992 dd_dev_err(dd, 993 "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", 994 __func__, sc->sw_index, 995 sc->hw_context, (u32)reg); 996 queue_work(dd->pport->link_wq, 997 &dd->pport->link_bounce_work); 998 break; 999 } 1000 loop++; 1001 udelay(1); 1002 } 1003 1004 if (pause) 1005 /* Add additional delay to ensure chip returns all credits */ 1006 pause_for_credit_return(dd); 1007 } 1008 1009 void sc_wait(struct hfi1_devdata *dd) 1010 { 1011 int i; 1012 1013 for (i = 0; i < dd->num_send_contexts; i++) { 1014 struct send_context *sc = dd->send_contexts[i].sc; 1015 1016 if (!sc) 1017 continue; 1018 sc_wait_for_packet_egress(sc, 0); 1019 } 1020 } 1021 1022 /* 1023 * Restart a context after it has been halted due to error. 1024 * 1025 * If the first step fails - wait for the halt to be asserted, return early. 1026 * Otherwise complain about timeouts but keep going. 1027 * 1028 * It is expected that allocations (enabled flag bit) have been shut off 1029 * already (only applies to kernel contexts). 1030 */ 1031 int sc_restart(struct send_context *sc) 1032 { 1033 struct hfi1_devdata *dd = sc->dd; 1034 u64 reg; 1035 u32 loop; 1036 int count; 1037 1038 /* bounce off if not halted, or being free'd */ 1039 if (!(sc->flags & SCF_HALTED) || (sc->flags & SCF_IN_FREE)) 1040 return -EINVAL; 1041 1042 dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index, 1043 sc->hw_context); 1044 1045 /* 1046 * Step 1: Wait for the context to actually halt. 1047 * 1048 * The error interrupt is asynchronous to actually setting halt 1049 * on the context. 1050 */ 1051 loop = 0; 1052 while (1) { 1053 reg = read_kctxt_csr(dd, sc->hw_context, SC(STATUS)); 1054 if (reg & SC(STATUS_CTXT_HALTED_SMASK)) 1055 break; 1056 if (loop > 100) { 1057 dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n", 1058 __func__, sc->sw_index, sc->hw_context); 1059 return -ETIME; 1060 } 1061 loop++; 1062 udelay(1); 1063 } 1064 1065 /* 1066 * Step 2: Ensure no users are still trying to write to PIO. 1067 * 1068 * For kernel contexts, we have already turned off buffer allocation. 1069 * Now wait for the buffer count to go to zero. 1070 * 1071 * For user contexts, the user handling code has cut off write access 1072 * to the context's PIO pages before calling this routine and will 1073 * restore write access after this routine returns. 1074 */ 1075 if (sc->type != SC_USER) { 1076 /* kernel context */ 1077 loop = 0; 1078 while (1) { 1079 count = get_buffers_allocated(sc); 1080 if (count == 0) 1081 break; 1082 if (loop > 100) { 1083 dd_dev_err(dd, 1084 "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n", 1085 __func__, sc->sw_index, 1086 sc->hw_context, count); 1087 } 1088 loop++; 1089 udelay(1); 1090 } 1091 } 1092 1093 /* 1094 * Step 3: Wait for all packets to egress. 1095 * This is done while disabling the send context 1096 * 1097 * Step 4: Disable the context 1098 * 1099 * This is a superset of the halt. After the disable, the 1100 * errors can be cleared. 1101 */ 1102 sc_disable(sc); 1103 1104 /* 1105 * Step 5: Enable the context 1106 * 1107 * This enable will clear the halted flag and per-send context 1108 * error flags. 1109 */ 1110 return sc_enable(sc); 1111 } 1112 1113 /* 1114 * PIO freeze processing. To be called after the TXE block is fully frozen. 1115 * Go through all frozen send contexts and disable them. The contexts are 1116 * already stopped by the freeze. 1117 */ 1118 void pio_freeze(struct hfi1_devdata *dd) 1119 { 1120 struct send_context *sc; 1121 int i; 1122 1123 for (i = 0; i < dd->num_send_contexts; i++) { 1124 sc = dd->send_contexts[i].sc; 1125 /* 1126 * Don't disable unallocated, unfrozen, or user send contexts. 1127 * User send contexts will be disabled when the process 1128 * calls into the driver to reset its context. 1129 */ 1130 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) 1131 continue; 1132 1133 /* only need to disable, the context is already stopped */ 1134 sc_disable(sc); 1135 } 1136 } 1137 1138 /* 1139 * Unfreeze PIO for kernel send contexts. The precondition for calling this 1140 * is that all PIO send contexts have been disabled and the SPC freeze has 1141 * been cleared. Now perform the last step and re-enable each kernel context. 1142 * User (PSM) processing will occur when PSM calls into the kernel to 1143 * acknowledge the freeze. 1144 */ 1145 void pio_kernel_unfreeze(struct hfi1_devdata *dd) 1146 { 1147 struct send_context *sc; 1148 int i; 1149 1150 for (i = 0; i < dd->num_send_contexts; i++) { 1151 sc = dd->send_contexts[i].sc; 1152 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) 1153 continue; 1154 if (sc->flags & SCF_LINK_DOWN) 1155 continue; 1156 1157 sc_enable(sc); /* will clear the sc frozen flag */ 1158 } 1159 } 1160 1161 /** 1162 * pio_kernel_linkup() - Re-enable send contexts after linkup event 1163 * @dd: valid devive data 1164 * 1165 * When the link goes down, the freeze path is taken. However, a link down 1166 * event is different from a freeze because if the send context is re-enabled 1167 * whowever is sending data will start sending data again, which will hang 1168 * any QP that is sending data. 1169 * 1170 * The freeze path now looks at the type of event that occurs and takes this 1171 * path for link down event. 1172 */ 1173 void pio_kernel_linkup(struct hfi1_devdata *dd) 1174 { 1175 struct send_context *sc; 1176 int i; 1177 1178 for (i = 0; i < dd->num_send_contexts; i++) { 1179 sc = dd->send_contexts[i].sc; 1180 if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER) 1181 continue; 1182 1183 sc_enable(sc); /* will clear the sc link down flag */ 1184 } 1185 } 1186 1187 /* 1188 * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear. 1189 * Returns: 1190 * -ETIMEDOUT - if we wait too long 1191 * -EIO - if there was an error 1192 */ 1193 static int pio_init_wait_progress(struct hfi1_devdata *dd) 1194 { 1195 u64 reg; 1196 int max, count = 0; 1197 1198 /* max is the longest possible HW init time / delay */ 1199 max = (dd->icode == ICODE_FPGA_EMULATION) ? 120 : 5; 1200 while (1) { 1201 reg = read_csr(dd, SEND_PIO_INIT_CTXT); 1202 if (!(reg & SEND_PIO_INIT_CTXT_PIO_INIT_IN_PROGRESS_SMASK)) 1203 break; 1204 if (count >= max) 1205 return -ETIMEDOUT; 1206 udelay(5); 1207 count++; 1208 } 1209 1210 return reg & SEND_PIO_INIT_CTXT_PIO_INIT_ERR_SMASK ? -EIO : 0; 1211 } 1212 1213 /* 1214 * Reset all of the send contexts to their power-on state. Used 1215 * only during manual init - no lock against sc_enable needed. 1216 */ 1217 void pio_reset_all(struct hfi1_devdata *dd) 1218 { 1219 int ret; 1220 1221 /* make sure the init engine is not busy */ 1222 ret = pio_init_wait_progress(dd); 1223 /* ignore any timeout */ 1224 if (ret == -EIO) { 1225 /* clear the error */ 1226 write_csr(dd, SEND_PIO_ERR_CLEAR, 1227 SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK); 1228 } 1229 1230 /* reset init all */ 1231 write_csr(dd, SEND_PIO_INIT_CTXT, 1232 SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK); 1233 udelay(2); 1234 ret = pio_init_wait_progress(dd); 1235 if (ret < 0) { 1236 dd_dev_err(dd, 1237 "PIO send context init %s while initializing all PIO blocks\n", 1238 ret == -ETIMEDOUT ? "is stuck" : "had an error"); 1239 } 1240 } 1241 1242 /* enable the context */ 1243 int sc_enable(struct send_context *sc) 1244 { 1245 u64 sc_ctrl, reg, pio; 1246 struct hfi1_devdata *dd; 1247 unsigned long flags; 1248 int ret = 0; 1249 1250 if (!sc) 1251 return -EINVAL; 1252 dd = sc->dd; 1253 1254 /* 1255 * Obtain the allocator lock to guard against any allocation 1256 * attempts (which should not happen prior to context being 1257 * enabled). On the release/disable side we don't need to 1258 * worry about locking since the releaser will not do anything 1259 * if the context accounting values have not changed. 1260 */ 1261 spin_lock_irqsave(&sc->alloc_lock, flags); 1262 sc_ctrl = read_kctxt_csr(dd, sc->hw_context, SC(CTRL)); 1263 if ((sc_ctrl & SC(CTRL_CTXT_ENABLE_SMASK))) 1264 goto unlock; /* already enabled */ 1265 1266 /* IMPORTANT: only clear free and fill if transitioning 0 -> 1 */ 1267 1268 *sc->hw_free = 0; 1269 sc->free = 0; 1270 sc->alloc_free = 0; 1271 sc->fill = 0; 1272 sc->fill_wrap = 0; 1273 sc->sr_head = 0; 1274 sc->sr_tail = 0; 1275 sc->flags = 0; 1276 /* the alloc lock insures no fast path allocation */ 1277 reset_buffers_allocated(sc); 1278 1279 /* 1280 * Clear all per-context errors. Some of these will be set when 1281 * we are re-enabling after a context halt. Now that the context 1282 * is disabled, the halt will not clear until after the PIO init 1283 * engine runs below. 1284 */ 1285 reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS)); 1286 if (reg) 1287 write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg); 1288 1289 /* 1290 * The HW PIO initialization engine can handle only one init 1291 * request at a time. Serialize access to each device's engine. 1292 */ 1293 spin_lock(&dd->sc_init_lock); 1294 /* 1295 * Since access to this code block is serialized and 1296 * each access waits for the initialization to complete 1297 * before releasing the lock, the PIO initialization engine 1298 * should not be in use, so we don't have to wait for the 1299 * InProgress bit to go down. 1300 */ 1301 pio = ((sc->hw_context & SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_MASK) << 1302 SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_SHIFT) | 1303 SEND_PIO_INIT_CTXT_PIO_SINGLE_CTXT_INIT_SMASK; 1304 write_csr(dd, SEND_PIO_INIT_CTXT, pio); 1305 /* 1306 * Wait until the engine is done. Give the chip the required time 1307 * so, hopefully, we read the register just once. 1308 */ 1309 udelay(2); 1310 ret = pio_init_wait_progress(dd); 1311 spin_unlock(&dd->sc_init_lock); 1312 if (ret) { 1313 dd_dev_err(dd, 1314 "sctxt%u(%u): Context not enabled due to init failure %d\n", 1315 sc->sw_index, sc->hw_context, ret); 1316 goto unlock; 1317 } 1318 1319 /* 1320 * All is well. Enable the context. 1321 */ 1322 sc_ctrl |= SC(CTRL_CTXT_ENABLE_SMASK); 1323 write_kctxt_csr(dd, sc->hw_context, SC(CTRL), sc_ctrl); 1324 /* 1325 * Read SendCtxtCtrl to force the write out and prevent a timing 1326 * hazard where a PIO write may reach the context before the enable. 1327 */ 1328 read_kctxt_csr(dd, sc->hw_context, SC(CTRL)); 1329 sc->flags |= SCF_ENABLED; 1330 1331 unlock: 1332 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1333 1334 return ret; 1335 } 1336 1337 /* force a credit return on the context */ 1338 void sc_return_credits(struct send_context *sc) 1339 { 1340 if (!sc) 1341 return; 1342 1343 /* a 0->1 transition schedules a credit return */ 1344 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 1345 SC(CREDIT_FORCE_FORCE_RETURN_SMASK)); 1346 /* 1347 * Ensure that the write is flushed and the credit return is 1348 * scheduled. We care more about the 0 -> 1 transition. 1349 */ 1350 read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE)); 1351 /* set back to 0 for next time */ 1352 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 0); 1353 } 1354 1355 /* allow all in-flight packets to drain on the context */ 1356 void sc_flush(struct send_context *sc) 1357 { 1358 if (!sc) 1359 return; 1360 1361 sc_wait_for_packet_egress(sc, 1); 1362 } 1363 1364 /* 1365 * Start the software reaction to a context halt or SPC freeze: 1366 * - mark the context as halted or frozen 1367 * - stop buffer allocations 1368 * 1369 * Called from the error interrupt. Other work is deferred until 1370 * out of the interrupt. 1371 */ 1372 void sc_stop(struct send_context *sc, int flag) 1373 { 1374 unsigned long flags; 1375 1376 /* stop buffer allocations */ 1377 spin_lock_irqsave(&sc->alloc_lock, flags); 1378 /* mark the context */ 1379 sc->flags |= flag; 1380 sc->flags &= ~SCF_ENABLED; 1381 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1382 wake_up(&sc->halt_wait); 1383 } 1384 1385 #define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32)) 1386 #define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS) 1387 1388 /* 1389 * The send context buffer "allocator". 1390 * 1391 * @sc: the PIO send context we are allocating from 1392 * @len: length of whole packet - including PBC - in dwords 1393 * @cb: optional callback to call when the buffer is finished sending 1394 * @arg: argument for cb 1395 * 1396 * Return a pointer to a PIO buffer, NULL if not enough room, -ECOMM 1397 * when link is down. 1398 */ 1399 struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, 1400 pio_release_cb cb, void *arg) 1401 { 1402 struct pio_buf *pbuf = NULL; 1403 unsigned long flags; 1404 unsigned long avail; 1405 unsigned long blocks = dwords_to_blocks(dw_len); 1406 u32 fill_wrap; 1407 int trycount = 0; 1408 u32 head, next; 1409 1410 spin_lock_irqsave(&sc->alloc_lock, flags); 1411 if (!(sc->flags & SCF_ENABLED)) { 1412 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1413 return ERR_PTR(-ECOMM); 1414 } 1415 1416 retry: 1417 avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free); 1418 if (blocks > avail) { 1419 /* not enough room */ 1420 if (unlikely(trycount)) { /* already tried to get more room */ 1421 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1422 goto done; 1423 } 1424 /* copy from receiver cache line and recalculate */ 1425 sc->alloc_free = READ_ONCE(sc->free); 1426 avail = 1427 (unsigned long)sc->credits - 1428 (sc->fill - sc->alloc_free); 1429 if (blocks > avail) { 1430 /* still no room, actively update */ 1431 sc_release_update(sc); 1432 sc->alloc_free = READ_ONCE(sc->free); 1433 trycount++; 1434 goto retry; 1435 } 1436 } 1437 1438 /* there is enough room */ 1439 1440 preempt_disable(); 1441 this_cpu_inc(*sc->buffers_allocated); 1442 1443 /* read this once */ 1444 head = sc->sr_head; 1445 1446 /* "allocate" the buffer */ 1447 sc->fill += blocks; 1448 fill_wrap = sc->fill_wrap; 1449 sc->fill_wrap += blocks; 1450 if (sc->fill_wrap >= sc->credits) 1451 sc->fill_wrap = sc->fill_wrap - sc->credits; 1452 1453 /* 1454 * Fill the parts that the releaser looks at before moving the head. 1455 * The only necessary piece is the sent_at field. The credits 1456 * we have just allocated cannot have been returned yet, so the 1457 * cb and arg will not be looked at for a "while". Put them 1458 * on this side of the memory barrier anyway. 1459 */ 1460 pbuf = &sc->sr[head].pbuf; 1461 pbuf->sent_at = sc->fill; 1462 pbuf->cb = cb; 1463 pbuf->arg = arg; 1464 pbuf->sc = sc; /* could be filled in at sc->sr init time */ 1465 /* make sure this is in memory before updating the head */ 1466 1467 /* calculate next head index, do not store */ 1468 next = head + 1; 1469 if (next >= sc->sr_size) 1470 next = 0; 1471 /* 1472 * update the head - must be last! - the releaser can look at fields 1473 * in pbuf once we move the head 1474 */ 1475 smp_wmb(); 1476 sc->sr_head = next; 1477 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1478 1479 /* finish filling in the buffer outside the lock */ 1480 pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; 1481 pbuf->end = sc->base_addr + sc->size; 1482 pbuf->qw_written = 0; 1483 pbuf->carry_bytes = 0; 1484 pbuf->carry.val64 = 0; 1485 done: 1486 return pbuf; 1487 } 1488 1489 /* 1490 * There are at least two entities that can turn on credit return 1491 * interrupts and they can overlap. Avoid problems by implementing 1492 * a count scheme that is enforced by a lock. The lock is needed because 1493 * the count and CSR write must be paired. 1494 */ 1495 1496 /* 1497 * Start credit return interrupts. This is managed by a count. If already 1498 * on, just increment the count. 1499 */ 1500 void sc_add_credit_return_intr(struct send_context *sc) 1501 { 1502 unsigned long flags; 1503 1504 /* lock must surround both the count change and the CSR update */ 1505 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1506 if (sc->credit_intr_count == 0) { 1507 sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1508 write_kctxt_csr(sc->dd, sc->hw_context, 1509 SC(CREDIT_CTRL), sc->credit_ctrl); 1510 } 1511 sc->credit_intr_count++; 1512 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1513 } 1514 1515 /* 1516 * Stop credit return interrupts. This is managed by a count. Decrement the 1517 * count, if the last user, then turn the credit interrupts off. 1518 */ 1519 void sc_del_credit_return_intr(struct send_context *sc) 1520 { 1521 unsigned long flags; 1522 1523 WARN_ON(sc->credit_intr_count == 0); 1524 1525 /* lock must surround both the count change and the CSR update */ 1526 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1527 sc->credit_intr_count--; 1528 if (sc->credit_intr_count == 0) { 1529 sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1530 write_kctxt_csr(sc->dd, sc->hw_context, 1531 SC(CREDIT_CTRL), sc->credit_ctrl); 1532 } 1533 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1534 } 1535 1536 /* 1537 * The caller must be careful when calling this. All needint calls 1538 * must be paired with !needint. 1539 */ 1540 void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) 1541 { 1542 if (needint) 1543 sc_add_credit_return_intr(sc); 1544 else 1545 sc_del_credit_return_intr(sc); 1546 trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); 1547 if (needint) 1548 sc_return_credits(sc); 1549 } 1550 1551 /** 1552 * sc_piobufavail - callback when a PIO buffer is available 1553 * @sc: the send context 1554 * 1555 * This is called from the interrupt handler when a PIO buffer is 1556 * available after hfi1_verbs_send() returned an error that no buffers were 1557 * available. Disable the interrupt if there are no more QPs waiting. 1558 */ 1559 static void sc_piobufavail(struct send_context *sc) 1560 { 1561 struct hfi1_devdata *dd = sc->dd; 1562 struct list_head *list; 1563 struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; 1564 struct rvt_qp *qp; 1565 struct hfi1_qp_priv *priv; 1566 unsigned long flags; 1567 uint i, n = 0, top_idx = 0; 1568 1569 if (dd->send_contexts[sc->sw_index].type != SC_KERNEL && 1570 dd->send_contexts[sc->sw_index].type != SC_VL15) 1571 return; 1572 list = &sc->piowait; 1573 /* 1574 * Note: checking that the piowait list is empty and clearing 1575 * the buffer available interrupt needs to be atomic or we 1576 * could end up with QPs on the wait list with the interrupt 1577 * disabled. 1578 */ 1579 write_seqlock_irqsave(&sc->waitlock, flags); 1580 while (!list_empty(list)) { 1581 struct iowait *wait; 1582 1583 if (n == ARRAY_SIZE(qps)) 1584 break; 1585 wait = list_first_entry(list, struct iowait, list); 1586 iowait_get_priority(wait); 1587 qp = iowait_to_qp(wait); 1588 priv = qp->priv; 1589 list_del_init(&priv->s_iowait.list); 1590 priv->s_iowait.lock = NULL; 1591 if (n) { 1592 priv = qps[top_idx]->priv; 1593 top_idx = iowait_priority_update_top(wait, 1594 &priv->s_iowait, 1595 n, top_idx); 1596 } 1597 1598 /* refcount held until actual wake up */ 1599 qps[n++] = qp; 1600 } 1601 /* 1602 * If there had been waiters and there are more 1603 * insure that we redo the force to avoid a potential hang. 1604 */ 1605 if (n) { 1606 hfi1_sc_wantpiobuf_intr(sc, 0); 1607 if (!list_empty(list)) 1608 hfi1_sc_wantpiobuf_intr(sc, 1); 1609 } 1610 write_sequnlock_irqrestore(&sc->waitlock, flags); 1611 1612 /* Wake up the top-priority one first */ 1613 if (n) 1614 hfi1_qp_wakeup(qps[top_idx], 1615 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 1616 for (i = 0; i < n; i++) 1617 if (i != top_idx) 1618 hfi1_qp_wakeup(qps[i], 1619 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 1620 } 1621 1622 /* translate a send credit update to a bit code of reasons */ 1623 static inline int fill_code(u64 hw_free) 1624 { 1625 int code = 0; 1626 1627 if (hw_free & CR_STATUS_SMASK) 1628 code |= PRC_STATUS_ERR; 1629 if (hw_free & CR_CREDIT_RETURN_DUE_TO_PBC_SMASK) 1630 code |= PRC_PBC; 1631 if (hw_free & CR_CREDIT_RETURN_DUE_TO_THRESHOLD_SMASK) 1632 code |= PRC_THRESHOLD; 1633 if (hw_free & CR_CREDIT_RETURN_DUE_TO_ERR_SMASK) 1634 code |= PRC_FILL_ERR; 1635 if (hw_free & CR_CREDIT_RETURN_DUE_TO_FORCE_SMASK) 1636 code |= PRC_SC_DISABLE; 1637 return code; 1638 } 1639 1640 /* use the jiffies compare to get the wrap right */ 1641 #define sent_before(a, b) time_before(a, b) /* a < b */ 1642 1643 /* 1644 * The send context buffer "releaser". 1645 */ 1646 void sc_release_update(struct send_context *sc) 1647 { 1648 struct pio_buf *pbuf; 1649 u64 hw_free; 1650 u32 head, tail; 1651 unsigned long old_free; 1652 unsigned long free; 1653 unsigned long extra; 1654 unsigned long flags; 1655 int code; 1656 1657 if (!sc) 1658 return; 1659 1660 spin_lock_irqsave(&sc->release_lock, flags); 1661 /* update free */ 1662 hw_free = le64_to_cpu(*sc->hw_free); /* volatile read */ 1663 old_free = sc->free; 1664 extra = (((hw_free & CR_COUNTER_SMASK) >> CR_COUNTER_SHIFT) 1665 - (old_free & CR_COUNTER_MASK)) 1666 & CR_COUNTER_MASK; 1667 free = old_free + extra; 1668 trace_hfi1_piofree(sc, extra); 1669 1670 /* call sent buffer callbacks */ 1671 code = -1; /* code not yet set */ 1672 head = READ_ONCE(sc->sr_head); /* snapshot the head */ 1673 tail = sc->sr_tail; 1674 while (head != tail) { 1675 pbuf = &sc->sr[tail].pbuf; 1676 1677 if (sent_before(free, pbuf->sent_at)) { 1678 /* not sent yet */ 1679 break; 1680 } 1681 if (pbuf->cb) { 1682 if (code < 0) /* fill in code on first user */ 1683 code = fill_code(hw_free); 1684 (*pbuf->cb)(pbuf->arg, code); 1685 } 1686 1687 tail++; 1688 if (tail >= sc->sr_size) 1689 tail = 0; 1690 } 1691 sc->sr_tail = tail; 1692 /* make sure tail is updated before free */ 1693 smp_wmb(); 1694 sc->free = free; 1695 spin_unlock_irqrestore(&sc->release_lock, flags); 1696 sc_piobufavail(sc); 1697 } 1698 1699 /* 1700 * Send context group releaser. Argument is the send context that caused 1701 * the interrupt. Called from the send context interrupt handler. 1702 * 1703 * Call release on all contexts in the group. 1704 * 1705 * This routine takes the sc_lock without an irqsave because it is only 1706 * called from an interrupt handler. Adjust if that changes. 1707 */ 1708 void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context) 1709 { 1710 struct send_context *sc; 1711 u32 sw_index; 1712 u32 gc, gc_end; 1713 1714 spin_lock(&dd->sc_lock); 1715 sw_index = dd->hw_to_sw[hw_context]; 1716 if (unlikely(sw_index >= dd->num_send_contexts)) { 1717 dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n", 1718 __func__, hw_context, sw_index); 1719 goto done; 1720 } 1721 sc = dd->send_contexts[sw_index].sc; 1722 if (unlikely(!sc)) 1723 goto done; 1724 1725 gc = group_context(hw_context, sc->group); 1726 gc_end = gc + group_size(sc->group); 1727 for (; gc < gc_end; gc++) { 1728 sw_index = dd->hw_to_sw[gc]; 1729 if (unlikely(sw_index >= dd->num_send_contexts)) { 1730 dd_dev_err(dd, 1731 "%s: invalid hw (%u) to sw (%u) mapping\n", 1732 __func__, hw_context, sw_index); 1733 continue; 1734 } 1735 sc_release_update(dd->send_contexts[sw_index].sc); 1736 } 1737 done: 1738 spin_unlock(&dd->sc_lock); 1739 } 1740 1741 /* 1742 * pio_select_send_context_vl() - select send context 1743 * @dd: devdata 1744 * @selector: a spreading factor 1745 * @vl: this vl 1746 * 1747 * This function returns a send context based on the selector and a vl. 1748 * The mapping fields are protected by RCU 1749 */ 1750 struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd, 1751 u32 selector, u8 vl) 1752 { 1753 struct pio_vl_map *m; 1754 struct pio_map_elem *e; 1755 struct send_context *rval; 1756 1757 /* 1758 * NOTE This should only happen if SC->VL changed after the initial 1759 * checks on the QP/AH 1760 * Default will return VL0's send context below 1761 */ 1762 if (unlikely(vl >= num_vls)) { 1763 rval = NULL; 1764 goto done; 1765 } 1766 1767 rcu_read_lock(); 1768 m = rcu_dereference(dd->pio_map); 1769 if (unlikely(!m)) { 1770 rcu_read_unlock(); 1771 return dd->vld[0].sc; 1772 } 1773 e = m->map[vl & m->mask]; 1774 rval = e->ksc[selector & e->mask]; 1775 rcu_read_unlock(); 1776 1777 done: 1778 rval = !rval ? dd->vld[0].sc : rval; 1779 return rval; 1780 } 1781 1782 /* 1783 * pio_select_send_context_sc() - select send context 1784 * @dd: devdata 1785 * @selector: a spreading factor 1786 * @sc5: the 5 bit sc 1787 * 1788 * This function returns an send context based on the selector and an sc 1789 */ 1790 struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd, 1791 u32 selector, u8 sc5) 1792 { 1793 u8 vl = sc_to_vlt(dd, sc5); 1794 1795 return pio_select_send_context_vl(dd, selector, vl); 1796 } 1797 1798 /* 1799 * Free the indicated map struct 1800 */ 1801 static void pio_map_free(struct pio_vl_map *m) 1802 { 1803 int i; 1804 1805 for (i = 0; m && i < m->actual_vls; i++) 1806 kfree(m->map[i]); 1807 kfree(m); 1808 } 1809 1810 /* 1811 * Handle RCU callback 1812 */ 1813 static void pio_map_rcu_callback(struct rcu_head *list) 1814 { 1815 struct pio_vl_map *m = container_of(list, struct pio_vl_map, list); 1816 1817 pio_map_free(m); 1818 } 1819 1820 /* 1821 * Set credit return threshold for the kernel send context 1822 */ 1823 static void set_threshold(struct hfi1_devdata *dd, int scontext, int i) 1824 { 1825 u32 thres; 1826 1827 thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext], 1828 50), 1829 sc_mtu_to_threshold(dd->kernel_send_context[scontext], 1830 dd->vld[i].mtu, 1831 dd->rcd[0]->rcvhdrqentsize)); 1832 sc_set_cr_threshold(dd->kernel_send_context[scontext], thres); 1833 } 1834 1835 /* 1836 * pio_map_init - called when #vls change 1837 * @dd: hfi1_devdata 1838 * @port: port number 1839 * @num_vls: number of vls 1840 * @vl_scontexts: per vl send context mapping (optional) 1841 * 1842 * This routine changes the mapping based on the number of vls. 1843 * 1844 * vl_scontexts is used to specify a non-uniform vl/send context 1845 * loading. NULL implies auto computing the loading and giving each 1846 * VL an uniform distribution of send contexts per VL. 1847 * 1848 * The auto algorithm computers the sc_per_vl and the number of extra 1849 * send contexts. Any extra send contexts are added from the last VL 1850 * on down 1851 * 1852 * rcu locking is used here to control access to the mapping fields. 1853 * 1854 * If either the num_vls or num_send_contexts are non-power of 2, the 1855 * array sizes in the struct pio_vl_map and the struct pio_map_elem are 1856 * rounded up to the next highest power of 2 and the first entry is 1857 * reused in a round robin fashion. 1858 * 1859 * If an error occurs the map change is not done and the mapping is not 1860 * chaged. 1861 * 1862 */ 1863 int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) 1864 { 1865 int i, j; 1866 int extra, sc_per_vl; 1867 int scontext = 1; 1868 int num_kernel_send_contexts = 0; 1869 u8 lvl_scontexts[OPA_MAX_VLS]; 1870 struct pio_vl_map *oldmap, *newmap; 1871 1872 if (!vl_scontexts) { 1873 for (i = 0; i < dd->num_send_contexts; i++) 1874 if (dd->send_contexts[i].type == SC_KERNEL) 1875 num_kernel_send_contexts++; 1876 /* truncate divide */ 1877 sc_per_vl = num_kernel_send_contexts / num_vls; 1878 /* extras */ 1879 extra = num_kernel_send_contexts % num_vls; 1880 vl_scontexts = lvl_scontexts; 1881 /* add extras from last vl down */ 1882 for (i = num_vls - 1; i >= 0; i--, extra--) 1883 vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0); 1884 } 1885 /* build new map */ 1886 newmap = kzalloc(struct_size(newmap, map, roundup_pow_of_two(num_vls)), 1887 GFP_KERNEL); 1888 if (!newmap) 1889 goto bail; 1890 newmap->actual_vls = num_vls; 1891 newmap->vls = roundup_pow_of_two(num_vls); 1892 newmap->mask = (1 << ilog2(newmap->vls)) - 1; 1893 for (i = 0; i < newmap->vls; i++) { 1894 /* save for wrap around */ 1895 int first_scontext = scontext; 1896 1897 if (i < newmap->actual_vls) { 1898 int sz = roundup_pow_of_two(vl_scontexts[i]); 1899 1900 /* only allocate once */ 1901 newmap->map[i] = kzalloc(struct_size(newmap->map[i], 1902 ksc, sz), 1903 GFP_KERNEL); 1904 if (!newmap->map[i]) 1905 goto bail; 1906 newmap->map[i]->mask = (1 << ilog2(sz)) - 1; 1907 /* 1908 * assign send contexts and 1909 * adjust credit return threshold 1910 */ 1911 for (j = 0; j < sz; j++) { 1912 if (dd->kernel_send_context[scontext]) { 1913 newmap->map[i]->ksc[j] = 1914 dd->kernel_send_context[scontext]; 1915 set_threshold(dd, scontext, i); 1916 } 1917 if (++scontext >= first_scontext + 1918 vl_scontexts[i]) 1919 /* wrap back to first send context */ 1920 scontext = first_scontext; 1921 } 1922 } else { 1923 /* just re-use entry without allocating */ 1924 newmap->map[i] = newmap->map[i % num_vls]; 1925 } 1926 scontext = first_scontext + vl_scontexts[i]; 1927 } 1928 /* newmap in hand, save old map */ 1929 spin_lock_irq(&dd->pio_map_lock); 1930 oldmap = rcu_dereference_protected(dd->pio_map, 1931 lockdep_is_held(&dd->pio_map_lock)); 1932 1933 /* publish newmap */ 1934 rcu_assign_pointer(dd->pio_map, newmap); 1935 1936 spin_unlock_irq(&dd->pio_map_lock); 1937 /* success, free any old map after grace period */ 1938 if (oldmap) 1939 call_rcu(&oldmap->list, pio_map_rcu_callback); 1940 return 0; 1941 bail: 1942 /* free any partial allocation */ 1943 pio_map_free(newmap); 1944 return -ENOMEM; 1945 } 1946 1947 void free_pio_map(struct hfi1_devdata *dd) 1948 { 1949 /* Free PIO map if allocated */ 1950 if (rcu_access_pointer(dd->pio_map)) { 1951 spin_lock_irq(&dd->pio_map_lock); 1952 pio_map_free(rcu_access_pointer(dd->pio_map)); 1953 RCU_INIT_POINTER(dd->pio_map, NULL); 1954 spin_unlock_irq(&dd->pio_map_lock); 1955 synchronize_rcu(); 1956 } 1957 kfree(dd->kernel_send_context); 1958 dd->kernel_send_context = NULL; 1959 } 1960 1961 int init_pervl_scs(struct hfi1_devdata *dd) 1962 { 1963 int i; 1964 u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */ 1965 u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */ 1966 u32 ctxt; 1967 struct hfi1_pportdata *ppd = dd->pport; 1968 1969 dd->vld[15].sc = sc_alloc(dd, SC_VL15, 1970 dd->rcd[0]->rcvhdrqentsize, dd->node); 1971 if (!dd->vld[15].sc) 1972 return -ENOMEM; 1973 1974 hfi1_init_ctxt(dd->vld[15].sc); 1975 dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); 1976 1977 dd->kernel_send_context = kcalloc_node(dd->num_send_contexts, 1978 sizeof(struct send_context *), 1979 GFP_KERNEL, dd->node); 1980 if (!dd->kernel_send_context) 1981 goto freesc15; 1982 1983 dd->kernel_send_context[0] = dd->vld[15].sc; 1984 1985 for (i = 0; i < num_vls; i++) { 1986 /* 1987 * Since this function does not deal with a specific 1988 * receive context but we need the RcvHdrQ entry size, 1989 * use the size from rcd[0]. It is guaranteed to be 1990 * valid at this point and will remain the same for all 1991 * receive contexts. 1992 */ 1993 dd->vld[i].sc = sc_alloc(dd, SC_KERNEL, 1994 dd->rcd[0]->rcvhdrqentsize, dd->node); 1995 if (!dd->vld[i].sc) 1996 goto nomem; 1997 dd->kernel_send_context[i + 1] = dd->vld[i].sc; 1998 hfi1_init_ctxt(dd->vld[i].sc); 1999 /* non VL15 start with the max MTU */ 2000 dd->vld[i].mtu = hfi1_max_mtu; 2001 } 2002 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { 2003 dd->kernel_send_context[i + 1] = 2004 sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node); 2005 if (!dd->kernel_send_context[i + 1]) 2006 goto nomem; 2007 hfi1_init_ctxt(dd->kernel_send_context[i + 1]); 2008 } 2009 2010 sc_enable(dd->vld[15].sc); 2011 ctxt = dd->vld[15].sc->hw_context; 2012 mask = all_vl_mask & ~(1LL << 15); 2013 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2014 dd_dev_info(dd, 2015 "Using send context %u(%u) for VL15\n", 2016 dd->vld[15].sc->sw_index, ctxt); 2017 2018 for (i = 0; i < num_vls; i++) { 2019 sc_enable(dd->vld[i].sc); 2020 ctxt = dd->vld[i].sc->hw_context; 2021 mask = all_vl_mask & ~(data_vls_mask); 2022 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2023 } 2024 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { 2025 sc_enable(dd->kernel_send_context[i + 1]); 2026 ctxt = dd->kernel_send_context[i + 1]->hw_context; 2027 mask = all_vl_mask & ~(data_vls_mask); 2028 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2029 } 2030 2031 if (pio_map_init(dd, ppd->port - 1, num_vls, NULL)) 2032 goto nomem; 2033 return 0; 2034 2035 nomem: 2036 for (i = 0; i < num_vls; i++) { 2037 sc_free(dd->vld[i].sc); 2038 dd->vld[i].sc = NULL; 2039 } 2040 2041 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) 2042 sc_free(dd->kernel_send_context[i + 1]); 2043 2044 kfree(dd->kernel_send_context); 2045 dd->kernel_send_context = NULL; 2046 2047 freesc15: 2048 sc_free(dd->vld[15].sc); 2049 return -ENOMEM; 2050 } 2051 2052 int init_credit_return(struct hfi1_devdata *dd) 2053 { 2054 int ret; 2055 int i; 2056 2057 dd->cr_base = kcalloc( 2058 node_affinity.num_possible_nodes, 2059 sizeof(struct credit_return_base), 2060 GFP_KERNEL); 2061 if (!dd->cr_base) { 2062 ret = -ENOMEM; 2063 goto done; 2064 } 2065 for_each_node_with_cpus(i) { 2066 int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return); 2067 2068 set_dev_node(&dd->pcidev->dev, i); 2069 dd->cr_base[i].va = dma_alloc_coherent(&dd->pcidev->dev, 2070 bytes, 2071 &dd->cr_base[i].dma, 2072 GFP_KERNEL); 2073 if (!dd->cr_base[i].va) { 2074 set_dev_node(&dd->pcidev->dev, dd->node); 2075 dd_dev_err(dd, 2076 "Unable to allocate credit return DMA range for NUMA %d\n", 2077 i); 2078 ret = -ENOMEM; 2079 goto free_cr_base; 2080 } 2081 } 2082 set_dev_node(&dd->pcidev->dev, dd->node); 2083 2084 ret = 0; 2085 done: 2086 return ret; 2087 2088 free_cr_base: 2089 free_credit_return(dd); 2090 goto done; 2091 } 2092 2093 void free_credit_return(struct hfi1_devdata *dd) 2094 { 2095 int i; 2096 2097 if (!dd->cr_base) 2098 return; 2099 for (i = 0; i < node_affinity.num_possible_nodes; i++) { 2100 if (dd->cr_base[i].va) { 2101 dma_free_coherent(&dd->pcidev->dev, 2102 TXE_NUM_CONTEXTS * 2103 sizeof(struct credit_return), 2104 dd->cr_base[i].va, 2105 dd->cr_base[i].dma); 2106 } 2107 } 2108 kfree(dd->cr_base); 2109 dd->cr_base = NULL; 2110 } 2111 2112 void seqfile_dump_sci(struct seq_file *s, u32 i, 2113 struct send_context_info *sci) 2114 { 2115 struct send_context *sc = sci->sc; 2116 u64 reg; 2117 2118 seq_printf(s, "SCI %u: type %u base %u credits %u\n", 2119 i, sci->type, sci->base, sci->credits); 2120 seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", 2121 sc->flags, sc->sw_index, sc->hw_context, sc->group); 2122 seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", 2123 sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); 2124 seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", 2125 sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); 2126 seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", 2127 sc->credit_intr_count, sc->credit_ctrl); 2128 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); 2129 seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n", 2130 (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> 2131 CR_COUNTER_SHIFT, 2132 (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & 2133 SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), 2134 reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); 2135 } 2136