/*
 * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/idr.h>
#include <linux/module.h>
#include <linux/printk.h>
#ifdef CONFIG_INFINIBAND_QIB_DCA
#include <linux/dca.h>
#endif

#include "qib.h"
#include "qib_common.h"
#include "qib_mad.h"
#ifdef CONFIG_DEBUG_FS
#include "qib_debugfs.h"
#include "qib_verbs.h"
#endif

#undef pr_fmt
#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt

/*
 * min buffers we want to have per context, after driver
 */
#define QIB_MIN_USER_CTXT_BUFCNT 7

#define QLOGIC_IB_R_SOFTWARE_MASK 0xFF
#define QLOGIC_IB_R_SOFTWARE_SHIFT 24
#define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)

/*
 * Number of ctxts we are configured to use (to allow for more pio
 * buffers per ctxt, etc.)  Zero means use chip value.
 */
ushort qib_cfgctxts;
module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");

unsigned qib_numa_aware;
module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
MODULE_PARM_DESC(numa_aware,
	"0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");

/*
 * If set, do not write to any regs if avoidable, hack to allow
 * check for deranged default register values.
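 * Used only for diagnostics: when set, most chip initialization is skipped.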
 */
ushort qib_mini_init;
module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO);
MODULE_PARM_DESC(mini_init, "If set, do minimal diag init");

unsigned qib_n_krcv_queues;
module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");

unsigned qib_cc_table_size;
module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
/*
 * qib_wc_pat parameter:
 *      0 is WC via MTRR
 *      1 is WC via PAT
 *      If PAT initialization fails, code reverts back to MTRR
 */
unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");

static void verify_interrupt(unsigned long);

static struct idr qib_unit_table;
u32 qib_cpulist_count;
unsigned long *qib_cpulist;

/* set number of contexts we'll actually use */
void qib_set_ctxtcnt(struct qib_devdata *dd)
{
	if (!qib_cfgctxts) {
		dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
		if (dd->cfgctxts > dd->ctxtcnt)
			dd->cfgctxts = dd->ctxtcnt;
	} else if (qib_cfgctxts < dd->num_pports)
		dd->cfgctxts = dd->ctxtcnt;
	else if (qib_cfgctxts <= dd->ctxtcnt)
		dd->cfgctxts = qib_cfgctxts;
	else
		dd->cfgctxts = dd->ctxtcnt;
	dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
		dd->cfgctxts - dd->first_user_ctxt;
}

/*
 * Common code for creating the receive context array.
 */
int qib_create_ctxts(struct qib_devdata *dd)
{
	unsigned i;
	int local_node_id = pcibus_to_node(dd->pcidev->bus);

	if (local_node_id < 0)
		local_node_id = numa_node_id();
	dd->assigned_node_id = local_node_id;

	/*
	 * Allocate full ctxtcnt array, rather than just cfgctxts, because
	 * cleanup iterates across all possible ctxts.
	 */
	dd->rcd = kcalloc(dd->ctxtcnt, sizeof(*dd->rcd), GFP_KERNEL);
	if (!dd->rcd) {
		qib_dev_err(dd,
			"Unable to allocate ctxtdata array, failing\n");
		return -ENOMEM;
	}

	/* create (one or more) kctxt */
	for (i = 0; i < dd->first_user_ctxt; ++i) {
		struct qib_pportdata *ppd;
		struct qib_ctxtdata *rcd;

		if (dd->skip_kctxt_mask & (1 << i))
			continue;

		ppd = dd->pport + (i % dd->num_pports);

		rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
		if (!rcd) {
			qib_dev_err(dd,
				"Unable to allocate ctxtdata for Kernel ctxt, failing\n");
			kfree(dd->rcd);
			dd->rcd = NULL;
			return -ENOMEM;
		}
		rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
		rcd->seq_cnt = 1;
	}
	return 0;
}

/*
 * Common code for user and kernel context setup.
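 * Returns the new ctxtdata on success, or NULL on allocation failure.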
 */
struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
	int node_id)
{
	struct qib_devdata *dd = ppd->dd;
	struct qib_ctxtdata *rcd;

	rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
	if (rcd) {
		INIT_LIST_HEAD(&rcd->qp_wait_list);
		rcd->node_id = node_id;
		rcd->ppd = ppd;
		rcd->dd = dd;
		rcd->cnt = 1;
		rcd->ctxt = ctxt;
		dd->rcd[ctxt] = rcd;
#ifdef CONFIG_DEBUG_FS
		if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
			rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
				GFP_KERNEL, node_id);
			if (!rcd->opstats) {
				kfree(rcd);
				qib_dev_err(dd,
					"Unable to allocate per ctxt stats buffer\n");
				return NULL;
			}
		}
#endif
		dd->f_init_ctxt(rcd);

		/*
		 * To avoid wasting a lot of memory, we allocate 32KB chunks
		 * of physically contiguous memory, advance through it until
		 * used up and then allocate more.  Of course, we need
		 * memory to store those extra pointers, now.  32KB seems to
		 * be the most that is "safe" under memory pressure
		 * (creating large files and then copying them over
		 * NFS while doing lots of MPI jobs).  The OOM killer can
		 * get invoked, even though we say we can sleep and this can
		 * cause significant system problems....
		 */
		rcd->rcvegrbuf_size = 0x8000;
		rcd->rcvegrbufs_perchunk =
			rcd->rcvegrbuf_size / dd->rcvegrbufsize;
		rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
			rcd->rcvegrbufs_perchunk - 1) /
			rcd->rcvegrbufs_perchunk;
		BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
		rcd->rcvegrbufs_perchunk_shift =
			ilog2(rcd->rcvegrbufs_perchunk);
	}
	return rcd;
}

/*
 * Common code for initializing the physical port structure.
 */
int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
			u8 hw_pidx, u8 port)
{
	int size;

	ppd->dd = dd;
	ppd->hw_pidx = hw_pidx;
	ppd->port = port; /* IB port number, not index */

	spin_lock_init(&ppd->sdma_lock);
	spin_lock_init(&ppd->lflags_lock);
	spin_lock_init(&ppd->cc_shadow_lock);
	init_waitqueue_head(&ppd->state_wait);

	init_timer(&ppd->symerr_clear_timer);
	ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
	ppd->symerr_clear_timer.data = (unsigned long)ppd;

	ppd->qib_wq = NULL;
	ppd->ibport_data.pmastats =
		alloc_percpu(struct qib_pma_counters);
	if (!ppd->ibport_data.pmastats)
		return -ENOMEM;

	if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
		goto bail;

	ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
		IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);

	ppd->cc_max_table_entries =
		ppd->cc_supported_table_entries/IB_CCT_ENTRIES;

	size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
		* IB_CCT_ENTRIES;
	ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
	if (!ppd->ccti_entries) {
		qib_dev_err(dd,
			"failed to allocate congestion control table for port %d!\n",
			port);
		goto bail;
	}

	size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
	ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
	if (!ppd->congestion_entries) {
		qib_dev_err(dd,
			"failed to allocate congestion setting list for port %d!\n",
			port);
		goto bail_1;
	}

	size = sizeof(struct cc_table_shadow);
	ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
	if (!ppd->ccti_entries_shadow) {
		qib_dev_err(dd,
"failed to allocate shadow ccti list for port %d!\n", 290 port); 291 goto bail_2; 292 } 293 294 size = sizeof(struct ib_cc_congestion_setting_attr); 295 ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL); 296 if (!ppd->congestion_entries_shadow) { 297 qib_dev_err(dd, 298 "failed to allocate shadow congestion setting list for port %d!\n", 299 port); 300 goto bail_3; 301 } 302 303 return 0; 304 305 bail_3: 306 kfree(ppd->ccti_entries_shadow); 307 ppd->ccti_entries_shadow = NULL; 308 bail_2: 309 kfree(ppd->congestion_entries); 310 ppd->congestion_entries = NULL; 311 bail_1: 312 kfree(ppd->ccti_entries); 313 ppd->ccti_entries = NULL; 314 bail: 315 /* User is intentionally disabling the congestion control agent */ 316 if (!qib_cc_table_size) 317 return 0; 318 319 if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { 320 qib_cc_table_size = 0; 321 qib_dev_err(dd, 322 "Congestion Control table size %d less than minimum %d for port %d\n", 323 qib_cc_table_size, IB_CCT_MIN_ENTRIES, port); 324 } 325 326 qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", 327 port); 328 return 0; 329 } 330 331 static int init_pioavailregs(struct qib_devdata *dd) 332 { 333 int ret, pidx; 334 u64 *status_page; 335 336 dd->pioavailregs_dma = dma_alloc_coherent( 337 &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, 338 GFP_KERNEL); 339 if (!dd->pioavailregs_dma) { 340 qib_dev_err(dd, 341 "failed to allocate PIOavail reg area in memory\n"); 342 ret = -ENOMEM; 343 goto done; 344 } 345 346 /* 347 * We really want L2 cache aligned, but for current CPUs of 348 * interest, they are the same. 349 */ 350 status_page = (u64 *) 351 ((char *) dd->pioavailregs_dma + 352 ((2 * L1_CACHE_BYTES + 353 dd->pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES)); 354 /* device status comes first, for backwards compatibility */ 355 dd->devstatusp = status_page; 356 *status_page++ = 0; 357 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 358 dd->pport[pidx].statusp = status_page; 359 *status_page++ = 0; 360 } 361 362 /* 363 * Setup buffer to hold freeze and other messages, accessible to 364 * apps, following statusp. This is per-unit, not per port. 365 */ 366 dd->freezemsg = (char *) status_page; 367 *dd->freezemsg = 0; 368 /* length of msg buffer is "whatever is left" */ 369 ret = (char *) status_page - (char *) dd->pioavailregs_dma; 370 dd->freezelen = PAGE_SIZE - ret; 371 372 ret = 0; 373 374 done: 375 return ret; 376 } 377 378 /** 379 * init_shadow_tids - allocate the shadow TID array 380 * @dd: the qlogic_ib device 381 * 382 * allocate the shadow TID array, so we can qib_munlock previous 383 * entries. It may make more sense to move the pageshadow to the 384 * ctxt data structure, so we only allocate memory for ctxts actually 385 * in use, since we at 8k per ctxt, now. 386 * We don't want failures here to prevent use of the driver/chip, 387 * so no return value. 
 */
static void init_shadow_tids(struct qib_devdata *dd)
{
	struct page **pages;
	dma_addr_t *addrs;

	pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
	if (!pages) {
		qib_dev_err(dd,
			"failed to allocate shadow page * array, no expected sends!\n");
		goto bail;
	}

	addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
	if (!addrs) {
		qib_dev_err(dd,
			"failed to allocate shadow dma handle array, no expected sends!\n");
		goto bail_free;
	}

	dd->pageshadow = pages;
	dd->physshadow = addrs;
	return;

bail_free:
	vfree(pages);
bail:
	dd->pageshadow = NULL;
}

/*
 * Do initialization for device that is only needed on
 * first detect, not on resets.
 */
static int loadtime_init(struct qib_devdata *dd)
{
	int ret = 0;

	if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) &
	     QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) {
		qib_dev_err(dd,
			"Driver only handles version %d, chip swversion is %d (%llx), failing\n",
			QIB_CHIP_SWVERSION,
			(int)(dd->revision >>
				QLOGIC_IB_R_SOFTWARE_SHIFT) &
				QLOGIC_IB_R_SOFTWARE_MASK,
			(unsigned long long) dd->revision);
		ret = -ENOSYS;
		goto done;
	}

	if (dd->revision & QLOGIC_IB_R_EMULATOR_MASK)
		qib_devinfo(dd->pcidev, "%s", dd->boardversion);

	spin_lock_init(&dd->pioavail_lock);
	spin_lock_init(&dd->sendctrl_lock);
	spin_lock_init(&dd->uctxt_lock);
	spin_lock_init(&dd->qib_diag_trans_lock);
	spin_lock_init(&dd->eep_st_lock);
	mutex_init(&dd->eep_lock);

	if (qib_mini_init)
		goto done;

	ret = init_pioavailregs(dd);
	init_shadow_tids(dd);

	qib_get_eeprom_info(dd);

	/* setup time (don't start yet) to verify we got interrupt */
	init_timer(&dd->intrchk_timer);
	dd->intrchk_timer.function = verify_interrupt;
	dd->intrchk_timer.data = (unsigned long) dd;

	ret = qib_cq_init(dd);
done:
	return ret;
}

/**
 * init_after_reset - re-initialize after a reset
 * @dd: the qlogic_ib device
 *
 * sanity check at least some of the values after reset, and
 * ensure no receive or transmit (explicitly, in case reset
 * failed)
 */
static int init_after_reset(struct qib_devdata *dd)
{
	int i;

	/*
	 * Ensure chip does no sends or receives, tail updates, or
	 * pioavail updates while we re-initialize.  This is mostly
	 * for the driver data structures, not chip registers.
	 */
	for (i = 0; i < dd->num_pports; ++i) {
		/*
		 * ctxt == -1 means "all contexts". Only really safe for
		 * _dis_abling things, as here.
		 */
		dd->f_rcvctrl(dd->pport + i, QIB_RCVCTRL_CTXT_DIS |
			      QIB_RCVCTRL_INTRAVAIL_DIS |
			      QIB_RCVCTRL_TAILUPD_DIS, -1);
		/* Redundant across ports for some, but no big deal.  */
		dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_DIS |
			QIB_SENDCTRL_AVAIL_DIS);
	}

	return 0;
}

static void enable_chip(struct qib_devdata *dd)
{
	u64 rcvmask;
	int i;

	/*
	 * Enable PIO send, and update of PIOavail regs to memory.
	 */
	for (i = 0; i < dd->num_pports; ++i)
		dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_ENB |
			QIB_SENDCTRL_AVAIL_ENB);
	/*
	 * Enable kernel ctxts' receive and receive interrupt.
	 * Other ctxts done as user opens and inits them.
	 */
	rcvmask = QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB;
	rcvmask |= (dd->flags & QIB_NODMA_RTAIL) ?
		  QIB_RCVCTRL_TAILUPD_DIS : QIB_RCVCTRL_TAILUPD_ENB;
	for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
		struct qib_ctxtdata *rcd = dd->rcd[i];

		if (rcd)
			dd->f_rcvctrl(rcd->ppd, rcvmask, i);
	}
}

static void verify_interrupt(unsigned long opaque)
{
	struct qib_devdata *dd = (struct qib_devdata *) opaque;
	u64 int_counter;

	if (!dd)
		return; /* being torn down */

	/*
	 * If we don't have a lid or any interrupts, let the user know and
	 * don't bother checking again.
	 */
	int_counter = qib_int_counter(dd) - dd->z_int_counter;
	if (int_counter == 0) {
		if (!dd->f_intr_fallback(dd))
			dev_err(&dd->pcidev->dev,
				"No interrupts detected, not usable.\n");
		else /* re-arm the timer to see if fallback works */
			mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
	}
}

static void init_piobuf_state(struct qib_devdata *dd)
{
	int i, pidx;
	u32 uctxts;

	/*
	 * Ensure all buffers are free, and fifos empty.  Buffers
	 * are common, so only do once for port 0.
	 *
	 * After enable and qib_chg_pioavailkernel so we can safely
	 * enable pioavail updates and PIOENABLE.  After this, packets
	 * are ready and able to go out.
	 */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_ALL);
	for (pidx = 0; pidx < dd->num_pports; ++pidx)
		dd->f_sendctrl(dd->pport + pidx, QIB_SENDCTRL_FLUSH);

	/*
	 * If not all sendbufs are used, add the one to each of the lower
	 * numbered contexts.  pbufsctxt and lastctxt_piobuf are
	 * calculated in chip-specific code because it may cause some
	 * chip-specific adjustments to be made.
	 */
	uctxts = dd->cfgctxts - dd->first_user_ctxt;
	dd->ctxts_extrabuf = dd->pbufsctxt ?
		dd->lastctxt_piobuf - (dd->pbufsctxt * uctxts) : 0;

	/*
	 * Set up the shadow copies of the piobufavail registers,
	 * which we compare against the chip registers for now, and
	 * the in memory DMA'ed copies of the registers.
	 * By now pioavail updates to memory should have occurred, so
	 * copy them into our working/shadow registers; this is in
	 * case something went wrong with abort, but mostly to get the
	 * initial values of the generation bit correct.
	 */
	for (i = 0; i < dd->pioavregs; i++) {
		__le64 tmp;

		tmp = dd->pioavailregs_dma[i];
		/*
		 * Don't need to worry about pioavailkernel here
		 * because we will call qib_chg_pioavailkernel() later
		 * in initialization, to busy out buffers as needed.
592 */ 593 dd->pioavailshadow[i] = le64_to_cpu(tmp); 594 } 595 while (i < ARRAY_SIZE(dd->pioavailshadow)) 596 dd->pioavailshadow[i++] = 0; /* for debugging sanity */ 597 598 /* after pioavailshadow is setup */ 599 qib_chg_pioavailkernel(dd, 0, dd->piobcnt2k + dd->piobcnt4k, 600 TXCHK_CHG_TYPE_KERN, NULL); 601 dd->f_initvl15_bufs(dd); 602 } 603 604 /** 605 * qib_create_workqueues - create per port workqueues 606 * @dd: the qlogic_ib device 607 */ 608 static int qib_create_workqueues(struct qib_devdata *dd) 609 { 610 int pidx; 611 struct qib_pportdata *ppd; 612 613 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 614 ppd = dd->pport + pidx; 615 if (!ppd->qib_wq) { 616 char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */ 617 618 snprintf(wq_name, sizeof(wq_name), "qib%d_%d", 619 dd->unit, pidx); 620 ppd->qib_wq = 621 create_singlethread_workqueue(wq_name); 622 if (!ppd->qib_wq) 623 goto wq_error; 624 } 625 } 626 return 0; 627 wq_error: 628 pr_err("create_singlethread_workqueue failed for port %d\n", 629 pidx + 1); 630 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 631 ppd = dd->pport + pidx; 632 if (ppd->qib_wq) { 633 destroy_workqueue(ppd->qib_wq); 634 ppd->qib_wq = NULL; 635 } 636 } 637 return -ENOMEM; 638 } 639 640 static void qib_free_pportdata(struct qib_pportdata *ppd) 641 { 642 free_percpu(ppd->ibport_data.pmastats); 643 ppd->ibport_data.pmastats = NULL; 644 } 645 646 /** 647 * qib_init - do the actual initialization sequence on the chip 648 * @dd: the qlogic_ib device 649 * @reinit: reinitializing, so don't allocate new memory 650 * 651 * Do the actual initialization sequence on the chip. This is done 652 * both from the init routine called from the PCI infrastructure, and 653 * when we reset the chip, or detect that it was reset internally, 654 * or it's administratively re-enabled. 655 * 656 * Memory allocation here and in called routines is only done in 657 * the first case (reinit == 0). We have to be careful, because even 658 * without memory allocation, we need to re-write all the chip registers 659 * TIDs, etc. after the reset or enable has completed. 660 */ 661 int qib_init(struct qib_devdata *dd, int reinit) 662 { 663 int ret = 0, pidx, lastfail = 0; 664 u32 portok = 0; 665 unsigned i; 666 struct qib_ctxtdata *rcd; 667 struct qib_pportdata *ppd; 668 unsigned long flags; 669 670 /* Set linkstate to unknown, so we can watch for a transition. */ 671 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 672 ppd = dd->pport + pidx; 673 spin_lock_irqsave(&ppd->lflags_lock, flags); 674 ppd->lflags &= ~(QIBL_LINKACTIVE | QIBL_LINKARMED | 675 QIBL_LINKDOWN | QIBL_LINKINIT | 676 QIBL_LINKV); 677 spin_unlock_irqrestore(&ppd->lflags_lock, flags); 678 } 679 680 if (reinit) 681 ret = init_after_reset(dd); 682 else 683 ret = loadtime_init(dd); 684 if (ret) 685 goto done; 686 687 /* Bypass most chip-init, to get to device creation */ 688 if (qib_mini_init) 689 return 0; 690 691 ret = dd->f_late_initreg(dd); 692 if (ret) 693 goto done; 694 695 /* dd->rcd can be NULL if early init failed */ 696 for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { 697 /* 698 * Set up the (kernel) rcvhdr queue and egr TIDs. If doing 699 * re-init, the simplest way to handle this is to free 700 * existing, and re-allocate. 701 * Need to re-create rest of ctxt 0 ctxtdata as well. 
		 */
		rcd = dd->rcd[i];
		if (!rcd)
			continue;

		lastfail = qib_create_rcvhdrq(dd, rcd);
		if (!lastfail)
			lastfail = qib_setup_eagerbufs(rcd);
		if (lastfail) {
			qib_dev_err(dd,
				"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
			continue;
		}
	}

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		int mtu;

		if (lastfail)
			ret = lastfail;
		ppd = dd->pport + pidx;
		mtu = ib_mtu_enum_to_int(qib_ibmtu);
		if (mtu == -1) {
			mtu = QIB_DEFAULT_MTU;
			qib_ibmtu = 0; /* don't leave invalid value */
		}
		/* set max we can ever have for this driver load */
		ppd->init_ibmaxlen = min(mtu > 2048 ?
					 dd->piosize4k : dd->piosize2k,
					 dd->rcvegrbufsize +
					 (dd->rcvhdrentsize << 2));
		/*
		 * Have to initialize ibmaxlen, but this will normally
		 * change immediately in qib_set_mtu().
		 */
		ppd->ibmaxlen = ppd->init_ibmaxlen;
		qib_set_mtu(ppd, mtu);

		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags |= QIBL_IB_LINK_DISABLED;
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);

		lastfail = dd->f_bringup_serdes(ppd);
		if (lastfail) {
			qib_devinfo(dd->pcidev,
				 "Failed to bringup IB port %u\n", ppd->port);
			lastfail = -ENETDOWN;
			continue;
		}

		portok++;
	}

	if (!portok) {
		/* none of the ports initialized */
		if (!ret && lastfail)
			ret = lastfail;
		else if (!ret)
			ret = -ENETDOWN;
		/* but continue on, so we can debug cause */
	}

	enable_chip(dd);

	init_piobuf_state(dd);

done:
	if (!ret) {
		/* chip is OK for user apps; mark it as initialized */
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;
			/*
			 * Set status even if port serdes is not initialized
			 * so that diags will work.
			 */
			*ppd->statusp |= QIB_STATUS_CHIP_PRESENT |
				QIB_STATUS_INITTED;
			if (!ppd->link_speed_enabled)
				continue;
			if (dd->flags & QIB_HAS_SEND_DMA)
				ret = qib_setup_sdma(ppd);
			init_timer(&ppd->hol_timer);
			ppd->hol_timer.function = qib_hol_event;
			ppd->hol_timer.data = (unsigned long)ppd;
			ppd->hol_state = QIB_HOL_UP;
		}

		/* now we can enable all interrupts from the chip */
		dd->f_set_intr_state(dd, 1);

		/*
		 * Setup to verify we get an interrupt, and fallback
		 * to an alternate if necessary and possible.
		 */
		mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
		/* start stats retrieval timer */
		mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
	}

	/* if ret is non-zero, we probably should do some cleanup here... */
	return ret;
}

/*
 * These next two routines are placeholders in case we don't have per-arch
 * code for controlling write combining.  If explicit control of write
 * combining is not available, performance will probably be awful.
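 * Architecture-specific implementations, where present, override these
 * weak definitions.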
 */

int __attribute__((weak)) qib_enable_wc(struct qib_devdata *dd)
{
	return -EOPNOTSUPP;
}

void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd)
{
}

static inline struct qib_devdata *__qib_lookup(int unit)
{
	return idr_find(&qib_unit_table, unit);
}

struct qib_devdata *qib_lookup(int unit)
{
	struct qib_devdata *dd;
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	dd = __qib_lookup(unit);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	return dd;
}

/*
 * Stop the timers during unit shutdown, or after an error late
 * in initialization.
 */
static void qib_stop_timers(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	int pidx;

	if (dd->stats_timer.data) {
		del_timer_sync(&dd->stats_timer);
		dd->stats_timer.data = 0;
	}
	if (dd->intrchk_timer.data) {
		del_timer_sync(&dd->intrchk_timer);
		dd->intrchk_timer.data = 0;
	}
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		if (ppd->hol_timer.data)
			del_timer_sync(&ppd->hol_timer);
		if (ppd->led_override_timer.data) {
			del_timer_sync(&ppd->led_override_timer);
			atomic_set(&ppd->led_override_timer_active, 0);
		}
		if (ppd->symerr_clear_timer.data)
			del_timer_sync(&ppd->symerr_clear_timer);
	}
}

/**
 * qib_shutdown_device - shut down a device
 * @dd: the qlogic_ib device
 *
 * This is called to make the device quiet when we are about to
 * unload the driver, and also when the device is administratively
 * disabled.   It does not free any data structures.
 * Everything it does has to be setup again by qib_init(dd, 1)
 */
static void qib_shutdown_device(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	unsigned pidx;

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;

		spin_lock_irq(&ppd->lflags_lock);
		ppd->lflags &= ~(QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKARMED | QIBL_LINKACTIVE |
				 QIBL_LINKV);
		spin_unlock_irq(&ppd->lflags_lock);
		*ppd->statusp &= ~(QIB_STATUS_IB_CONF | QIB_STATUS_IB_READY);
	}
	dd->flags &= ~QIB_INITTED;

	/* mask interrupts, but not errors */
	dd->f_set_intr_state(dd, 0);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_rcvctrl(ppd, QIB_RCVCTRL_TAILUPD_DIS |
				   QIB_RCVCTRL_CTXT_DIS |
				   QIB_RCVCTRL_INTRAVAIL_DIS |
				   QIB_RCVCTRL_PKEY_ENB, -1);
		/*
		 * Gracefully stop all sends allowing any in progress to
		 * trickle out first.
		 */
		dd->f_sendctrl(ppd, QIB_SENDCTRL_CLEAR);
	}

	/*
	 * Enough for anything that's going to trickle out to have actually
	 * done so.
	 */
	udelay(20);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_setextled(ppd, 0); /* make sure LEDs are off */

		if (dd->flags & QIB_HAS_SEND_DMA)
			qib_teardown_sdma(ppd);

		dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_DIS |
				    QIB_SENDCTRL_SEND_DIS);
		/*
		 * Clear SerdesEnable.
		 * We can't count on interrupts since we are stopping.
927 */ 928 dd->f_quiet_serdes(ppd); 929 930 if (ppd->qib_wq) { 931 destroy_workqueue(ppd->qib_wq); 932 ppd->qib_wq = NULL; 933 } 934 qib_free_pportdata(ppd); 935 } 936 937 } 938 939 /** 940 * qib_free_ctxtdata - free a context's allocated data 941 * @dd: the qlogic_ib device 942 * @rcd: the ctxtdata structure 943 * 944 * free up any allocated data for a context 945 * This should not touch anything that would affect a simultaneous 946 * re-allocation of context data, because it is called after qib_mutex 947 * is released (and can be called from reinit as well). 948 * It should never change any chip state, or global driver state. 949 */ 950 void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) 951 { 952 if (!rcd) 953 return; 954 955 if (rcd->rcvhdrq) { 956 dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, 957 rcd->rcvhdrq, rcd->rcvhdrq_phys); 958 rcd->rcvhdrq = NULL; 959 if (rcd->rcvhdrtail_kvaddr) { 960 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, 961 rcd->rcvhdrtail_kvaddr, 962 rcd->rcvhdrqtailaddr_phys); 963 rcd->rcvhdrtail_kvaddr = NULL; 964 } 965 } 966 if (rcd->rcvegrbuf) { 967 unsigned e; 968 969 for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { 970 void *base = rcd->rcvegrbuf[e]; 971 size_t size = rcd->rcvegrbuf_size; 972 973 dma_free_coherent(&dd->pcidev->dev, size, 974 base, rcd->rcvegrbuf_phys[e]); 975 } 976 kfree(rcd->rcvegrbuf); 977 rcd->rcvegrbuf = NULL; 978 kfree(rcd->rcvegrbuf_phys); 979 rcd->rcvegrbuf_phys = NULL; 980 rcd->rcvegrbuf_chunks = 0; 981 } 982 983 kfree(rcd->tid_pg_list); 984 vfree(rcd->user_event_mask); 985 vfree(rcd->subctxt_uregbase); 986 vfree(rcd->subctxt_rcvegrbuf); 987 vfree(rcd->subctxt_rcvhdr_base); 988 #ifdef CONFIG_DEBUG_FS 989 kfree(rcd->opstats); 990 rcd->opstats = NULL; 991 #endif 992 kfree(rcd); 993 } 994 995 /* 996 * Perform a PIO buffer bandwidth write test, to verify proper system 997 * configuration. Even when all the setup calls work, occasionally 998 * BIOS or other issues can prevent write combining from working, or 999 * can cause other bandwidth problems to the chip. 1000 * 1001 * This test simply writes the same buffer over and over again, and 1002 * measures close to the peak bandwidth to the chip (not testing 1003 * data bandwidth to the wire). On chips that use an address-based 1004 * trigger to send packets to the wire, this is easy. On chips that 1005 * use a count to trigger, we want to make sure that the packet doesn't 1006 * go out on the wire, or trigger flow control checks. 1007 */ 1008 static void qib_verify_pioperf(struct qib_devdata *dd) 1009 { 1010 u32 pbnum, cnt, lcnt; 1011 u32 __iomem *piobuf; 1012 u32 *addr; 1013 u64 msecs, emsecs; 1014 1015 piobuf = dd->f_getsendbuf(dd->pport, 0ULL, &pbnum); 1016 if (!piobuf) { 1017 qib_devinfo(dd->pcidev, 1018 "No PIObufs for checking perf, skipping\n"); 1019 return; 1020 } 1021 1022 /* 1023 * Enough to give us a reasonable test, less than piobuf size, and 1024 * likely multiple of store buffer length. 
	 */
	cnt = 1024;

	addr = vmalloc(cnt);
	if (!addr) {
		qib_devinfo(dd->pcidev,
			 "Couldn't get memory for checking PIO perf, skipping\n");
		goto done;
	}

	preempt_disable();  /* we want reasonably accurate elapsed time */
	msecs = 1 + jiffies_to_msecs(jiffies);
	for (lcnt = 0; lcnt < 10000U; lcnt++) {
		/* wait until we cross msec boundary */
		if (jiffies_to_msecs(jiffies) >= msecs)
			break;
		udelay(1);
	}

	dd->f_set_armlaunch(dd, 0);

	/*
	 * length 0, no dwords actually sent
	 */
	writeq(0, piobuf);
	qib_flush_wc();

	/*
	 * This is only roughly accurate, since even with preempt we
	 * still take interrupts that could take a while.   Running for
	 * >= 5 msec seems to get us "close enough" to accurate values.
	 */
	msecs = jiffies_to_msecs(jiffies);
	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
		qib_pio_copy(piobuf + 64, addr, cnt >> 2);
		emsecs = jiffies_to_msecs(jiffies) - msecs;
	}

	/* 1 GiB/sec, slightly over IB SDR line rate */
	if (lcnt < (emsecs * 1024U))
		qib_dev_err(dd,
			"Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n",
			lcnt / (u32) emsecs);

	preempt_enable();

	vfree(addr);

done:
	/* disarm piobuf, so it's available again */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbnum));
	qib_sendbuf_done(dd, pbnum);
	dd->f_set_armlaunch(dd, 1);
}

void qib_free_devdata(struct qib_devdata *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	idr_remove(&qib_unit_table, dd->unit);
	list_del(&dd->list);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

#ifdef CONFIG_DEBUG_FS
	qib_dbg_ibdev_exit(&dd->verbs_dev);
#endif
	free_percpu(dd->int_counter);
	ib_dealloc_device(&dd->verbs_dev.ibdev);
}

u64 qib_int_counter(struct qib_devdata *dd)
{
	int cpu;
	u64 int_counter = 0;

	for_each_possible_cpu(cpu)
		int_counter += *per_cpu_ptr(dd->int_counter, cpu);
	return int_counter;
}

u64 qib_sps_ints(void)
{
	unsigned long flags;
	struct qib_devdata *dd;
	u64 sps_ints = 0;

	spin_lock_irqsave(&qib_devs_lock, flags);
	list_for_each_entry(dd, &qib_dev_list, list) {
		sps_ints += qib_int_counter(dd);
	}
	spin_unlock_irqrestore(&qib_devs_lock, flags);
	return sps_ints;
}

/*
 * Allocate our primary per-unit data structure.  Must be done via verbs
 * allocator, because the verbs cleanup process both does cleanup and
 * free of the data structure.
 * "extra" is for chip-specific data.
 *
 * Use the idr mechanism to get a unit number for this unit.
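 * Returns a pointer to the allocated devdata on success, or an ERR_PTR
 * on failure.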
 */
struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
{
	unsigned long flags;
	struct qib_devdata *dd;
	int ret;

	dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
	if (!dd)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&dd->list);

	idr_preload(GFP_KERNEL);
	spin_lock_irqsave(&qib_devs_lock, flags);

	ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT);
	if (ret >= 0) {
		dd->unit = ret;
		list_add(&dd->list, &qib_dev_list);
	}

	spin_unlock_irqrestore(&qib_devs_lock, flags);
	idr_preload_end();

	if (ret < 0) {
		qib_early_err(&pdev->dev,
			      "Could not allocate unit ID: error %d\n", -ret);
		goto bail;
	}
	dd->int_counter = alloc_percpu(u64);
	if (!dd->int_counter) {
		ret = -ENOMEM;
		qib_early_err(&pdev->dev,
			      "Could not allocate per-cpu int_counter\n");
		goto bail;
	}

	if (!qib_cpulist_count) {
		u32 count = num_online_cpus();

		qib_cpulist = kzalloc(BITS_TO_LONGS(count) *
				      sizeof(long), GFP_KERNEL);
		if (qib_cpulist)
			qib_cpulist_count = count;
		else
			qib_early_err(&pdev->dev,
				"Could not alloc cpulist info, cpu affinity might be wrong\n");
	}
#ifdef CONFIG_DEBUG_FS
	qib_dbg_ibdev_init(&dd->verbs_dev);
#endif
	return dd;
bail:
	if (!list_empty(&dd->list))
		list_del_init(&dd->list);
	ib_dealloc_device(&dd->verbs_dev.ibdev);
	return ERR_PTR(ret);
}

/*
 * Called from freeze mode handlers, and from PCI error
 * reporting code.  Should be paranoid about state of
 * system and data structures.
 */
void qib_disable_after_error(struct qib_devdata *dd)
{
	if (dd->flags & QIB_INITTED) {
		u32 pidx;

		dd->flags &= ~QIB_INITTED;
		if (dd->pport)
			for (pidx = 0; pidx < dd->num_pports; ++pidx) {
				struct qib_pportdata *ppd;

				ppd = dd->pport + pidx;
				if (dd->flags & QIB_PRESENT) {
					qib_set_linkstate(ppd,
						QIB_IB_LINKDOWN_DISABLE);
					dd->f_setextled(ppd, 0);
				}
				*ppd->statusp &= ~QIB_STATUS_IB_READY;
			}
	}

	/*
	 * Mark as having had an error for driver, and also
	 * for /sys and status word mapped to user programs.
	 * This marks unit as not usable, until reset.
	 */
	if (dd->devstatusp)
		*dd->devstatusp |= QIB_STATUS_HWERROR;
}

static void qib_remove_one(struct pci_dev *);
static int qib_init_one(struct pci_dev *, const struct pci_device_id *);

#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
#define PFX QIB_DRV_NAME ": "

static const struct pci_device_id qib_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) },
	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) },
	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, qib_pci_tbl);

static struct pci_driver qib_driver = {
	.name = QIB_DRV_NAME,
	.probe = qib_init_one,
	.remove = qib_remove_one,
	.id_table = qib_pci_tbl,
	.err_handler = &qib_pci_err_handler,
};

#ifdef CONFIG_INFINIBAND_QIB_DCA

static int qib_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call  = qib_notify_dca,
	.next           = NULL,
	.priority       = 0
};

static int qib_notify_dca_device(struct device *device, void *data)
{
	struct qib_devdata *dd = dev_get_drvdata(device);
	unsigned long event = *(unsigned long *)data;

	return dd->f_notify_dca(dd, event);
}

static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
					      void *p)
{
	int rval;

	rval = driver_for_each_device(&qib_driver.driver, NULL,
				      &event, qib_notify_dca_device);
	return rval ? NOTIFY_BAD : NOTIFY_DONE;
}

#endif

/*
 * Do all the generic driver unit- and chip-independent memory
 * allocation and initialization.
 */
static int __init qib_ib_init(void)
{
	int ret;

	ret = qib_dev_init();
	if (ret)
		goto bail;

	/*
	 * These must be called before the driver is registered with
	 * the PCI subsystem.
	 */
	idr_init(&qib_unit_table);

#ifdef CONFIG_INFINIBAND_QIB_DCA
	dca_register_notify(&dca_notifier);
#endif
#ifdef CONFIG_DEBUG_FS
	qib_dbg_init();
#endif
	ret = pci_register_driver(&qib_driver);
	if (ret < 0) {
		pr_err("Unable to register driver: error %d\n", -ret);
		goto bail_dev;
	}

	/* not fatal if it doesn't work */
	if (qib_init_qibfs())
		pr_err("Unable to register ipathfs\n");
	goto bail; /* all OK */

bail_dev:
#ifdef CONFIG_INFINIBAND_QIB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
#ifdef CONFIG_DEBUG_FS
	qib_dbg_exit();
#endif
	idr_destroy(&qib_unit_table);
	qib_dev_cleanup();
bail:
	return ret;
}

module_init(qib_ib_init);

/*
 * Do the non-unit driver cleanup, memory free, etc. at unload.
 */
static void __exit qib_ib_cleanup(void)
{
	int ret;

	ret = qib_exit_qibfs();
	if (ret)
		pr_err(
			"Unable to cleanup counter filesystem: error %d\n",
			-ret);

#ifdef CONFIG_INFINIBAND_QIB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&qib_driver);
#ifdef CONFIG_DEBUG_FS
	qib_dbg_exit();
#endif

	qib_cpulist_count = 0;
	kfree(qib_cpulist);

	idr_destroy(&qib_unit_table);
	qib_dev_cleanup();
}

module_exit(qib_ib_cleanup);

/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
{
	int ctxt;
	int pidx;
	struct qib_ctxtdata **tmp;
	unsigned long flags;

	/* users can't do anything more with chip */
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		if (dd->pport[pidx].statusp)
			*dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;

		spin_lock(&dd->pport[pidx].cc_shadow_lock);

		kfree(dd->pport[pidx].congestion_entries);
		dd->pport[pidx].congestion_entries = NULL;
		kfree(dd->pport[pidx].ccti_entries);
		dd->pport[pidx].ccti_entries = NULL;
		kfree(dd->pport[pidx].ccti_entries_shadow);
		dd->pport[pidx].ccti_entries_shadow = NULL;
		kfree(dd->pport[pidx].congestion_entries_shadow);
		dd->pport[pidx].congestion_entries_shadow = NULL;

		spin_unlock(&dd->pport[pidx].cc_shadow_lock);
	}

	if (!qib_wc_pat)
		qib_disable_wc(dd);

	if (dd->pioavailregs_dma) {
		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
				  (void *) dd->pioavailregs_dma,
				  dd->pioavailregs_phys);
		dd->pioavailregs_dma = NULL;
	}

	if (dd->pageshadow) {
		struct page **tmpp = dd->pageshadow;
		dma_addr_t *tmpd = dd->physshadow;
		int i;

		for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
			int ctxt_tidbase = ctxt * dd->rcvtidcnt;
			int maxtid = ctxt_tidbase + dd->rcvtidcnt;

			for (i = ctxt_tidbase; i < maxtid; i++) {
				if (!tmpp[i])
					continue;
				pci_unmap_page(dd->pcidev, tmpd[i],
					       PAGE_SIZE, PCI_DMA_FROMDEVICE);
				qib_release_user_pages(&tmpp[i], 1);
				tmpp[i] = NULL;
			}
		}

		dd->pageshadow = NULL;
		vfree(tmpp);
		dd->physshadow = NULL;
		vfree(tmpd);
	}

	/*
	 * Free any resources still in use (usually just kernel contexts)
	 * at unload; we do for ctxtcnt, because that's what we allocate.
	 * We acquire lock to be really paranoid that rcd isn't being
	 * accessed from some interrupt-related code (that should not happen,
	 * but best to be sure).
	 */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	tmp = dd->rcd;
	dd->rcd = NULL;
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
	for (ctxt = 0; tmp && ctxt < dd->ctxtcnt; ctxt++) {
		struct qib_ctxtdata *rcd = tmp[ctxt];

		tmp[ctxt] = NULL; /* debugging paranoia */
		qib_free_ctxtdata(dd, rcd);
	}
	kfree(tmp);
	kfree(dd->boardname);
	qib_cq_exit(dd);
}

/*
 * Clean up on unit shutdown, or error during unit load after
 * successful initialization.
 */
static void qib_postinit_cleanup(struct qib_devdata *dd)
{
	/*
	 * Clean up chip-specific stuff.
	 * We check for NULL here, because it's outside
	 * the kregbase check, and we need to call it
	 * after the free_irq.
	 * Thus it's possible that the function pointers were never
	 * initialized.
	 */
	if (dd->f_cleanup)
		dd->f_cleanup(dd);

	qib_pcie_ddcleanup(dd);

	cleanup_device_data(dd);

	qib_free_devdata(dd);
}

static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int ret, j, pidx, initfail;
	struct qib_devdata *dd = NULL;

	ret = qib_pcie_init(pdev, ent);
	if (ret)
		goto bail;

	/*
	 * Do device-specific initialization, function table setup, dd
	 * allocation, etc.
	 */
	switch (ent->device) {
	case PCI_DEVICE_ID_QLOGIC_IB_6120:
#ifdef CONFIG_PCI_MSI
		dd = qib_init_iba6120_funcs(pdev, ent);
#else
		qib_early_err(&pdev->dev,
			"Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n",
			ent->device);
		dd = ERR_PTR(-ENODEV);
#endif
		break;

	case PCI_DEVICE_ID_QLOGIC_IB_7220:
		dd = qib_init_iba7220_funcs(pdev, ent);
		break;

	case PCI_DEVICE_ID_QLOGIC_IB_7322:
		dd = qib_init_iba7322_funcs(pdev, ent);
		break;

	default:
		qib_early_err(&pdev->dev,
			"Failing on unknown Intel deviceid 0x%x\n",
			ent->device);
		ret = -ENODEV;
	}

	if (IS_ERR(dd))
		ret = PTR_ERR(dd);
	if (ret)
		goto bail; /* error already printed */

	ret = qib_create_workqueues(dd);
	if (ret)
		goto bail;

	/* do the generic initialization */
	initfail = qib_init(dd, 0);

	ret = qib_register_ib_device(dd);

	/*
	 * Now ready for use.  this should be cleared whenever we
	 * detect a reset, or initiate one.  If earlier failure,
	 * we still create devices, so diags, etc. can be used
	 * to determine cause of problem.
	 */
	if (!qib_mini_init && !initfail && !ret)
		dd->flags |= QIB_INITTED;

	j = qib_device_create(dd);
	if (j)
		qib_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
	j = qibfs_add(dd);
	if (j)
		qib_dev_err(dd, "Failed filesystem setup for counters: %d\n",
			    -j);

	if (qib_mini_init || initfail || ret) {
		qib_stop_timers(dd);
		flush_workqueue(ib_wq);
		for (pidx = 0; pidx < dd->num_pports; ++pidx)
			dd->f_quiet_serdes(dd->pport + pidx);
		if (qib_mini_init)
			goto bail;
		if (!j) {
			(void) qibfs_remove(dd);
			qib_device_remove(dd);
		}
		if (!ret)
			qib_unregister_ib_device(dd);
		qib_postinit_cleanup(dd);
		if (initfail)
			ret = initfail;
		goto bail;
	}

	if (!qib_wc_pat) {
		ret = qib_enable_wc(dd);
		if (ret) {
			qib_dev_err(dd,
				"Write combining not enabled (err %d): performance may be poor\n",
				-ret);
			ret = 0;
		}
	}

	qib_verify_pioperf(dd);
bail:
	return ret;
}

static void qib_remove_one(struct pci_dev *pdev)
{
	struct qib_devdata *dd = pci_get_drvdata(pdev);
	int ret;

	/* unregister from IB core */
	qib_unregister_ib_device(dd);

	/*
	 * Disable the IB link, disable interrupts on the device,
	 * clear dma engines, etc.
	 */
	if (!qib_mini_init)
		qib_shutdown_device(dd);

	qib_stop_timers(dd);

	/* wait until all of our (qsfp) queue_work() calls complete */
	flush_workqueue(ib_wq);

	ret = qibfs_remove(dd);
	if (ret)
		qib_dev_err(dd, "Failed counters filesystem cleanup: %d\n",
			    -ret);

	qib_device_remove(dd);

	qib_postinit_cleanup(dd);
}

/**
 * qib_create_rcvhdrq - create a receive header queue
 * @dd: the qlogic_ib device
 * @rcd: the context data
 *
 * This must be contiguous memory (from an i/o perspective), and must be
 * DMA'able (which means for some systems, it will go through an IOMMU,
 * or be forced into a low address range).
 */
int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
	unsigned amt;
	int old_node_id;

	if (!rcd->rcvhdrq) {
		dma_addr_t phys_hdrqtail;
		gfp_t gfp_flags;

		amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
			    sizeof(u32), PAGE_SIZE);
		gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
			GFP_USER : GFP_KERNEL;

		old_node_id = dev_to_node(&dd->pcidev->dev);
		set_dev_node(&dd->pcidev->dev, rcd->node_id);
		rcd->rcvhdrq = dma_alloc_coherent(
			&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
			gfp_flags | __GFP_COMP);
		set_dev_node(&dd->pcidev->dev, old_node_id);

		if (!rcd->rcvhdrq) {
			qib_dev_err(dd,
				"attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
				amt, rcd->ctxt);
			goto bail;
		}

		if (rcd->ctxt >= dd->first_user_ctxt) {
			rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
			if (!rcd->user_event_mask)
				goto bail_free_hdrq;
		}

		if (!(dd->flags & QIB_NODMA_RTAIL)) {
			set_dev_node(&dd->pcidev->dev, rcd->node_id);
			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
				gfp_flags);
			set_dev_node(&dd->pcidev->dev, old_node_id);
			if (!rcd->rcvhdrtail_kvaddr)
				goto bail_free;
			rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
		}

		rcd->rcvhdrq_size = amt;
	}

	/* clear for security and sanity on each use */
	memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
	if (rcd->rcvhdrtail_kvaddr)
		memset(rcd->rcvhdrtail_kvaddr, 0, PAGE_SIZE);
	return 0;

bail_free:
	qib_dev_err(dd,
		"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
		rcd->ctxt);
	vfree(rcd->user_event_mask);
	rcd->user_event_mask = NULL;
bail_free_hdrq:
	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
			  rcd->rcvhdrq_phys);
	rcd->rcvhdrq = NULL;
bail:
	return -ENOMEM;
}

/**
 * qib_setup_eagerbufs - allocate eager buffers, both kernel and user contexts.
 * @rcd: the context we are setting up.
 *
 * Allocate the eager TID buffers and program them into the chip.
 * They are no longer completely contiguous, we do multiple allocation
 * calls.  Otherwise we get the OOM code involved, by asking for too
 * much per call, with disastrous results on some kernels.
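 * Returns 0 on success, or -ENOMEM if any allocation fails.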
 */
int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
{
	struct qib_devdata *dd = rcd->dd;
	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	gfp_t gfp_flags;
	int old_node_id;

	/*
	 * GFP_USER, but without GFP_FS, so buffer cache can be
	 * coalesced (we hope); otherwise, even at order 4,
	 * heavy filesystem activity makes these fail, and we can
	 * use compound pages.
	 */
	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

	egrcnt = rcd->rcvegrcnt;
	egroff = rcd->rcvegr_tid_base;
	egrsize = dd->rcvegrbufsize;

	chunk = rcd->rcvegrbuf_chunks;
	egrperchunk = rcd->rcvegrbufs_perchunk;
	size = rcd->rcvegrbuf_size;
	if (!rcd->rcvegrbuf) {
		rcd->rcvegrbuf =
			kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
				GFP_KERNEL, rcd->node_id);
		if (!rcd->rcvegrbuf)
			goto bail;
	}
	if (!rcd->rcvegrbuf_phys) {
		rcd->rcvegrbuf_phys =
			kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
				GFP_KERNEL, rcd->node_id);
		if (!rcd->rcvegrbuf_phys)
			goto bail_rcvegrbuf;
	}
	for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
		if (rcd->rcvegrbuf[e])
			continue;

		old_node_id = dev_to_node(&dd->pcidev->dev);
		set_dev_node(&dd->pcidev->dev, rcd->node_id);
		rcd->rcvegrbuf[e] =
			dma_alloc_coherent(&dd->pcidev->dev, size,
					   &rcd->rcvegrbuf_phys[e],
					   gfp_flags);
		set_dev_node(&dd->pcidev->dev, old_node_id);
		if (!rcd->rcvegrbuf[e])
			goto bail_rcvegrbuf_phys;
	}

	rcd->rcvegr_phys = rcd->rcvegrbuf_phys[0];

	for (e = chunk = 0; chunk < rcd->rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = rcd->rcvegrbuf_phys[chunk];
		unsigned i;

		/* clear for security and sanity on each use */
		memset(rcd->rcvegrbuf[chunk], 0, size);

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->f_put_tid(dd, e + egroff +
					  (u64 __iomem *)
					  ((char __iomem *)
					   dd->kregbase +
					   dd->rcvegrbase),
					  RCVHQ_RCV_TYPE_EAGER, pa);
			pa += egrsize;
		}
		cond_resched(); /* don't hog the cpu */
	}

	return 0;

bail_rcvegrbuf_phys:
	for (e = 0; e < rcd->rcvegrbuf_chunks && rcd->rcvegrbuf[e]; e++)
		dma_free_coherent(&dd->pcidev->dev, size,
				  rcd->rcvegrbuf[e], rcd->rcvegrbuf_phys[e]);
	kfree(rcd->rcvegrbuf_phys);
	rcd->rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	kfree(rcd->rcvegrbuf);
	rcd->rcvegrbuf = NULL;
bail:
	return -ENOMEM;
}

/*
 * Note: Changes to this routine should be mirrored
 * for the diagnostics routine qib_remap_ioaddr32().
 * There is also related code for VL15 buffers in qib_init_7322_variables().
 * The teardown code that unmaps is in qib_pcie_ddcleanup()
 */
int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
{
	u64 __iomem *qib_kregbase = NULL;
	void __iomem *qib_piobase = NULL;
	u64 __iomem *qib_userbase = NULL;
	u64 qib_kreglen;
	u64 qib_pio2koffset = dd->piobufbase & 0xffffffff;
	u64 qib_pio4koffset = dd->piobufbase >> 32;
	u64 qib_pio2klen = dd->piobcnt2k * dd->palign;
	u64 qib_pio4klen = dd->piobcnt4k * dd->align4k;
	u64 qib_physaddr = dd->physaddr;
	u64 qib_piolen;
	u64 qib_userlen = 0;

	/*
	 * Free the old mapping because the kernel will try to reuse the
	 * old mapping and not create a new mapping with the
	 * write combining attribute.
	 */
	iounmap(dd->kregbase);
	dd->kregbase = NULL;

	/*
	 * Assumes chip address space looks like:
	 *	- kregs + sregs + cregs + uregs (in any order)
	 *	- piobufs (2K and 4K bufs in either order)
	 * or:
	 *	- kregs + sregs + cregs (in any order)
	 *	- piobufs (2K and 4K bufs in either order)
	 *	- uregs
	 */
	if (dd->piobcnt4k == 0) {
		qib_kreglen = qib_pio2koffset;
		qib_piolen = qib_pio2klen;
	} else if (qib_pio2koffset < qib_pio4koffset) {
		qib_kreglen = qib_pio2koffset;
		qib_piolen = qib_pio4koffset + qib_pio4klen - qib_kreglen;
	} else {
		qib_kreglen = qib_pio4koffset;
		qib_piolen = qib_pio2koffset + qib_pio2klen - qib_kreglen;
	}
	qib_piolen += vl15buflen;
	/* Map just the configured ports (not all hw ports) */
	if (dd->uregbase > qib_kreglen)
		qib_userlen = dd->ureg_align * dd->cfgctxts;

	/* Sanity checks passed, now create the new mappings */
	qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen);
	if (!qib_kregbase)
		goto bail;

	qib_piobase = ioremap_wc(qib_physaddr + qib_kreglen, qib_piolen);
	if (!qib_piobase)
		goto bail_kregbase;

	if (qib_userlen) {
		qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase,
					       qib_userlen);
		if (!qib_userbase)
			goto bail_piobase;
	}

	dd->kregbase = qib_kregbase;
	dd->kregend = (u64 __iomem *)
		((char __iomem *) qib_kregbase + qib_kreglen);
	dd->piobase = qib_piobase;
	dd->pio2kbase = (void __iomem *)
		(((char __iomem *) dd->piobase) +
		 qib_pio2koffset - qib_kreglen);
	if (dd->piobcnt4k)
		dd->pio4kbase = (void __iomem *)
			(((char __iomem *) dd->piobase) +
			 qib_pio4koffset - qib_kreglen);
	if (qib_userlen)
		/* ureg will now be accessed relative to dd->userbase */
		dd->userbase = qib_userbase;
	return 0;

bail_piobase:
	iounmap(qib_piobase);
bail_kregbase:
	iounmap(qib_kregbase);
bail:
	return -ENOMEM;
}