1 /* 2 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 
33 */ 34 35 #include <linux/pci.h> 36 #include <linux/netdevice.h> 37 #include <linux/vmalloc.h> 38 #include <linux/delay.h> 39 #include <linux/idr.h> 40 #include <linux/module.h> 41 #include <linux/printk.h> 42 #ifdef CONFIG_INFINIBAND_QIB_DCA 43 #include <linux/dca.h> 44 #endif 45 46 #include "qib.h" 47 #include "qib_common.h" 48 #include "qib_mad.h" 49 #ifdef CONFIG_DEBUG_FS 50 #include "qib_debugfs.h" 51 #include "qib_verbs.h" 52 #endif 53 54 #undef pr_fmt 55 #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt 56 57 /* 58 * min buffers we want to have per context, after driver 59 */ 60 #define QIB_MIN_USER_CTXT_BUFCNT 7 61 62 #define QLOGIC_IB_R_SOFTWARE_MASK 0xFF 63 #define QLOGIC_IB_R_SOFTWARE_SHIFT 24 64 #define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62) 65 66 /* 67 * Number of ctxts we are configured to use (to allow for more pio 68 * buffers per ctxt, etc.) Zero means use chip value. 69 */ 70 ushort qib_cfgctxts; 71 module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); 72 MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); 73 74 unsigned qib_numa_aware; 75 module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); 76 MODULE_PARM_DESC(numa_aware, 77 "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process"); 78 79 /* 80 * If set, do not write to any regs if avoidable, hack to allow 81 * check for deranged default register values. 
82 */ 83 ushort qib_mini_init; 84 module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO); 85 MODULE_PARM_DESC(mini_init, "If set, do minimal diag init"); 86 87 unsigned qib_n_krcv_queues; 88 module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); 89 MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); 90 91 unsigned qib_cc_table_size; 92 module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); 93 MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); 94 /* 95 * qib_wc_pat parameter: 96 * 0 is WC via MTRR 97 * 1 is WC via PAT 98 * If PAT initialization fails, code reverts back to MTRR 99 */ 100 unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ 101 module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); 102 MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); 103 104 static void verify_interrupt(unsigned long); 105 106 static struct idr qib_unit_table; 107 u32 qib_cpulist_count; 108 unsigned long *qib_cpulist; 109 110 /* set number of contexts we'll actually use */ 111 void qib_set_ctxtcnt(struct qib_devdata *dd) 112 { 113 if (!qib_cfgctxts) { 114 dd->cfgctxts = dd->first_user_ctxt + num_online_cpus(); 115 if (dd->cfgctxts > dd->ctxtcnt) 116 dd->cfgctxts = dd->ctxtcnt; 117 } else if (qib_cfgctxts < dd->num_pports) 118 dd->cfgctxts = dd->ctxtcnt; 119 else if (qib_cfgctxts <= dd->ctxtcnt) 120 dd->cfgctxts = qib_cfgctxts; 121 else 122 dd->cfgctxts = dd->ctxtcnt; 123 dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 : 124 dd->cfgctxts - dd->first_user_ctxt; 125 } 126 127 /* 128 * Common code for creating the receive context array. 
129 */ 130 int qib_create_ctxts(struct qib_devdata *dd) 131 { 132 unsigned i; 133 int ret; 134 int local_node_id = pcibus_to_node(dd->pcidev->bus); 135 136 if (local_node_id < 0) 137 local_node_id = numa_node_id(); 138 dd->assigned_node_id = local_node_id; 139 140 /* 141 * Allocate full ctxtcnt array, rather than just cfgctxts, because 142 * cleanup iterates across all possible ctxts. 143 */ 144 dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL); 145 if (!dd->rcd) { 146 qib_dev_err(dd, 147 "Unable to allocate ctxtdata array, failing\n"); 148 ret = -ENOMEM; 149 goto done; 150 } 151 152 /* create (one or more) kctxt */ 153 for (i = 0; i < dd->first_user_ctxt; ++i) { 154 struct qib_pportdata *ppd; 155 struct qib_ctxtdata *rcd; 156 157 if (dd->skip_kctxt_mask & (1 << i)) 158 continue; 159 160 ppd = dd->pport + (i % dd->num_pports); 161 162 rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id); 163 if (!rcd) { 164 qib_dev_err(dd, 165 "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); 166 ret = -ENOMEM; 167 goto done; 168 } 169 rcd->pkeys[0] = QIB_DEFAULT_P_KEY; 170 rcd->seq_cnt = 1; 171 } 172 ret = 0; 173 done: 174 return ret; 175 } 176 177 /* 178 * Common code for user and kernel context setup. 
179 */ 180 struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, 181 int node_id) 182 { 183 struct qib_devdata *dd = ppd->dd; 184 struct qib_ctxtdata *rcd; 185 186 rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id); 187 if (rcd) { 188 INIT_LIST_HEAD(&rcd->qp_wait_list); 189 rcd->node_id = node_id; 190 rcd->ppd = ppd; 191 rcd->dd = dd; 192 rcd->cnt = 1; 193 rcd->ctxt = ctxt; 194 dd->rcd[ctxt] = rcd; 195 #ifdef CONFIG_DEBUG_FS 196 if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ 197 rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), 198 GFP_KERNEL, node_id); 199 if (!rcd->opstats) { 200 kfree(rcd); 201 qib_dev_err(dd, 202 "Unable to allocate per ctxt stats buffer\n"); 203 return NULL; 204 } 205 } 206 #endif 207 dd->f_init_ctxt(rcd); 208 209 /* 210 * To avoid wasting a lot of memory, we allocate 32KB chunks 211 * of physically contiguous memory, advance through it until 212 * used up and then allocate more. Of course, we need 213 * memory to store those extra pointers, now. 32KB seems to 214 * be the most that is "safe" under memory pressure 215 * (creating large files and then copying them over 216 * NFS while doing lots of MPI jobs). The OOM killer can 217 * get invoked, even though we say we can sleep and this can 218 * cause significant system problems.... 219 */ 220 rcd->rcvegrbuf_size = 0x8000; 221 rcd->rcvegrbufs_perchunk = 222 rcd->rcvegrbuf_size / dd->rcvegrbufsize; 223 rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + 224 rcd->rcvegrbufs_perchunk - 1) / 225 rcd->rcvegrbufs_perchunk; 226 BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); 227 rcd->rcvegrbufs_perchunk_shift = 228 ilog2(rcd->rcvegrbufs_perchunk); 229 } 230 return rcd; 231 } 232 233 /* 234 * Common code for initializing the physical port structure. 
235 */ 236 void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, 237 u8 hw_pidx, u8 port) 238 { 239 int size; 240 ppd->dd = dd; 241 ppd->hw_pidx = hw_pidx; 242 ppd->port = port; /* IB port number, not index */ 243 244 spin_lock_init(&ppd->sdma_lock); 245 spin_lock_init(&ppd->lflags_lock); 246 init_waitqueue_head(&ppd->state_wait); 247 248 init_timer(&ppd->symerr_clear_timer); 249 ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup; 250 ppd->symerr_clear_timer.data = (unsigned long)ppd; 251 252 ppd->qib_wq = NULL; 253 254 spin_lock_init(&ppd->cc_shadow_lock); 255 256 if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) 257 goto bail; 258 259 ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size, 260 IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT); 261 262 ppd->cc_max_table_entries = 263 ppd->cc_supported_table_entries/IB_CCT_ENTRIES; 264 265 size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry) 266 * IB_CCT_ENTRIES; 267 ppd->ccti_entries = kzalloc(size, GFP_KERNEL); 268 if (!ppd->ccti_entries) { 269 qib_dev_err(dd, 270 "failed to allocate congestion control table for port %d!\n", 271 port); 272 goto bail; 273 } 274 275 size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry); 276 ppd->congestion_entries = kzalloc(size, GFP_KERNEL); 277 if (!ppd->congestion_entries) { 278 qib_dev_err(dd, 279 "failed to allocate congestion setting list for port %d!\n", 280 port); 281 goto bail_1; 282 } 283 284 size = sizeof(struct cc_table_shadow); 285 ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL); 286 if (!ppd->ccti_entries_shadow) { 287 qib_dev_err(dd, 288 "failed to allocate shadow ccti list for port %d!\n", 289 port); 290 goto bail_2; 291 } 292 293 size = sizeof(struct ib_cc_congestion_setting_attr); 294 ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL); 295 if (!ppd->congestion_entries_shadow) { 296 qib_dev_err(dd, 297 "failed to allocate shadow congestion setting list for port %d!\n", 298 
port); 299 goto bail_3; 300 } 301 302 return; 303 304 bail_3: 305 kfree(ppd->ccti_entries_shadow); 306 ppd->ccti_entries_shadow = NULL; 307 bail_2: 308 kfree(ppd->congestion_entries); 309 ppd->congestion_entries = NULL; 310 bail_1: 311 kfree(ppd->ccti_entries); 312 ppd->ccti_entries = NULL; 313 bail: 314 /* User is intentionally disabling the congestion control agent */ 315 if (!qib_cc_table_size) 316 return; 317 318 if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { 319 qib_cc_table_size = 0; 320 qib_dev_err(dd, 321 "Congestion Control table size %d less than minimum %d for port %d\n", 322 qib_cc_table_size, IB_CCT_MIN_ENTRIES, port); 323 } 324 325 qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", 326 port); 327 return; 328 } 329 330 static int init_pioavailregs(struct qib_devdata *dd) 331 { 332 int ret, pidx; 333 u64 *status_page; 334 335 dd->pioavailregs_dma = dma_alloc_coherent( 336 &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, 337 GFP_KERNEL); 338 if (!dd->pioavailregs_dma) { 339 qib_dev_err(dd, 340 "failed to allocate PIOavail reg area in memory\n"); 341 ret = -ENOMEM; 342 goto done; 343 } 344 345 /* 346 * We really want L2 cache aligned, but for current CPUs of 347 * interest, they are the same. 348 */ 349 status_page = (u64 *) 350 ((char *) dd->pioavailregs_dma + 351 ((2 * L1_CACHE_BYTES + 352 dd->pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES)); 353 /* device status comes first, for backwards compatibility */ 354 dd->devstatusp = status_page; 355 *status_page++ = 0; 356 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 357 dd->pport[pidx].statusp = status_page; 358 *status_page++ = 0; 359 } 360 361 /* 362 * Setup buffer to hold freeze and other messages, accessible to 363 * apps, following statusp. This is per-unit, not per port. 
364 */ 365 dd->freezemsg = (char *) status_page; 366 *dd->freezemsg = 0; 367 /* length of msg buffer is "whatever is left" */ 368 ret = (char *) status_page - (char *) dd->pioavailregs_dma; 369 dd->freezelen = PAGE_SIZE - ret; 370 371 ret = 0; 372 373 done: 374 return ret; 375 } 376 377 /** 378 * init_shadow_tids - allocate the shadow TID array 379 * @dd: the qlogic_ib device 380 * 381 * allocate the shadow TID array, so we can qib_munlock previous 382 * entries. It may make more sense to move the pageshadow to the 383 * ctxt data structure, so we only allocate memory for ctxts actually 384 * in use, since we at 8k per ctxt, now. 385 * We don't want failures here to prevent use of the driver/chip, 386 * so no return value. 387 */ 388 static void init_shadow_tids(struct qib_devdata *dd) 389 { 390 struct page **pages; 391 dma_addr_t *addrs; 392 393 pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); 394 if (!pages) { 395 qib_dev_err(dd, 396 "failed to allocate shadow page * array, no expected sends!\n"); 397 goto bail; 398 } 399 400 addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); 401 if (!addrs) { 402 qib_dev_err(dd, 403 "failed to allocate shadow dma handle array, no expected sends!\n"); 404 goto bail_free; 405 } 406 407 dd->pageshadow = pages; 408 dd->physshadow = addrs; 409 return; 410 411 bail_free: 412 vfree(pages); 413 bail: 414 dd->pageshadow = NULL; 415 } 416 417 /* 418 * Do initialization for device that is only needed on 419 * first detect, not on resets. 
420 */ 421 static int loadtime_init(struct qib_devdata *dd) 422 { 423 int ret = 0; 424 425 if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & 426 QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { 427 qib_dev_err(dd, 428 "Driver only handles version %d, chip swversion is %d (%llx), failng\n", 429 QIB_CHIP_SWVERSION, 430 (int)(dd->revision >> 431 QLOGIC_IB_R_SOFTWARE_SHIFT) & 432 QLOGIC_IB_R_SOFTWARE_MASK, 433 (unsigned long long) dd->revision); 434 ret = -ENOSYS; 435 goto done; 436 } 437 438 if (dd->revision & QLOGIC_IB_R_EMULATOR_MASK) 439 qib_devinfo(dd->pcidev, "%s", dd->boardversion); 440 441 spin_lock_init(&dd->pioavail_lock); 442 spin_lock_init(&dd->sendctrl_lock); 443 spin_lock_init(&dd->uctxt_lock); 444 spin_lock_init(&dd->qib_diag_trans_lock); 445 spin_lock_init(&dd->eep_st_lock); 446 mutex_init(&dd->eep_lock); 447 448 if (qib_mini_init) 449 goto done; 450 451 ret = init_pioavailregs(dd); 452 init_shadow_tids(dd); 453 454 qib_get_eeprom_info(dd); 455 456 /* setup time (don't start yet) to verify we got interrupt */ 457 init_timer(&dd->intrchk_timer); 458 dd->intrchk_timer.function = verify_interrupt; 459 dd->intrchk_timer.data = (unsigned long) dd; 460 461 ret = qib_cq_init(dd); 462 done: 463 return ret; 464 } 465 466 /** 467 * init_after_reset - re-initialize after a reset 468 * @dd: the qlogic_ib device 469 * 470 * sanity check at least some of the values after reset, and 471 * ensure no receive or transmit (explicitly, in case reset 472 * failed 473 */ 474 static int init_after_reset(struct qib_devdata *dd) 475 { 476 int i; 477 478 /* 479 * Ensure chip does no sends or receives, tail updates, or 480 * pioavail updates while we re-initialize. This is mostly 481 * for the driver data structures, not chip registers. 482 */ 483 for (i = 0; i < dd->num_pports; ++i) { 484 /* 485 * ctxt == -1 means "all contexts". Only really safe for 486 * _dis_abling things, as here. 
487 */ 488 dd->f_rcvctrl(dd->pport + i, QIB_RCVCTRL_CTXT_DIS | 489 QIB_RCVCTRL_INTRAVAIL_DIS | 490 QIB_RCVCTRL_TAILUPD_DIS, -1); 491 /* Redundant across ports for some, but no big deal. */ 492 dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_DIS | 493 QIB_SENDCTRL_AVAIL_DIS); 494 } 495 496 return 0; 497 } 498 499 static void enable_chip(struct qib_devdata *dd) 500 { 501 u64 rcvmask; 502 int i; 503 504 /* 505 * Enable PIO send, and update of PIOavail regs to memory. 506 */ 507 for (i = 0; i < dd->num_pports; ++i) 508 dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_ENB | 509 QIB_SENDCTRL_AVAIL_ENB); 510 /* 511 * Enable kernel ctxts' receive and receive interrupt. 512 * Other ctxts done as user opens and inits them. 513 */ 514 rcvmask = QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB; 515 rcvmask |= (dd->flags & QIB_NODMA_RTAIL) ? 516 QIB_RCVCTRL_TAILUPD_DIS : QIB_RCVCTRL_TAILUPD_ENB; 517 for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { 518 struct qib_ctxtdata *rcd = dd->rcd[i]; 519 520 if (rcd) 521 dd->f_rcvctrl(rcd->ppd, rcvmask, i); 522 } 523 } 524 525 static void verify_interrupt(unsigned long opaque) 526 { 527 struct qib_devdata *dd = (struct qib_devdata *) opaque; 528 529 if (!dd) 530 return; /* being torn down */ 531 532 /* 533 * If we don't have a lid or any interrupts, let the user know and 534 * don't bother checking again. 535 */ 536 if (dd->int_counter == 0) { 537 if (!dd->f_intr_fallback(dd)) 538 dev_err(&dd->pcidev->dev, 539 "No interrupts detected, not usable.\n"); 540 else /* re-arm the timer to see if fallback works */ 541 mod_timer(&dd->intrchk_timer, jiffies + HZ/2); 542 } 543 } 544 545 static void init_piobuf_state(struct qib_devdata *dd) 546 { 547 int i, pidx; 548 u32 uctxts; 549 550 /* 551 * Ensure all buffers are free, and fifos empty. Buffers 552 * are common, so only do once for port 0. 553 * 554 * After enable and qib_chg_pioavailkernel so we can safely 555 * enable pioavail updates and PIOENABLE. 
After this, packets 556 * are ready and able to go out. 557 */ 558 dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_ALL); 559 for (pidx = 0; pidx < dd->num_pports; ++pidx) 560 dd->f_sendctrl(dd->pport + pidx, QIB_SENDCTRL_FLUSH); 561 562 /* 563 * If not all sendbufs are used, add the one to each of the lower 564 * numbered contexts. pbufsctxt and lastctxt_piobuf are 565 * calculated in chip-specific code because it may cause some 566 * chip-specific adjustments to be made. 567 */ 568 uctxts = dd->cfgctxts - dd->first_user_ctxt; 569 dd->ctxts_extrabuf = dd->pbufsctxt ? 570 dd->lastctxt_piobuf - (dd->pbufsctxt * uctxts) : 0; 571 572 /* 573 * Set up the shadow copies of the piobufavail registers, 574 * which we compare against the chip registers for now, and 575 * the in memory DMA'ed copies of the registers. 576 * By now pioavail updates to memory should have occurred, so 577 * copy them into our working/shadow registers; this is in 578 * case something went wrong with abort, but mostly to get the 579 * initial values of the generation bit correct. 580 */ 581 for (i = 0; i < dd->pioavregs; i++) { 582 __le64 tmp; 583 584 tmp = dd->pioavailregs_dma[i]; 585 /* 586 * Don't need to worry about pioavailkernel here 587 * because we will call qib_chg_pioavailkernel() later 588 * in initialization, to busy out buffers as needed. 
589 */ 590 dd->pioavailshadow[i] = le64_to_cpu(tmp); 591 } 592 while (i < ARRAY_SIZE(dd->pioavailshadow)) 593 dd->pioavailshadow[i++] = 0; /* for debugging sanity */ 594 595 /* after pioavailshadow is setup */ 596 qib_chg_pioavailkernel(dd, 0, dd->piobcnt2k + dd->piobcnt4k, 597 TXCHK_CHG_TYPE_KERN, NULL); 598 dd->f_initvl15_bufs(dd); 599 } 600 601 /** 602 * qib_create_workqueues - create per port workqueues 603 * @dd: the qlogic_ib device 604 */ 605 static int qib_create_workqueues(struct qib_devdata *dd) 606 { 607 int pidx; 608 struct qib_pportdata *ppd; 609 610 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 611 ppd = dd->pport + pidx; 612 if (!ppd->qib_wq) { 613 char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */ 614 snprintf(wq_name, sizeof(wq_name), "qib%d_%d", 615 dd->unit, pidx); 616 ppd->qib_wq = 617 create_singlethread_workqueue(wq_name); 618 if (!ppd->qib_wq) 619 goto wq_error; 620 } 621 } 622 return 0; 623 wq_error: 624 pr_err("create_singlethread_workqueue failed for port %d\n", 625 pidx + 1); 626 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 627 ppd = dd->pport + pidx; 628 if (ppd->qib_wq) { 629 destroy_workqueue(ppd->qib_wq); 630 ppd->qib_wq = NULL; 631 } 632 } 633 return -ENOMEM; 634 } 635 636 /** 637 * qib_init - do the actual initialization sequence on the chip 638 * @dd: the qlogic_ib device 639 * @reinit: reinitializing, so don't allocate new memory 640 * 641 * Do the actual initialization sequence on the chip. This is done 642 * both from the init routine called from the PCI infrastructure, and 643 * when we reset the chip, or detect that it was reset internally, 644 * or it's administratively re-enabled. 645 * 646 * Memory allocation here and in called routines is only done in 647 * the first case (reinit == 0). We have to be careful, because even 648 * without memory allocation, we need to re-write all the chip registers 649 * TIDs, etc. after the reset or enable has completed. 
*/
/*
 * Returns 0 on success, otherwise the error of the failing step
 * (-ENETDOWN when no port could be brought up).  With the mini_init
 * module parameter set, 0 is returned right after the load-time /
 * post-reset step and nothing further is programmed.
 */
int qib_init(struct qib_devdata *dd, int reinit)
{
	int ret = 0, pidx, lastfail = 0;
	u32 portok = 0;
	unsigned i;
	struct qib_ctxtdata *rcd;
	struct qib_pportdata *ppd;
	unsigned long flags;

	/* Set linkstate to unknown, so we can watch for a transition. */
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags &= ~(QIBL_LINKACTIVE | QIBL_LINKARMED |
				 QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKV);
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
	}

	if (reinit)
		ret = init_after_reset(dd);
	else
		ret = loadtime_init(dd);
	if (ret)
		goto done;

	/* Bypass most chip-init, to get to device creation */
	if (qib_mini_init)
		return 0;

	ret = dd->f_late_initreg(dd);
	if (ret)
		goto done;

	/* dd->rcd can be NULL if early init failed */
	for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
		/*
		 * Set up the (kernel) rcvhdr queue and egr TIDs.  If doing
		 * re-init, the simplest way to handle this is to free
		 * existing, and re-allocate.
		 * Need to re-create rest of ctxt 0 ctxtdata as well.
		 */
		rcd = dd->rcd[i];
		if (!rcd)
			continue;

		lastfail = qib_create_rcvhdrq(dd, rcd);
		if (!lastfail)
			lastfail = qib_setup_eagerbufs(rcd);
		if (lastfail) {
			qib_dev_err(dd,
				"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
			continue;
		}
	}

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		int mtu;
		/*
		 * Picks up a failure left in lastfail by the context loop
		 * above or by the previous port's serdes bringup.
		 */
		if (lastfail)
			ret = lastfail;
		ppd = dd->pport + pidx;
		mtu = ib_mtu_enum_to_int(qib_ibmtu);
		if (mtu == -1) {
			mtu = QIB_DEFAULT_MTU;
			qib_ibmtu = 0; /* don't leave invalid value */
		}
		/* set max we can ever have for this driver load */
		ppd->init_ibmaxlen = min(mtu > 2048 ?
					 dd->piosize4k : dd->piosize2k,
					 dd->rcvegrbufsize +
					 (dd->rcvhdrentsize << 2));
		/*
		 * Have to initialize ibmaxlen, but this will normally
		 * change immediately in qib_set_mtu().
		 */
		ppd->ibmaxlen = ppd->init_ibmaxlen;
		qib_set_mtu(ppd, mtu);

		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags |= QIBL_IB_LINK_DISABLED;
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);

		lastfail = dd->f_bringup_serdes(ppd);
		if (lastfail) {
			qib_devinfo(dd->pcidev,
				 "Failed to bringup IB port %u\n", ppd->port);
			/* whatever the hook returned is reported as -ENETDOWN */
			lastfail = -ENETDOWN;
			continue;
		}

		portok++;
	}

	if (!portok) {
		/* none of the ports initialized */
		if (!ret && lastfail)
			ret = lastfail;
		else if (!ret)
			ret = -ENETDOWN;
		/* but continue on, so we can debug cause */
	}

	enable_chip(dd);

	init_piobuf_state(dd);

done:
	if (!ret) {
		/* chip is OK for user apps; mark it as initialized */
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;
			/*
			 * Set status even if port serdes is not initialized
			 * so that diags will work.
			 */
			*ppd->statusp |= QIB_STATUS_CHIP_PRESENT |
				QIB_STATUS_INITTED;
			if (!ppd->link_speed_enabled)
				continue;
			/*
			 * NOTE(review): ret is overwritten for each port, so
			 * only the last port's qib_setup_sdma() result is
			 * returned, and the interrupt/timer setup below runs
			 * regardless of it.
			 */
			if (dd->flags & QIB_HAS_SEND_DMA)
				ret = qib_setup_sdma(ppd);
			init_timer(&ppd->hol_timer);
			ppd->hol_timer.function = qib_hol_event;
			ppd->hol_timer.data = (unsigned long)ppd;
			ppd->hol_state = QIB_HOL_UP;
		}

		/* now we can enable all interrupts from the chip */
		dd->f_set_intr_state(dd, 1);

		/*
		 * Setup to verify we get an interrupt, and fallback
		 * to an alternate if necessary and possible.
		 */
		mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
		/* start stats retrieval timer */
		mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
	}

	/* if ret is non-zero, we probably should do some cleanup here... */
	return ret;
}

/*
 * These next two routines are placeholders in case we don't have per-arch
 * code for controlling write combining.  If explicit control of write
 * combining is not available, performance will probably be awful.
 */

/* Weak default: reports that write combining is not supported. */
int __attribute__((weak)) qib_enable_wc(struct qib_devdata *dd)
{
	return -EOPNOTSUPP;
}

/* Weak default: nothing to undo. */
void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd)
{
}

/* Look up a unit in qib_unit_table; takes no lock itself. */
static inline struct qib_devdata *__qib_lookup(int unit)
{
	return idr_find(&qib_unit_table, unit);
}

/*
 * Look up the devdata for @unit while holding qib_devs_lock.
 * Returns whatever idr_find() yields for that unit number.
 */
struct qib_devdata *qib_lookup(int unit)
{
	struct qib_devdata *dd;
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	dd = __qib_lookup(unit);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	return dd;
}

/*
 * Stop the timers during unit shutdown, or after an error late
 * in initialization.
 *
 * Each timer is only deleted when its data field is non-zero.  For
 * the two device-level timers the data field is cleared afterwards.
 */
static void qib_stop_timers(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	int pidx;

	if (dd->stats_timer.data) {
		del_timer_sync(&dd->stats_timer);
		dd->stats_timer.data = 0;
	}
	if (dd->intrchk_timer.data) {
		del_timer_sync(&dd->intrchk_timer);
		dd->intrchk_timer.data = 0;
	}
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		if (ppd->hol_timer.data)
			del_timer_sync(&ppd->hol_timer);
		if (ppd->led_override_timer.data) {
			del_timer_sync(&ppd->led_override_timer);
			atomic_set(&ppd->led_override_timer_active, 0);
		}
		if (ppd->symerr_clear_timer.data)
			del_timer_sync(&ppd->symerr_clear_timer);
	}
}

/**
 * qib_shutdown_device - shut down a device
 * @dd: the qlogic_ib device
 *
 * This is called to make the device quiet when we are about to
 * unload the driver, and also when the device is administratively
 * disabled.   It does not free any data structures.
* Everything it does has to be setup again by qib_init(dd, 1)
 */
static void qib_shutdown_device(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	unsigned pidx;

	/* clear the link state flags and the IB status bits of every port */
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;

		spin_lock_irq(&ppd->lflags_lock);
		ppd->lflags &= ~(QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKARMED | QIBL_LINKACTIVE |
				 QIBL_LINKV);
		spin_unlock_irq(&ppd->lflags_lock);
		*ppd->statusp &= ~(QIB_STATUS_IB_CONF | QIB_STATUS_IB_READY);
	}
	dd->flags &= ~QIB_INITTED;

	/* mask interrupts, but not errors */
	dd->f_set_intr_state(dd, 0);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		/* ctxt == -1: applies to all contexts */
		dd->f_rcvctrl(ppd, QIB_RCVCTRL_TAILUPD_DIS |
				   QIB_RCVCTRL_CTXT_DIS |
				   QIB_RCVCTRL_INTRAVAIL_DIS |
				   QIB_RCVCTRL_PKEY_ENB, -1);
		/*
		 * Gracefully stop all sends allowing any in progress to
		 * trickle out first.
		 */
		dd->f_sendctrl(ppd, QIB_SENDCTRL_CLEAR);
	}

	/*
	 * Enough for anything that's going to trickle out to have actually
	 * done so.
	 */
	udelay(20);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_setextled(ppd, 0); /* make sure LEDs are off */

		if (dd->flags & QIB_HAS_SEND_DMA)
			qib_teardown_sdma(ppd);

		dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_DIS |
				    QIB_SENDCTRL_SEND_DIS);
		/*
		 * Clear SerdesEnable.
		 * We can't count on interrupts since we are stopping.
		 */
		dd->f_quiet_serdes(ppd);

		/* the per-port workqueue is destroyed here as well */
		if (ppd->qib_wq) {
			destroy_workqueue(ppd->qib_wq);
			ppd->qib_wq = NULL;
		}
	}

	qib_update_eeprom_log(dd);
}

/**
 * qib_free_ctxtdata - free a context's allocated data
 * @dd: the qlogic_ib device
 * @rcd: the ctxtdata structure
 *
 * free up any allocated data for a context
 * This should not touch anything that would affect a simultaneous
 * re-allocation of context data, because it is called after qib_mutex
 * is released (and can be called from reinit as well).
 * It should never change any chip state, or global driver state.
 *
 * A NULL @rcd is accepted and ignored.  @rcd itself is freed last.
 */
void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
	if (!rcd)
		return;

	/* the tail page is only released together with the header queue */
	if (rcd->rcvhdrq) {
		dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
				  rcd->rcvhdrq, rcd->rcvhdrq_phys);
		rcd->rcvhdrq = NULL;
		if (rcd->rcvhdrtail_kvaddr) {
			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
					  rcd->rcvhdrtail_kvaddr,
					  rcd->rcvhdrqtailaddr_phys);
			rcd->rcvhdrtail_kvaddr = NULL;
		}
	}
	/* free every eager buffer chunk, then the two bookkeeping arrays */
	if (rcd->rcvegrbuf) {
		unsigned e;

		for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
			void *base = rcd->rcvegrbuf[e];
			size_t size = rcd->rcvegrbuf_size;

			dma_free_coherent(&dd->pcidev->dev, size,
					  base, rcd->rcvegrbuf_phys[e]);
		}
		kfree(rcd->rcvegrbuf);
		rcd->rcvegrbuf = NULL;
		kfree(rcd->rcvegrbuf_phys);
		rcd->rcvegrbuf_phys = NULL;
		rcd->rcvegrbuf_chunks = 0;
	}

	kfree(rcd->tid_pg_list);
	vfree(rcd->user_event_mask);
	vfree(rcd->subctxt_uregbase);
	vfree(rcd->subctxt_rcvegrbuf);
	vfree(rcd->subctxt_rcvhdr_base);
#ifdef CONFIG_DEBUG_FS
	kfree(rcd->opstats);
	rcd->opstats = NULL;
#endif
	kfree(rcd);
}

/*
 * Perform a PIO buffer bandwidth write test, to verify proper system
 * configuration.  Even when all the setup calls work, occasionally
 * BIOS or other issues can prevent write combining from working, or
 * can cause other bandwidth problems to the chip.
 *
 * This test simply writes the same buffer over and over again, and
 * measures close to the peak bandwidth to the chip (not testing
 * data bandwidth to the wire).   On chips that use an address-based
 * trigger to send packets to the wire, this is easy.  On chips that
 * use a count to trigger, we want to make sure that the packet doesn't
 * go out on the wire, or trigger flow control checks.
 *
 * The result is only reported (via qib_dev_err) when the measured
 * rate is too low; nothing is returned to the caller.
 */
static void qib_verify_pioperf(struct qib_devdata *dd)
{
	u32 pbnum, cnt, lcnt;
	u32 __iomem *piobuf;
	u32 *addr;
	u64 msecs, emsecs;

	piobuf = dd->f_getsendbuf(dd->pport, 0ULL, &pbnum);
	if (!piobuf) {
		qib_devinfo(dd->pcidev,
			 "No PIObufs for checking perf, skipping\n");
		return;
	}

	/*
	 * Enough to give us a reasonable test, less than piobuf size, and
	 * likely multiple of store buffer length.
	 */
	cnt = 1024;

	/* source buffer for the copies; its contents are never initialized */
	addr = vmalloc(cnt);
	if (!addr) {
		qib_devinfo(dd->pcidev,
			 "Couldn't get memory for checking PIO perf,"
			 " skipping\n");
		goto done;
	}

	preempt_disable();  /* we want reasonably accurate elapsed time */
	msecs = 1 + jiffies_to_msecs(jiffies);
	for (lcnt = 0; lcnt < 10000U; lcnt++) {
		/* wait until we cross msec boundary */
		if (jiffies_to_msecs(jiffies) >= msecs)
			break;
		udelay(1);
	}

	dd->f_set_armlaunch(dd, 0);

	/*
	 * length 0, no dwords actually sent
	 */
	writeq(0, piobuf);
	qib_flush_wc();

	/*
	 * This is only roughly accurate, since even with preempt we
	 * still take interrupts that could take a while.   Running for
	 * >= 5 msec seems to get us "close enough" to accurate values.
	 */
	msecs = jiffies_to_msecs(jiffies);
	/* each pass copies cnt bytes (cnt >> 2 dwords) into the buffer */
	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
		qib_pio_copy(piobuf + 64, addr, cnt >> 2);
		emsecs = jiffies_to_msecs(jiffies) - msecs;
	}

	/* 1 GiB/sec, slightly over IB SDR line rate */
	if (lcnt < (emsecs * 1024U))
		qib_dev_err(dd,
			    "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n",
			    lcnt / (u32) emsecs);

	preempt_enable();

	vfree(addr);

done:
	/* disarm piobuf, so it's available again */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbnum));
	qib_sendbuf_done(dd, pbnum);
	/* also reached when vmalloc failed, before armlaunch was cleared */
	dd->f_set_armlaunch(dd, 1);
}

/*
 * Remove the unit from qib_unit_table and the device list (under
 * qib_devs_lock) and hand the structure back to the verbs layer,
 * which performs the actual free.
 */
void qib_free_devdata(struct qib_devdata *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	idr_remove(&qib_unit_table, dd->unit);
	list_del(&dd->list);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

#ifdef CONFIG_DEBUG_FS
	qib_dbg_ibdev_exit(&dd->verbs_dev);
#endif
	ib_dealloc_device(&dd->verbs_dev.ibdev);
}

/*
 * Allocate our primary per-unit data structure.  Must be done via verbs
 * allocator, because the verbs cleanup process both does cleanup and
 * free of the data structure.
 * "extra" is for chip-specific data.
 *
 * Use the idr mechanism to get a unit number for this unit.
1092 */ 1093 struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) 1094 { 1095 unsigned long flags; 1096 struct qib_devdata *dd; 1097 int ret; 1098 1099 dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); 1100 if (!dd) { 1101 dd = ERR_PTR(-ENOMEM); 1102 goto bail; 1103 } 1104 1105 #ifdef CONFIG_DEBUG_FS 1106 qib_dbg_ibdev_init(&dd->verbs_dev); 1107 #endif 1108 1109 idr_preload(GFP_KERNEL); 1110 spin_lock_irqsave(&qib_devs_lock, flags); 1111 1112 ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); 1113 if (ret >= 0) { 1114 dd->unit = ret; 1115 list_add(&dd->list, &qib_dev_list); 1116 } 1117 1118 spin_unlock_irqrestore(&qib_devs_lock, flags); 1119 idr_preload_end(); 1120 1121 if (ret < 0) { 1122 qib_early_err(&pdev->dev, 1123 "Could not allocate unit ID: error %d\n", -ret); 1124 #ifdef CONFIG_DEBUG_FS 1125 qib_dbg_ibdev_exit(&dd->verbs_dev); 1126 #endif 1127 ib_dealloc_device(&dd->verbs_dev.ibdev); 1128 dd = ERR_PTR(ret); 1129 goto bail; 1130 } 1131 1132 if (!qib_cpulist_count) { 1133 u32 count = num_online_cpus(); 1134 qib_cpulist = kzalloc(BITS_TO_LONGS(count) * 1135 sizeof(long), GFP_KERNEL); 1136 if (qib_cpulist) 1137 qib_cpulist_count = count; 1138 else 1139 qib_early_err(&pdev->dev, 1140 "Could not alloc cpulist info, cpu affinity might be wrong\n"); 1141 } 1142 1143 bail: 1144 return dd; 1145 } 1146 1147 /* 1148 * Called from freeze mode handlers, and from PCI error 1149 * reporting code. Should be paranoid about state of 1150 * system and data structures. 
1151 */ 1152 void qib_disable_after_error(struct qib_devdata *dd) 1153 { 1154 if (dd->flags & QIB_INITTED) { 1155 u32 pidx; 1156 1157 dd->flags &= ~QIB_INITTED; 1158 if (dd->pport) 1159 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 1160 struct qib_pportdata *ppd; 1161 1162 ppd = dd->pport + pidx; 1163 if (dd->flags & QIB_PRESENT) { 1164 qib_set_linkstate(ppd, 1165 QIB_IB_LINKDOWN_DISABLE); 1166 dd->f_setextled(ppd, 0); 1167 } 1168 *ppd->statusp &= ~QIB_STATUS_IB_READY; 1169 } 1170 } 1171 1172 /* 1173 * Mark as having had an error for driver, and also 1174 * for /sys and status word mapped to user programs. 1175 * This marks unit as not usable, until reset. 1176 */ 1177 if (dd->devstatusp) 1178 *dd->devstatusp |= QIB_STATUS_HWERROR; 1179 } 1180 1181 static void qib_remove_one(struct pci_dev *); 1182 static int qib_init_one(struct pci_dev *, const struct pci_device_id *); 1183 1184 #define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " 1185 #define PFX QIB_DRV_NAME ": " 1186 1187 static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { 1188 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) }, 1189 { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) }, 1190 { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) }, 1191 { 0, } 1192 }; 1193 1194 MODULE_DEVICE_TABLE(pci, qib_pci_tbl); 1195 1196 static struct pci_driver qib_driver = { 1197 .name = QIB_DRV_NAME, 1198 .probe = qib_init_one, 1199 .remove = qib_remove_one, 1200 .id_table = qib_pci_tbl, 1201 .err_handler = &qib_pci_err_handler, 1202 }; 1203 1204 #ifdef CONFIG_INFINIBAND_QIB_DCA 1205 1206 static int qib_notify_dca(struct notifier_block *, unsigned long, void *); 1207 static struct notifier_block dca_notifier = { 1208 .notifier_call = qib_notify_dca, 1209 .next = NULL, 1210 .priority = 0 1211 }; 1212 1213 static int qib_notify_dca_device(struct device *device, void *data) 1214 { 1215 struct qib_devdata *dd = dev_get_drvdata(device); 1216 unsigned long event = *(unsigned long 
*)data; 1217 1218 return dd->f_notify_dca(dd, event); 1219 } 1220 1221 static int qib_notify_dca(struct notifier_block *nb, unsigned long event, 1222 void *p) 1223 { 1224 int rval; 1225 1226 rval = driver_for_each_device(&qib_driver.driver, NULL, 1227 &event, qib_notify_dca_device); 1228 return rval ? NOTIFY_BAD : NOTIFY_DONE; 1229 } 1230 1231 #endif 1232 1233 /* 1234 * Do all the generic driver unit- and chip-independent memory 1235 * allocation and initialization. 1236 */ 1237 static int __init qlogic_ib_init(void) 1238 { 1239 int ret; 1240 1241 ret = qib_dev_init(); 1242 if (ret) 1243 goto bail; 1244 1245 /* 1246 * These must be called before the driver is registered with 1247 * the PCI subsystem. 1248 */ 1249 idr_init(&qib_unit_table); 1250 1251 #ifdef CONFIG_INFINIBAND_QIB_DCA 1252 dca_register_notify(&dca_notifier); 1253 #endif 1254 #ifdef CONFIG_DEBUG_FS 1255 qib_dbg_init(); 1256 #endif 1257 ret = pci_register_driver(&qib_driver); 1258 if (ret < 0) { 1259 pr_err("Unable to register driver: error %d\n", -ret); 1260 goto bail_dev; 1261 } 1262 1263 /* not fatal if it doesn't work */ 1264 if (qib_init_qibfs()) 1265 pr_err("Unable to register ipathfs\n"); 1266 goto bail; /* all OK */ 1267 1268 bail_dev: 1269 #ifdef CONFIG_INFINIBAND_QIB_DCA 1270 dca_unregister_notify(&dca_notifier); 1271 #endif 1272 #ifdef CONFIG_DEBUG_FS 1273 qib_dbg_exit(); 1274 #endif 1275 idr_destroy(&qib_unit_table); 1276 qib_dev_cleanup(); 1277 bail: 1278 return ret; 1279 } 1280 1281 module_init(qlogic_ib_init); 1282 1283 /* 1284 * Do the non-unit driver cleanup, memory free, etc. at unload. 
1285 */ 1286 static void __exit qlogic_ib_cleanup(void) 1287 { 1288 int ret; 1289 1290 ret = qib_exit_qibfs(); 1291 if (ret) 1292 pr_err( 1293 "Unable to cleanup counter filesystem: error %d\n", 1294 -ret); 1295 1296 #ifdef CONFIG_INFINIBAND_QIB_DCA 1297 dca_unregister_notify(&dca_notifier); 1298 #endif 1299 pci_unregister_driver(&qib_driver); 1300 #ifdef CONFIG_DEBUG_FS 1301 qib_dbg_exit(); 1302 #endif 1303 1304 qib_cpulist_count = 0; 1305 kfree(qib_cpulist); 1306 1307 idr_destroy(&qib_unit_table); 1308 qib_dev_cleanup(); 1309 } 1310 1311 module_exit(qlogic_ib_cleanup); 1312 1313 /* this can only be called after a successful initialization */ 1314 static void cleanup_device_data(struct qib_devdata *dd) 1315 { 1316 int ctxt; 1317 int pidx; 1318 struct qib_ctxtdata **tmp; 1319 unsigned long flags; 1320 1321 /* users can't do anything more with chip */ 1322 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 1323 if (dd->pport[pidx].statusp) 1324 *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; 1325 1326 spin_lock(&dd->pport[pidx].cc_shadow_lock); 1327 1328 kfree(dd->pport[pidx].congestion_entries); 1329 dd->pport[pidx].congestion_entries = NULL; 1330 kfree(dd->pport[pidx].ccti_entries); 1331 dd->pport[pidx].ccti_entries = NULL; 1332 kfree(dd->pport[pidx].ccti_entries_shadow); 1333 dd->pport[pidx].ccti_entries_shadow = NULL; 1334 kfree(dd->pport[pidx].congestion_entries_shadow); 1335 dd->pport[pidx].congestion_entries_shadow = NULL; 1336 1337 spin_unlock(&dd->pport[pidx].cc_shadow_lock); 1338 } 1339 1340 if (!qib_wc_pat) 1341 qib_disable_wc(dd); 1342 1343 if (dd->pioavailregs_dma) { 1344 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, 1345 (void *) dd->pioavailregs_dma, 1346 dd->pioavailregs_phys); 1347 dd->pioavailregs_dma = NULL; 1348 } 1349 1350 if (dd->pageshadow) { 1351 struct page **tmpp = dd->pageshadow; 1352 dma_addr_t *tmpd = dd->physshadow; 1353 int i; 1354 1355 for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { 1356 int ctxt_tidbase = ctxt * dd->rcvtidcnt; 1357 
int maxtid = ctxt_tidbase + dd->rcvtidcnt; 1358 1359 for (i = ctxt_tidbase; i < maxtid; i++) { 1360 if (!tmpp[i]) 1361 continue; 1362 pci_unmap_page(dd->pcidev, tmpd[i], 1363 PAGE_SIZE, PCI_DMA_FROMDEVICE); 1364 qib_release_user_pages(&tmpp[i], 1); 1365 tmpp[i] = NULL; 1366 } 1367 } 1368 1369 dd->pageshadow = NULL; 1370 vfree(tmpp); 1371 dd->physshadow = NULL; 1372 vfree(tmpd); 1373 } 1374 1375 /* 1376 * Free any resources still in use (usually just kernel contexts) 1377 * at unload; we do for ctxtcnt, because that's what we allocate. 1378 * We acquire lock to be really paranoid that rcd isn't being 1379 * accessed from some interrupt-related code (that should not happen, 1380 * but best to be sure). 1381 */ 1382 spin_lock_irqsave(&dd->uctxt_lock, flags); 1383 tmp = dd->rcd; 1384 dd->rcd = NULL; 1385 spin_unlock_irqrestore(&dd->uctxt_lock, flags); 1386 for (ctxt = 0; tmp && ctxt < dd->ctxtcnt; ctxt++) { 1387 struct qib_ctxtdata *rcd = tmp[ctxt]; 1388 1389 tmp[ctxt] = NULL; /* debugging paranoia */ 1390 qib_free_ctxtdata(dd, rcd); 1391 } 1392 kfree(tmp); 1393 kfree(dd->boardname); 1394 qib_cq_exit(dd); 1395 } 1396 1397 /* 1398 * Clean up on unit shutdown, or error during unit load after 1399 * successful initialization. 1400 */ 1401 static void qib_postinit_cleanup(struct qib_devdata *dd) 1402 { 1403 /* 1404 * Clean up chip-specific stuff. 1405 * We check for NULL here, because it's outside 1406 * the kregbase check, and we need to call it 1407 * after the free_irq. Thus it's possible that 1408 * the function pointers were never initialized. 
1409 */ 1410 if (dd->f_cleanup) 1411 dd->f_cleanup(dd); 1412 1413 qib_pcie_ddcleanup(dd); 1414 1415 cleanup_device_data(dd); 1416 1417 qib_free_devdata(dd); 1418 } 1419 1420 static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 1421 { 1422 int ret, j, pidx, initfail; 1423 struct qib_devdata *dd = NULL; 1424 1425 ret = qib_pcie_init(pdev, ent); 1426 if (ret) 1427 goto bail; 1428 1429 /* 1430 * Do device-specific initialiation, function table setup, dd 1431 * allocation, etc. 1432 */ 1433 switch (ent->device) { 1434 case PCI_DEVICE_ID_QLOGIC_IB_6120: 1435 #ifdef CONFIG_PCI_MSI 1436 dd = qib_init_iba6120_funcs(pdev, ent); 1437 #else 1438 qib_early_err(&pdev->dev, 1439 "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", 1440 ent->device); 1441 dd = ERR_PTR(-ENODEV); 1442 #endif 1443 break; 1444 1445 case PCI_DEVICE_ID_QLOGIC_IB_7220: 1446 dd = qib_init_iba7220_funcs(pdev, ent); 1447 break; 1448 1449 case PCI_DEVICE_ID_QLOGIC_IB_7322: 1450 dd = qib_init_iba7322_funcs(pdev, ent); 1451 break; 1452 1453 default: 1454 qib_early_err(&pdev->dev, 1455 "Failing on unknown Intel deviceid 0x%x\n", 1456 ent->device); 1457 ret = -ENODEV; 1458 } 1459 1460 if (IS_ERR(dd)) 1461 ret = PTR_ERR(dd); 1462 if (ret) 1463 goto bail; /* error already printed */ 1464 1465 ret = qib_create_workqueues(dd); 1466 if (ret) 1467 goto bail; 1468 1469 /* do the generic initialization */ 1470 initfail = qib_init(dd, 0); 1471 1472 ret = qib_register_ib_device(dd); 1473 1474 /* 1475 * Now ready for use. this should be cleared whenever we 1476 * detect a reset, or initiate one. If earlier failure, 1477 * we still create devices, so diags, etc. can be used 1478 * to determine cause of problem. 
1479 */ 1480 if (!qib_mini_init && !initfail && !ret) 1481 dd->flags |= QIB_INITTED; 1482 1483 j = qib_device_create(dd); 1484 if (j) 1485 qib_dev_err(dd, "Failed to create /dev devices: %d\n", -j); 1486 j = qibfs_add(dd); 1487 if (j) 1488 qib_dev_err(dd, "Failed filesystem setup for counters: %d\n", 1489 -j); 1490 1491 if (qib_mini_init || initfail || ret) { 1492 qib_stop_timers(dd); 1493 flush_workqueue(ib_wq); 1494 for (pidx = 0; pidx < dd->num_pports; ++pidx) 1495 dd->f_quiet_serdes(dd->pport + pidx); 1496 if (qib_mini_init) 1497 goto bail; 1498 if (!j) { 1499 (void) qibfs_remove(dd); 1500 qib_device_remove(dd); 1501 } 1502 if (!ret) 1503 qib_unregister_ib_device(dd); 1504 qib_postinit_cleanup(dd); 1505 if (initfail) 1506 ret = initfail; 1507 goto bail; 1508 } 1509 1510 if (!qib_wc_pat) { 1511 ret = qib_enable_wc(dd); 1512 if (ret) { 1513 qib_dev_err(dd, 1514 "Write combining not enabled (err %d): performance may be poor\n", 1515 -ret); 1516 ret = 0; 1517 } 1518 } 1519 1520 qib_verify_pioperf(dd); 1521 bail: 1522 return ret; 1523 } 1524 1525 static void qib_remove_one(struct pci_dev *pdev) 1526 { 1527 struct qib_devdata *dd = pci_get_drvdata(pdev); 1528 int ret; 1529 1530 /* unregister from IB core */ 1531 qib_unregister_ib_device(dd); 1532 1533 /* 1534 * Disable the IB link, disable interrupts on the device, 1535 * clear dma engines, etc. 
1536 */ 1537 if (!qib_mini_init) 1538 qib_shutdown_device(dd); 1539 1540 qib_stop_timers(dd); 1541 1542 /* wait until all of our (qsfp) queue_work() calls complete */ 1543 flush_workqueue(ib_wq); 1544 1545 ret = qibfs_remove(dd); 1546 if (ret) 1547 qib_dev_err(dd, "Failed counters filesystem cleanup: %d\n", 1548 -ret); 1549 1550 qib_device_remove(dd); 1551 1552 qib_postinit_cleanup(dd); 1553 } 1554 1555 /** 1556 * qib_create_rcvhdrq - create a receive header queue 1557 * @dd: the qlogic_ib device 1558 * @rcd: the context data 1559 * 1560 * This must be contiguous memory (from an i/o perspective), and must be 1561 * DMA'able (which means for some systems, it will go through an IOMMU, 1562 * or be forced into a low address range). 1563 */ 1564 int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) 1565 { 1566 unsigned amt; 1567 int old_node_id; 1568 1569 if (!rcd->rcvhdrq) { 1570 dma_addr_t phys_hdrqtail; 1571 gfp_t gfp_flags; 1572 1573 amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize * 1574 sizeof(u32), PAGE_SIZE); 1575 gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? 
1576 GFP_USER : GFP_KERNEL; 1577 1578 old_node_id = dev_to_node(&dd->pcidev->dev); 1579 set_dev_node(&dd->pcidev->dev, rcd->node_id); 1580 rcd->rcvhdrq = dma_alloc_coherent( 1581 &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, 1582 gfp_flags | __GFP_COMP); 1583 set_dev_node(&dd->pcidev->dev, old_node_id); 1584 1585 if (!rcd->rcvhdrq) { 1586 qib_dev_err(dd, 1587 "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", 1588 amt, rcd->ctxt); 1589 goto bail; 1590 } 1591 1592 if (rcd->ctxt >= dd->first_user_ctxt) { 1593 rcd->user_event_mask = vmalloc_user(PAGE_SIZE); 1594 if (!rcd->user_event_mask) 1595 goto bail_free_hdrq; 1596 } 1597 1598 if (!(dd->flags & QIB_NODMA_RTAIL)) { 1599 set_dev_node(&dd->pcidev->dev, rcd->node_id); 1600 rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( 1601 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, 1602 gfp_flags); 1603 set_dev_node(&dd->pcidev->dev, old_node_id); 1604 if (!rcd->rcvhdrtail_kvaddr) 1605 goto bail_free; 1606 rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; 1607 } 1608 1609 rcd->rcvhdrq_size = amt; 1610 } 1611 1612 /* clear for security and sanity on each use */ 1613 memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size); 1614 if (rcd->rcvhdrtail_kvaddr) 1615 memset(rcd->rcvhdrtail_kvaddr, 0, PAGE_SIZE); 1616 return 0; 1617 1618 bail_free: 1619 qib_dev_err(dd, 1620 "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", 1621 rcd->ctxt); 1622 vfree(rcd->user_event_mask); 1623 rcd->user_event_mask = NULL; 1624 bail_free_hdrq: 1625 dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, 1626 rcd->rcvhdrq_phys); 1627 rcd->rcvhdrq = NULL; 1628 bail: 1629 return -ENOMEM; 1630 } 1631 1632 /** 1633 * allocate eager buffers, both kernel and user contexts. 1634 * @rcd: the context we are setting up. 1635 * 1636 * Allocate the eager TID buffers and program them into hip. 1637 * They are no longer completely contiguous, we do multiple allocation 1638 * calls. 
Otherwise we get the OOM code involved, by asking for too 1639 * much per call, with disastrous results on some kernels. 1640 */ 1641 int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) 1642 { 1643 struct qib_devdata *dd = rcd->dd; 1644 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; 1645 size_t size; 1646 gfp_t gfp_flags; 1647 int old_node_id; 1648 1649 /* 1650 * GFP_USER, but without GFP_FS, so buffer cache can be 1651 * coalesced (we hope); otherwise, even at order 4, 1652 * heavy filesystem activity makes these fail, and we can 1653 * use compound pages. 1654 */ 1655 gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; 1656 1657 egrcnt = rcd->rcvegrcnt; 1658 egroff = rcd->rcvegr_tid_base; 1659 egrsize = dd->rcvegrbufsize; 1660 1661 chunk = rcd->rcvegrbuf_chunks; 1662 egrperchunk = rcd->rcvegrbufs_perchunk; 1663 size = rcd->rcvegrbuf_size; 1664 if (!rcd->rcvegrbuf) { 1665 rcd->rcvegrbuf = 1666 kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]), 1667 GFP_KERNEL, rcd->node_id); 1668 if (!rcd->rcvegrbuf) 1669 goto bail; 1670 } 1671 if (!rcd->rcvegrbuf_phys) { 1672 rcd->rcvegrbuf_phys = 1673 kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), 1674 GFP_KERNEL, rcd->node_id); 1675 if (!rcd->rcvegrbuf_phys) 1676 goto bail_rcvegrbuf; 1677 } 1678 for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { 1679 if (rcd->rcvegrbuf[e]) 1680 continue; 1681 1682 old_node_id = dev_to_node(&dd->pcidev->dev); 1683 set_dev_node(&dd->pcidev->dev, rcd->node_id); 1684 rcd->rcvegrbuf[e] = 1685 dma_alloc_coherent(&dd->pcidev->dev, size, 1686 &rcd->rcvegrbuf_phys[e], 1687 gfp_flags); 1688 set_dev_node(&dd->pcidev->dev, old_node_id); 1689 if (!rcd->rcvegrbuf[e]) 1690 goto bail_rcvegrbuf_phys; 1691 } 1692 1693 rcd->rcvegr_phys = rcd->rcvegrbuf_phys[0]; 1694 1695 for (e = chunk = 0; chunk < rcd->rcvegrbuf_chunks; chunk++) { 1696 dma_addr_t pa = rcd->rcvegrbuf_phys[chunk]; 1697 unsigned i; 1698 1699 /* clear for security and sanity on each use */ 1700 memset(rcd->rcvegrbuf[chunk], 0, size); 1701 1702 for 
(i = 0; e < egrcnt && i < egrperchunk; e++, i++) { 1703 dd->f_put_tid(dd, e + egroff + 1704 (u64 __iomem *) 1705 ((char __iomem *) 1706 dd->kregbase + 1707 dd->rcvegrbase), 1708 RCVHQ_RCV_TYPE_EAGER, pa); 1709 pa += egrsize; 1710 } 1711 cond_resched(); /* don't hog the cpu */ 1712 } 1713 1714 return 0; 1715 1716 bail_rcvegrbuf_phys: 1717 for (e = 0; e < rcd->rcvegrbuf_chunks && rcd->rcvegrbuf[e]; e++) 1718 dma_free_coherent(&dd->pcidev->dev, size, 1719 rcd->rcvegrbuf[e], rcd->rcvegrbuf_phys[e]); 1720 kfree(rcd->rcvegrbuf_phys); 1721 rcd->rcvegrbuf_phys = NULL; 1722 bail_rcvegrbuf: 1723 kfree(rcd->rcvegrbuf); 1724 rcd->rcvegrbuf = NULL; 1725 bail: 1726 return -ENOMEM; 1727 } 1728 1729 /* 1730 * Note: Changes to this routine should be mirrored 1731 * for the diagnostics routine qib_remap_ioaddr32(). 1732 * There is also related code for VL15 buffers in qib_init_7322_variables(). 1733 * The teardown code that unmaps is in qib_pcie_ddcleanup() 1734 */ 1735 int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen) 1736 { 1737 u64 __iomem *qib_kregbase = NULL; 1738 void __iomem *qib_piobase = NULL; 1739 u64 __iomem *qib_userbase = NULL; 1740 u64 qib_kreglen; 1741 u64 qib_pio2koffset = dd->piobufbase & 0xffffffff; 1742 u64 qib_pio4koffset = dd->piobufbase >> 32; 1743 u64 qib_pio2klen = dd->piobcnt2k * dd->palign; 1744 u64 qib_pio4klen = dd->piobcnt4k * dd->align4k; 1745 u64 qib_physaddr = dd->physaddr; 1746 u64 qib_piolen; 1747 u64 qib_userlen = 0; 1748 1749 /* 1750 * Free the old mapping because the kernel will try to reuse the 1751 * old mapping and not create a new mapping with the 1752 * write combining attribute. 
1753 */ 1754 iounmap(dd->kregbase); 1755 dd->kregbase = NULL; 1756 1757 /* 1758 * Assumes chip address space looks like: 1759 * - kregs + sregs + cregs + uregs (in any order) 1760 * - piobufs (2K and 4K bufs in either order) 1761 * or: 1762 * - kregs + sregs + cregs (in any order) 1763 * - piobufs (2K and 4K bufs in either order) 1764 * - uregs 1765 */ 1766 if (dd->piobcnt4k == 0) { 1767 qib_kreglen = qib_pio2koffset; 1768 qib_piolen = qib_pio2klen; 1769 } else if (qib_pio2koffset < qib_pio4koffset) { 1770 qib_kreglen = qib_pio2koffset; 1771 qib_piolen = qib_pio4koffset + qib_pio4klen - qib_kreglen; 1772 } else { 1773 qib_kreglen = qib_pio4koffset; 1774 qib_piolen = qib_pio2koffset + qib_pio2klen - qib_kreglen; 1775 } 1776 qib_piolen += vl15buflen; 1777 /* Map just the configured ports (not all hw ports) */ 1778 if (dd->uregbase > qib_kreglen) 1779 qib_userlen = dd->ureg_align * dd->cfgctxts; 1780 1781 /* Sanity checks passed, now create the new mappings */ 1782 qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen); 1783 if (!qib_kregbase) 1784 goto bail; 1785 1786 qib_piobase = ioremap_wc(qib_physaddr + qib_kreglen, qib_piolen); 1787 if (!qib_piobase) 1788 goto bail_kregbase; 1789 1790 if (qib_userlen) { 1791 qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase, 1792 qib_userlen); 1793 if (!qib_userbase) 1794 goto bail_piobase; 1795 } 1796 1797 dd->kregbase = qib_kregbase; 1798 dd->kregend = (u64 __iomem *) 1799 ((char __iomem *) qib_kregbase + qib_kreglen); 1800 dd->piobase = qib_piobase; 1801 dd->pio2kbase = (void __iomem *) 1802 (((char __iomem *) dd->piobase) + 1803 qib_pio2koffset - qib_kreglen); 1804 if (dd->piobcnt4k) 1805 dd->pio4kbase = (void __iomem *) 1806 (((char __iomem *) dd->piobase) + 1807 qib_pio4koffset - qib_kreglen); 1808 if (qib_userlen) 1809 /* ureg will now be accessed relative to dd->userbase */ 1810 dd->userbase = qib_userbase; 1811 return 0; 1812 1813 bail_piobase: 1814 iounmap(qib_piobase); 1815 bail_kregbase: 1816 
iounmap(qib_kregbase); 1817 bail: 1818 return -ENOMEM; 1819 } 1820