1 /* 2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 35 /* 36 * This file is conditionally built on x86_64 only. Otherwise weak symbol 37 * versions of the functions exported from here are used. 38 */ 39 40 #include <linux/pci.h> 41 #include <asm/mtrr.h> 42 #include <asm/processor.h> 43 44 #include "qib.h" 45 46 /** 47 * qib_enable_wc - enable write combining for MMIO writes to the device 48 * @dd: qlogic_ib device 49 * 50 * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable 51 * write combining. 52 */ 53 int qib_enable_wc(struct qib_devdata *dd) 54 { 55 int ret = 0; 56 u64 pioaddr, piolen; 57 unsigned bits; 58 const unsigned long addr = pci_resource_start(dd->pcidev, 0); 59 const size_t len = pci_resource_len(dd->pcidev, 0); 60 61 /* 62 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the 63 * chip. Linux (possibly the hardware) requires it to be on a power 64 * of 2 address matching the length (which has to be a power of 2). 65 * For rev1, that means the base address, for rev2, it will be just 66 * the PIO buffers themselves. 67 * For chips with two sets of buffers, the calculations are 68 * somewhat more complicated; we need to sum, and the piobufbase 69 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits. 70 * The buffers are still packed, so a single range covers both. 71 */ 72 if (dd->piobcnt2k && dd->piobcnt4k) { 73 /* 2 sizes for chip */ 74 unsigned long pio2kbase, pio4kbase; 75 pio2kbase = dd->piobufbase & 0xffffffffUL; 76 pio4kbase = (dd->piobufbase >> 32) & 0xffffffffUL; 77 if (pio2kbase < pio4kbase) { 78 /* all current chips */ 79 pioaddr = addr + pio2kbase; 80 piolen = pio4kbase - pio2kbase + 81 dd->piobcnt4k * dd->align4k; 82 } else { 83 pioaddr = addr + pio4kbase; 84 piolen = pio2kbase - pio4kbase + 85 dd->piobcnt2k * dd->palign; 86 } 87 } else { /* single buffer size (2K, currently) */ 88 pioaddr = addr + dd->piobufbase; 89 piolen = dd->piobcnt2k * dd->palign + 90 dd->piobcnt4k * dd->align4k; 91 } 92 93 for (bits = 0; !(piolen & (1ULL << bits)); bits++) 94 /* do nothing */ ; 95 96 if (piolen != (1ULL << bits)) { 97 piolen >>= bits; 98 while (piolen >>= 1) 99 bits++; 100 piolen = 1ULL << (bits + 1); 101 } 102 if (pioaddr & (piolen - 1)) { 103 u64 atmp; 104 atmp = pioaddr & ~(piolen - 1); 105 if (atmp < addr || (atmp + piolen) > (addr + len)) { 106 qib_dev_err(dd, 107 "No way to align address/size (%llx/%llx), no WC mtrr\n", 108 (unsigned long long) atmp, 109 (unsigned long long) piolen << 1); 110 ret = -ENODEV; 111 } else { 112 pioaddr = atmp; 113 piolen <<= 1; 114 } 115 } 116 117 if (!ret) { 118 int cookie; 119 120 cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); 121 if (cookie < 0) { 122 { 123 qib_devinfo(dd->pcidev, 124 "mtrr_add() WC for PIO bufs failed (%d)\n", 125 cookie); 126 ret = -EINVAL; 127 } 128 } else { 129 dd->wc_cookie = cookie; 130 dd->wc_base = (unsigned long) pioaddr; 131 dd->wc_len = (unsigned long) piolen; 132 } 133 } 134 135 return ret; 136 } 137 138 /** 139 * qib_disable_wc - disable write combining for MMIO writes to the device 140 * @dd: qlogic_ib device 141 */ 142 void qib_disable_wc(struct qib_devdata *dd) 143 { 144 if (dd->wc_cookie) { 145 int r; 146 147 r = mtrr_del(dd->wc_cookie, dd->wc_base, 148 dd->wc_len); 149 if (r < 0) 150 qib_devinfo(dd->pcidev, 151 "mtrr_del(%lx, %lx, %lx) failed: %d\n", 152 dd->wc_cookie, dd->wc_base, 153 dd->wc_len, r); 154 dd->wc_cookie = 0; /* even on failure */ 155 } 156 } 157 158 /** 159 * qib_unordered_wc - indicate whether write combining is ordered 160 * 161 * Because our performance depends on our ability to do write combining mmio 162 * writes in the most efficient way, we need to know if we are on an Intel 163 * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in 164 * the order completed, and so no special flushing is required to get 165 * correct ordering. Intel processors, however, will flush write buffers 166 * out in "random" orders, and so explicit ordering is needed at times. 167 */ 168 int qib_unordered_wc(void) 169 { 170 return boot_cpu_data.x86_vendor != X86_VENDOR_AMD; 171 } 172