1 /*-
2 * Copyright (c) 2017 Andriy Gapon
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/bus.h>
30 #include <sys/kernel.h>
31 #include <sys/conf.h>
32 #include <sys/malloc.h>
33 #include <sys/module.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36
37 #include <dev/pci/pcivar.h>
38
39 #include <vm/vm.h>
40 #include <vm/vm_extern.h>
41 #include <vm/vm_kern.h>
42
43 #include <machine/cputypes.h>
44 #include <machine/md_var.h>
45
46 /*
47 * See BKDG for AMD Family 15h Models 00h-0Fh Processors
48 * (publication 42301 Rev 3.08 - March 12, 2012):
49 * - 2.13.3.1 DRAM Error Injection
50 * - D18F3xB8 NB Array Address
51 * - D18F3xBC NB Array Data Port
52 * - D18F3xBC_x8 DRAM ECC
53 */
/* Register read at load time; DRAM_ECC_EN must be set for the module to load. */
#define	NB_MCA_CFG		0x44
#define		DRAM_ECC_EN	(1 << 22)
/* Not referenced by the code in this file. */
#define	NB_MCA_EXTCFG		0x180
#define		ECC_SYMB_SZ	(1 << 25)
/* Array address register: selects the DRAM ECC target and the quadrant. */
#define	NB_ARRAY_ADDR		0xb8
/*
 * The selector occupies the top bits of the 32-bit register.  It is spelled
 * as an unsigned constant because 0x8 << 28 does not fit in a signed int.
 */
#define		DRAM_ECC_SEL	(0x8u << 28)
#define		QUADRANT_SHIFT	1
#define		QUADRANT_MASK	0x3
/* Array data port: word mask, request bits and the per-word bit vector. */
#define	NB_ARRAY_PORT		0xbc
#define		INJ_WORD_SHIFT	20
#define		INJ_WORD_MASK	0x1ff
#define		DRAM_ERR_EN	(1 << 18)
/* Set by the injection code, which then polls until it reads back as zero. */
#define		DRAM_WR_REQ	(1 << 17)
#define		DRAM_RD_REQ	(1 << 16)
#define		INJ_VECTOR_MASK	0xffff
69
static void ecc_ei_inject(int);

/* PCI device used for all register accesses; set when the module loads. */
static device_t nbdev;

/*
 * Injection parameters, tunable through sysctl.  They are exported as
 * unsigned values, updated through u_int pointers by the sysctl handlers and
 * printed with unsigned conversions, so they are stored unsigned as well.
 * That also keeps a large "delay" setting from turning into a negative
 * sleep time.
 */
static unsigned int delay_ms = 0;
static unsigned int quadrant = 0;	/* 0 - 3 */
static unsigned int word_mask = 0x001;	/* 9 bits: 8 + 1 for ECC */
static unsigned int bit_mask = 0x0001;	/* 16 bits */
77
78 static int
sysctl_int_with_max(SYSCTL_HANDLER_ARGS)79 sysctl_int_with_max(SYSCTL_HANDLER_ARGS)
80 {
81 u_int value;
82 int error;
83
84 value = *(u_int *)arg1;
85 error = sysctl_handle_int(oidp, &value, 0, req);
86 if (error || req->newptr == NULL)
87 return (error);
88 if (value > arg2)
89 return (EINVAL);
90 *(u_int *)arg1 = value;
91 return (0);
92 }
93
94 static int
sysctl_nonzero_int_with_max(SYSCTL_HANDLER_ARGS)95 sysctl_nonzero_int_with_max(SYSCTL_HANDLER_ARGS)
96 {
97 u_int value;
98 int error;
99
100 value = *(u_int *)arg1;
101 error = sysctl_int_with_max(oidp, &value, arg2, req);
102 if (error || req->newptr == NULL)
103 return (error);
104 if (value == 0)
105 return (EINVAL);
106 *(u_int *)arg1 = value;
107 return (0);
108 }
109
110 static int
sysctl_proc_inject(SYSCTL_HANDLER_ARGS)111 sysctl_proc_inject(SYSCTL_HANDLER_ARGS)
112 {
113 int error;
114 int i;
115
116 i = 0;
117 error = sysctl_handle_int(oidp, &i, 0, req);
118 if (error)
119 return (error);
120 if (i != 0)
121 ecc_ei_inject(i);
122 return (0);
123 }
124
/* Parent nodes: error_injection under hw, and dram_ecc beneath it. */
static SYSCTL_NODE(_hw, OID_AUTO, error_injection,
    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hardware error injection");
static SYSCTL_NODE(_hw_error_injection, OID_AUTO, dram_ecc,
    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "DRAM ECC error injection");
/* Plain read/write integer backed directly by delay_ms; no range check. */
SYSCTL_UINT(_hw_error_injection_dram_ecc, OID_AUTO, delay,
    CTLTYPE_UINT | CTLFLAG_RW, &delay_ms, 0,
    "Delay in milliseconds between error injections");
/* Values above QUADRANT_MASK are rejected by the handler. */
SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, quadrant,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &quadrant, QUADRANT_MASK,
    sysctl_int_with_max, "IU",
    "Index of 16-byte quadrant within 64-byte line where errors "
    "should be injected");
/* Zero and values above INJ_WORD_MASK are rejected by the handler. */
SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, word_mask,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &word_mask, INJ_WORD_MASK,
    sysctl_nonzero_int_with_max, "IU",
    "9-bit mask of words where errors should be injected (8 data + 1 ECC)");
/* Zero and values above INJ_VECTOR_MASK are rejected by the handler. */
SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, bit_mask,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &bit_mask, INJ_VECTOR_MASK,
    sysctl_nonzero_int_with_max, "IU",
    "16-bit mask of bits within each selected word where errors "
    "should be injected");
/* No backing variable: a non-zero write makes the handler start injecting. */
SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, inject,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_proc_inject, "I",
    "Inject a number of errors according to configured parameters");
151
152 static void
ecc_ei_inject_one(void * arg,size_t size)153 ecc_ei_inject_one(void *arg, size_t size)
154 {
155 volatile uint64_t *memory = arg;
156 uint32_t val;
157 int i;
158
159 val = DRAM_ECC_SEL | (quadrant << QUADRANT_SHIFT);
160 pci_write_config(nbdev, NB_ARRAY_ADDR, val, 4);
161
162 val = (word_mask << INJ_WORD_SHIFT) | DRAM_WR_REQ | bit_mask;
163 pci_write_config(nbdev, NB_ARRAY_PORT, val, 4);
164
165 for (i = 0; i < size / sizeof(uint64_t); i++) {
166 memory[i] = 0;
167 val = pci_read_config(nbdev, NB_ARRAY_PORT, 4);
168 if ((val & DRAM_WR_REQ) == 0)
169 break;
170 }
171 for (i = 0; i < size / sizeof(uint64_t); i++)
172 memory[0] = memory[i];
173 }
174
175 static void
ecc_ei_inject(int count)176 ecc_ei_inject(int count)
177 {
178 void *memory;
179 int injected;
180
181 KASSERT((quadrant & ~QUADRANT_MASK) == 0,
182 ("quadrant value is outside of range: %u", quadrant));
183 KASSERT(word_mask != 0 && (word_mask & ~INJ_WORD_MASK) == 0,
184 ("word mask value is outside of range: 0x%x", word_mask));
185 KASSERT(bit_mask != 0 && (bit_mask & ~INJ_VECTOR_MASK) == 0,
186 ("bit mask value is outside of range: 0x%x", bit_mask));
187
188 memory = kmem_alloc_attr(PAGE_SIZE, M_WAITOK, 0, ~0,
189 VM_MEMATTR_UNCACHEABLE);
190
191 for (injected = 0; injected < count; injected++) {
192 ecc_ei_inject_one(memory, PAGE_SIZE);
193 if (delay_ms != 0 && injected != count - 1)
194 pause_sbt("ecc_ei_inject", delay_ms * SBT_1MS, 0, 0);
195 }
196
197 kmem_free(memory, PAGE_SIZE);
198 }
199
200 static int
ecc_ei_load(void)201 ecc_ei_load(void)
202 {
203 uint32_t val;
204
205 if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) &&
206 cpu_vendor_id != CPU_VENDOR_HYGON) {
207 printf("DRAM ECC error injection is not supported\n");
208 return (ENXIO);
209 }
210 nbdev = pci_find_bsf(0, 24, 3);
211 if (nbdev == NULL) {
212 printf("Couldn't find NB PCI device\n");
213 return (ENXIO);
214 }
215 val = pci_read_config(nbdev, NB_MCA_CFG, 4);
216 if ((val & DRAM_ECC_EN) == 0) {
217 printf("DRAM ECC is not supported or disabled\n");
218 return (ENXIO);
219 }
220 printf("DRAM ECC error injection support loaded\n");
221 return (0);
222 }
223
224 static int
tsc_modevent(module_t mod __unused,int type,void * data __unused)225 tsc_modevent(module_t mod __unused, int type, void *data __unused)
226 {
227 int error;
228
229 error = 0;
230 switch (type) {
231 case MOD_LOAD:
232 error = ecc_ei_load();
233 break;
234 case MOD_UNLOAD:
235 case MOD_SHUTDOWN:
236 break;
237 default:
238 error = EOPNOTSUPP;
239 }
240 return (error);
241 }
242
/*
 * Register the module with tsc_modevent as its event handler.
 * NOTE(review): the "tsc" module name does not match this file's purpose and
 * looks inherited from elsewhere; confirm before relying on or renaming it.
 */
DEV_MODULE(tsc, tsc_modevent, NULL);
244