xref: /freebsd/sys/dev/nvdimm/nvdimm_e820.c (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2019 Dell EMC Isilon
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #include <sys/bio.h>
33 #include <sys/bitstring.h>
34 #include <sys/bus.h>
35 #include <sys/efi.h>
36 #include <sys/kernel.h>
37 #include <sys/linker.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/sbuf.h>
42 #include <sys/uuid.h>
43 
44 #include <vm/vm_param.h>
45 
46 #include <machine/metadata.h>
47 #include <machine/pc/bios.h>
48 
49 #include <contrib/dev/acpica/include/acpi.h>
50 
51 #include <dev/nvdimm/nvdimm_var.h>
52 
/*
 * Per-device softc for the fake e820 NVDIMM bus.
 */
struct nvdimm_e820_bus {
	/* SPA mappings created from nvdimm_spa hints; freed on detach. */
	SLIST_HEAD(, SPA_mapping) spas;
};
56 
/* Name used for the driver, its child device and the malloc type. */
#define	NVDIMM_E820	"nvdimm_e820"

/* Malloc type for the SPA_mapping objects allocated by this driver. */
static MALLOC_DEFINE(M_NVDIMM_E820, NVDIMM_E820, "NVDIMM e820 bus memory");

/* BIOS SMAP found in the preloaded kernel metadata; set by identify. */
static const struct bios_smap *smapbase;

/*
 * PRAM ranges not yet handed out to an SPA.  Filled from the SMAP at attach
 * time and carved up while nvdimm_spa hints are processed.  An entry with a
 * size of 0 is treated as empty when the table is dumped.
 */
static struct {
	vm_paddr_t start;	/* First byte of the remaining range. */
	vm_paddr_t size;	/* Length of the remaining range in bytes. */
} pram_segments[VM_PHYSSEG_MAX];
/* Number of entries of pram_segments[] currently in use. */
static unsigned pram_nreg;
67 
68 static void
69 nvdimm_e820_dump_prams(device_t dev, const char *func, int hintunit)
70 {
71 	char buffer[256];
72 	struct sbuf sb;
73 	bool printed = false;
74 	unsigned i;
75 
76 	sbuf_new(&sb, buffer, sizeof(buffer), SBUF_FIXEDLEN);
77 	sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
78 
79 	sbuf_printf(&sb, "%s: %s: ", device_get_nameunit(dev), func);
80 	if (hintunit < 0)
81 		sbuf_cat(&sb, "Found BIOS PRAM regions: ");
82 	else
83 		sbuf_printf(&sb, "Remaining unallocated PRAM regions after "
84 		    "hint %d: ", hintunit);
85 
86 	for (i = 0; i < pram_nreg; i++) {
87 		if (pram_segments[i].size == 0)
88 			continue;
89 		if (printed)
90 			sbuf_putc(&sb, ',');
91 		else
92 			printed = true;
93 		sbuf_printf(&sb, "0x%jx-0x%jx",
94 		    (uintmax_t)pram_segments[i].start,
95 		    (uintmax_t)pram_segments[i].start + pram_segments[i].size
96 		    - 1);
97 	}
98 
99 	if (!printed)
100 		sbuf_cat(&sb, "<none>");
101 	sbuf_putc(&sb, '\n');
102 	sbuf_finish(&sb);
103 	sbuf_delete(&sb);
104 }
105 
106 static int
107 nvdimm_e820_create_spas(device_t dev)
108 {
109 	static const vm_size_t HINT_ALL = (vm_size_t)-1;
110 
111 	ACPI_NFIT_SYSTEM_ADDRESS nfit_sa;
112 	struct SPA_mapping *spa_mapping;
113 	enum SPA_mapping_type spa_type;
114 	struct nvdimm_e820_bus *sc;
115 	const char *hinttype;
116 	long hintaddrl, hintsizel;
117 	vm_paddr_t hintaddr;
118 	vm_size_t hintsize;
119 	unsigned i, j;
120 	int error;
121 
122 	sc = device_get_softc(dev);
123 	error = 0;
124 	nfit_sa = (ACPI_NFIT_SYSTEM_ADDRESS) { 0 };
125 
126 	if (bootverbose)
127 		nvdimm_e820_dump_prams(dev, __func__, -1);
128 
129 	for (i = 0;
130 	    resource_long_value("nvdimm_spa", i, "maddr", &hintaddrl) == 0;
131 	    i++) {
132 		if (resource_long_value("nvdimm_spa", i, "msize", &hintsizel)
133 		    != 0) {
134 			device_printf(dev, "hint.nvdimm_spa.%u missing msize\n",
135 			    i);
136 			continue;
137 		}
138 
139 		hintaddr = (vm_paddr_t)hintaddrl;
140 		hintsize = (vm_size_t)hintsizel;
141 		if ((hintaddr & PAGE_MASK) != 0 ||
142 		    ((hintsize & PAGE_MASK) != 0 && hintsize != HINT_ALL)) {
143 			device_printf(dev, "hint.nvdimm_spa.%u addr or size "
144 			    "not page aligned\n", i);
145 			continue;
146 		}
147 
148 		if (resource_string_value("nvdimm_spa", i, "type", &hinttype)
149 		    != 0) {
150 			device_printf(dev, "hint.nvdimm_spa.%u missing type\n",
151 			    i);
152 			continue;
153 		}
154 		spa_type = nvdimm_spa_type_from_name(hinttype);
155 		if (spa_type == SPA_TYPE_UNKNOWN) {
156 			device_printf(dev, "hint.nvdimm_spa%u.type does not "
157 			    "match any known SPA types\n", i);
158 			continue;
159 		}
160 
161 		for (j = 0; j < pram_nreg; j++) {
162 			if (pram_segments[j].start <= hintaddr &&
163 			    (hintsize == HINT_ALL ||
164 			    (pram_segments[j].start + pram_segments[j].size) >=
165 			    (hintaddr + hintsize)))
166 				break;
167 		}
168 
169 		if (j == pram_nreg) {
170 			device_printf(dev, "hint.nvdimm_spa%u hint does not "
171 			    "match any region\n", i);
172 			continue;
173 		}
174 
175 		/* Carve off "SPA" from available regions. */
176 		if (pram_segments[j].start == hintaddr) {
177 			/* Easy case first: beginning of segment. */
178 			if (hintsize == HINT_ALL)
179 				hintsize = pram_segments[j].size;
180 			pram_segments[j].start += hintsize;
181 			pram_segments[j].size -= hintsize;
182 			/* We might leave an empty segment; who cares. */
183 		} else if (hintsize == HINT_ALL ||
184 		    (pram_segments[j].start + pram_segments[j].size) ==
185 		    (hintaddr + hintsize)) {
186 			/* 2nd easy case: end of segment. */
187 			if (hintsize == HINT_ALL)
188 				hintsize = pram_segments[j].size -
189 				    (hintaddr - pram_segments[j].start);
190 			pram_segments[j].size -= hintsize;
191 		} else {
192 			/* Hard case: mid segment. */
193 			if (pram_nreg == nitems(pram_segments)) {
194 				/* Improbable, but handle gracefully. */
195 				device_printf(dev, "Ran out of %zu segments\n",
196 				    nitems(pram_segments));
197 				error = ENOBUFS;
198 				break;
199 			}
200 
201 			if (j != pram_nreg - 1) {
202 				memmove(&pram_segments[j + 2],
203 				    &pram_segments[j + 1],
204 				    (pram_nreg - 1 - j) *
205 				    sizeof(pram_segments[0]));
206 			}
207 			pram_nreg++;
208 
209 			pram_segments[j + 1].start = hintaddr + hintsize;
210 			pram_segments[j + 1].size =
211 			    (pram_segments[j].start + pram_segments[j].size) -
212 			    (hintaddr + hintsize);
213 			pram_segments[j].size = hintaddr -
214 			    pram_segments[j].start;
215 		}
216 
217 		if (bootverbose)
218 			nvdimm_e820_dump_prams(dev, __func__, (int)i);
219 
220 		spa_mapping = malloc(sizeof(*spa_mapping), M_NVDIMM_E820,
221 		    M_WAITOK | M_ZERO);
222 
223 		/* Mock up a super primitive table for nvdimm_spa_init(). */
224 		nfit_sa.RangeIndex = i;
225 		nfit_sa.Flags = 0;
226 		nfit_sa.Address = hintaddr;
227 		nfit_sa.Length = hintsize;
228 		nfit_sa.MemoryMapping = EFI_MD_ATTR_WB | EFI_MD_ATTR_WT |
229 		    EFI_MD_ATTR_UC;
230 
231 		error = nvdimm_spa_init(spa_mapping, &nfit_sa, spa_type);
232 		if (error != 0) {
233 			nvdimm_spa_fini(spa_mapping);
234 			free(spa_mapping, M_NVDIMM_E820);
235 			break;
236 		}
237 
238 		SLIST_INSERT_HEAD(&sc->spas, spa_mapping, link);
239 	}
240 	return (error);
241 }
242 
243 static int
244 nvdimm_e820_remove_spas(device_t dev)
245 {
246 	struct nvdimm_e820_bus *sc;
247 	struct SPA_mapping *spa, *next;
248 
249 	sc = device_get_softc(dev);
250 
251 	SLIST_FOREACH_SAFE(spa, &sc->spas, link, next) {
252 		nvdimm_spa_fini(spa);
253 		SLIST_REMOVE_HEAD(&sc->spas, link);
254 		free(spa, M_NVDIMM_E820);
255 	}
256 	return (0);
257 }
258 
259 static void
260 nvdimm_e820_identify(driver_t *driver __unused, device_t parent)
261 {
262 	device_t child;
263 	caddr_t kmdp;
264 
265 	if (resource_disabled(NVDIMM_E820, 0))
266 		return;
267 	/* Just create a single instance of the fake bus. */
268 	if (device_find_child(parent, NVDIMM_E820, -1) != NULL)
269 		return;
270 
271 	kmdp = preload_search_by_type("elf kernel");
272 	if (kmdp == NULL)
273 		kmdp = preload_search_by_type("elf64 kernel");
274 	smapbase = (const void *)preload_search_info(kmdp,
275 	    MODINFO_METADATA | MODINFOMD_SMAP);
276 
277 	/* Only supports BIOS SMAP for now. */
278 	if (smapbase == NULL)
279 		return;
280 
281 	child = BUS_ADD_CHILD(parent, 0, NVDIMM_E820, -1);
282 	if (child == NULL)
283 		device_printf(parent, "add %s child failed\n", NVDIMM_E820);
284 }
285 
286 static int
287 nvdimm_e820_probe(device_t dev)
288 {
289 	/*
290 	 * nexus panics if a child doesn't have ivars.  BUS_ADD_CHILD uses
291 	 * nexus_add_child, which creates fuckin ivars.  but sometimes if you
292 	 * unload and reload nvdimm_e820, the device node stays but the ivars
293 	 * are deleted??? avoid trivial panic but this is a kludge.
294 	 */
295 	if (device_get_ivars(dev) == NULL)
296 		return (ENXIO);
297 
298 	device_quiet(dev);
299 	device_set_desc(dev, "Legacy e820 NVDIMM root device");
300 	return (BUS_PROBE_NOWILDCARD);
301 }
302 
303 static int
304 nvdimm_e820_attach(device_t dev)
305 {
306 	const struct bios_smap *smapend, *smap;
307 	uint32_t smapsize;
308 	unsigned nregions;
309 	int error;
310 
311 	smapsize = *((const uint32_t *)smapbase - 1);
312 	smapend = (const void *)((const char *)smapbase + smapsize);
313 
314 	for (nregions = 0, smap = smapbase; smap < smapend; smap++) {
315 		if (smap->type != SMAP_TYPE_PRAM || smap->length == 0)
316 			continue;
317 		pram_segments[nregions].start = smap->base;
318 		pram_segments[nregions].size = smap->length;
319 
320 		device_printf(dev, "Found PRAM 0x%jx +0x%jx\n",
321 		    (uintmax_t)smap->base, (uintmax_t)smap->length);
322 
323 		nregions++;
324 	}
325 
326 	if (nregions == 0) {
327 		device_printf(dev, "No e820 PRAM regions detected\n");
328 		return (ENXIO);
329 	}
330 	pram_nreg = nregions;
331 
332 	error = nvdimm_e820_create_spas(dev);
333 	return (error);
334 }
335 
336 static int
337 nvdimm_e820_detach(device_t dev)
338 {
339 	int error;
340 
341 	error = nvdimm_e820_remove_spas(dev);
342 	return (error);
343 }
344 
/* Method table binding the device interface to this file's handlers. */
static device_method_t nvdimm_e820_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify, nvdimm_e820_identify),
	DEVMETHOD(device_probe, nvdimm_e820_probe),
	DEVMETHOD(device_attach, nvdimm_e820_attach),
	DEVMETHOD(device_detach, nvdimm_e820_detach),
	DEVMETHOD_END
};
352 
353 static driver_t	nvdimm_e820_driver = {
354 	NVDIMM_E820,
355 	nvdimm_e820_methods,
356 	sizeof(struct nvdimm_e820_bus),
357 };
358 
/* Device class passed to DRIVER_MODULE and walked on module unload. */
static devclass_t nvdimm_e820_devclass;
360 
361 static int
362 nvdimm_e820_chainevh(struct module *m, int e, void *arg __unused)
363 {
364 	devclass_t dc;
365 	device_t dev, parent;
366 	int i, error, maxunit;
367 
368 	switch (e) {
369 	case MOD_UNLOAD:
370 		dc = nvdimm_e820_devclass;
371 		maxunit = devclass_get_maxunit(dc);
372 		for (i = 0; i < maxunit; i++) {
373 			dev = devclass_get_device(dc, i);
374 			if (dev == NULL)
375 				continue;
376 			parent = device_get_parent(dev);
377 			if (parent == NULL) {
378 				/* Not sure how this would happen. */
379 				continue;
380 			}
381 			error = device_delete_child(parent, dev);
382 			if (error != 0)
383 				return (error);
384 		}
385 		break;
386 	default:
387 		/* Prevent compiler warning about unhandled cases. */
388 		break;
389 	}
390 	return (0);
391 }
392 
/*
 * Register the driver under the nexus bus, with nvdimm_e820_chainevh as the
 * chained module event handler (no handler argument).
 */
DRIVER_MODULE(nvdimm_e820, nexus, nvdimm_e820_driver, nvdimm_e820_devclass,
    nvdimm_e820_chainevh, NULL);
395