xref: /freebsd/lib/libkvm/kvm.c (revision 9e5787d2284e187abb5b654d924394a65772e004)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1992, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software developed by the Computer Systems
8  * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
9  * BG 91-66 and contributed to Berkeley.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 __SCCSID("@(#)kvm.c	8.2 (Berkeley) 2/13/94");
39 
40 #include <sys/param.h>
41 #include <sys/fnv_hash.h>
42 
43 #define	_WANT_VNET
44 
45 #include <sys/user.h>
46 #include <sys/linker.h>
47 #include <sys/pcpu.h>
48 #include <sys/stat.h>
49 #include <sys/sysctl.h>
50 #include <sys/mman.h>
51 
52 #include <stdbool.h>
53 #include <net/vnet.h>
54 
55 #include <fcntl.h>
56 #include <kvm.h>
57 #include <limits.h>
58 #include <paths.h>
59 #include <stdint.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #include <unistd.h>
64 
65 #include "kvm_private.h"
66 
67 SET_DECLARE(kvm_arch, struct kvm_arch);
68 
69 static char _kd_is_null[] = "";
70 
71 char *
72 kvm_geterr(kvm_t *kd)
73 {
74 
75 	if (kd == NULL)
76 		return (_kd_is_null);
77 	return (kd->errbuf);
78 }
79 
80 static int
81 _kvm_read_kernel_ehdr(kvm_t *kd)
82 {
83 	Elf *elf;
84 
85 	if (elf_version(EV_CURRENT) == EV_NONE) {
86 		_kvm_err(kd, kd->program, "Unsupported libelf");
87 		return (-1);
88 	}
89 	elf = elf_begin(kd->nlfd, ELF_C_READ, NULL);
90 	if (elf == NULL) {
91 		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
92 		return (-1);
93 	}
94 	if (elf_kind(elf) != ELF_K_ELF) {
95 		_kvm_err(kd, kd->program, "kernel is not an ELF file");
96 		return (-1);
97 	}
98 	if (gelf_getehdr(elf, &kd->nlehdr) == NULL) {
99 		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
100 		elf_end(elf);
101 		return (-1);
102 	}
103 	elf_end(elf);
104 
105 	switch (kd->nlehdr.e_ident[EI_DATA]) {
106 	case ELFDATA2LSB:
107 	case ELFDATA2MSB:
108 		return (0);
109 	default:
110 		_kvm_err(kd, kd->program,
111 		    "unsupported ELF data encoding for kernel");
112 		return (-1);
113 	}
114 }
115 
116 static kvm_t *
117 _kvm_open(kvm_t *kd, const char *uf, const char *mf, int flag, char *errout)
118 {
119 	struct kvm_arch **parch;
120 	struct stat st;
121 
122 	kd->vmfd = -1;
123 	kd->pmfd = -1;
124 	kd->nlfd = -1;
125 	kd->vmst = NULL;
126 	kd->procbase = NULL;
127 	kd->argspc = NULL;
128 	kd->argv = NULL;
129 
130 	if (uf == NULL)
131 		uf = getbootfile();
132 	else if (strlen(uf) >= MAXPATHLEN) {
133 		_kvm_err(kd, kd->program, "exec file name too long");
134 		goto failed;
135 	}
136 	if (flag & ~O_RDWR) {
137 		_kvm_err(kd, kd->program, "bad flags arg");
138 		goto failed;
139 	}
140 	if (mf == NULL)
141 		mf = _PATH_MEM;
142 
143 	if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) {
144 		_kvm_syserr(kd, kd->program, "%s", mf);
145 		goto failed;
146 	}
147 	if (fstat(kd->pmfd, &st) < 0) {
148 		_kvm_syserr(kd, kd->program, "%s", mf);
149 		goto failed;
150 	}
151 	if (S_ISREG(st.st_mode) && st.st_size <= 0) {
152 		errno = EINVAL;
153 		_kvm_syserr(kd, kd->program, "empty file");
154 		goto failed;
155 	}
156 	if (S_ISCHR(st.st_mode)) {
157 		/*
158 		 * If this is a character special device, then check that
159 		 * it's /dev/mem.  If so, open kmem too.  (Maybe we should
160 		 * make it work for either /dev/mem or /dev/kmem -- in either
161 		 * case you're working with a live kernel.)
162 		 */
163 		if (strcmp(mf, _PATH_DEVNULL) == 0) {
164 			kd->vmfd = open(_PATH_DEVNULL, O_RDONLY | O_CLOEXEC);
165 			return (kd);
166 		} else if (strcmp(mf, _PATH_MEM) == 0) {
167 			if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC)) <
168 			    0) {
169 				_kvm_syserr(kd, kd->program, "%s", _PATH_KMEM);
170 				goto failed;
171 			}
172 			return (kd);
173 		}
174 	}
175 
176 	/*
177 	 * This is either a crash dump or a remote live system with its physical
178 	 * memory fully accessible via a special device.
179 	 * Open the namelist fd and determine the architecture.
180 	 */
181 	if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) {
182 		_kvm_syserr(kd, kd->program, "%s", uf);
183 		goto failed;
184 	}
185 	if (_kvm_read_kernel_ehdr(kd) < 0)
186 		goto failed;
187 	if (strncmp(mf, _PATH_FWMEM, strlen(_PATH_FWMEM)) == 0 ||
188 	    strncmp(mf, _PATH_DEVVMM, strlen(_PATH_DEVVMM)) == 0) {
189 		kd->rawdump = 1;
190 		kd->writable = 1;
191 	}
192 	SET_FOREACH(parch, kvm_arch) {
193 		if ((*parch)->ka_probe(kd)) {
194 			kd->arch = *parch;
195 			break;
196 		}
197 	}
198 	if (kd->arch == NULL) {
199 		_kvm_err(kd, kd->program, "unsupported architecture");
200 		goto failed;
201 	}
202 
203 	/*
204 	 * Non-native kernels require a symbol resolver.
205 	 */
206 	if (!kd->arch->ka_native(kd) && kd->resolve_symbol == NULL) {
207 		_kvm_err(kd, kd->program,
208 		    "non-native kernel requires a symbol resolver");
209 		goto failed;
210 	}
211 
212 	/*
213 	 * Initialize the virtual address translation machinery.
214 	 */
215 	if (kd->arch->ka_initvtop(kd) < 0)
216 		goto failed;
217 	return (kd);
218 failed:
219 	/*
220 	 * Copy out the error if doing sane error semantics.
221 	 */
222 	if (errout != NULL)
223 		strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX);
224 	(void)kvm_close(kd);
225 	return (NULL);
226 }
227 
228 kvm_t *
229 kvm_openfiles(const char *uf, const char *mf, const char *sf __unused, int flag,
230     char *errout)
231 {
232 	kvm_t *kd;
233 
234 	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
235 		if (errout != NULL)
236 			(void)strlcpy(errout, strerror(errno),
237 			    _POSIX2_LINE_MAX);
238 		return (NULL);
239 	}
240 	return (_kvm_open(kd, uf, mf, flag, errout));
241 }
242 
243 kvm_t *
244 kvm_open(const char *uf, const char *mf, const char *sf __unused, int flag,
245     const char *errstr)
246 {
247 	kvm_t *kd;
248 
249 	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
250 		if (errstr != NULL)
251 			(void)fprintf(stderr, "%s: %s\n",
252 				      errstr, strerror(errno));
253 		return (NULL);
254 	}
255 	kd->program = errstr;
256 	return (_kvm_open(kd, uf, mf, flag, NULL));
257 }
258 
259 kvm_t *
260 kvm_open2(const char *uf, const char *mf, int flag, char *errout,
261     int (*resolver)(const char *, kvaddr_t *))
262 {
263 	kvm_t *kd;
264 
265 	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
266 		if (errout != NULL)
267 			(void)strlcpy(errout, strerror(errno),
268 			    _POSIX2_LINE_MAX);
269 		return (NULL);
270 	}
271 	kd->resolve_symbol = resolver;
272 	return (_kvm_open(kd, uf, mf, flag, errout));
273 }
274 
275 int
276 kvm_close(kvm_t *kd)
277 {
278 	int error = 0;
279 
280 	if (kd == NULL) {
281 		errno = EINVAL;
282 		return (-1);
283 	}
284 	if (kd->vmst != NULL)
285 		kd->arch->ka_freevtop(kd);
286 	if (kd->pmfd >= 0)
287 		error |= close(kd->pmfd);
288 	if (kd->vmfd >= 0)
289 		error |= close(kd->vmfd);
290 	if (kd->nlfd >= 0)
291 		error |= close(kd->nlfd);
292 	if (kd->procbase != 0)
293 		free((void *)kd->procbase);
294 	if (kd->argbuf != 0)
295 		free((void *) kd->argbuf);
296 	if (kd->argspc != 0)
297 		free((void *) kd->argspc);
298 	if (kd->argv != 0)
299 		free((void *)kd->argv);
300 	if (kd->pt_map != NULL)
301 		free(kd->pt_map);
302 	if (kd->page_map != NULL)
303 		free(kd->page_map);
304 	if (kd->sparse_map != MAP_FAILED)
305 		munmap(kd->sparse_map, kd->pt_sparse_size);
306 	free((void *)kd);
307 
308 	return (error);
309 }
310 
311 int
312 kvm_nlist2(kvm_t *kd, struct kvm_nlist *nl)
313 {
314 
315 	/*
316 	 * If called via the public interface, permit initialization of
317 	 * further virtualized modules on demand.
318 	 */
319 	return (_kvm_nlist(kd, nl, 1));
320 }
321 
322 int
323 kvm_nlist(kvm_t *kd, struct nlist *nl)
324 {
325 	struct kvm_nlist *kl;
326 	int count, i, nfail;
327 
328 	/*
329 	 * Avoid reporting truncated addresses by failing for non-native
330 	 * cores.
331 	 */
332 	if (!kvm_native(kd)) {
333 		_kvm_err(kd, kd->program, "kvm_nlist of non-native vmcore");
334 		return (-1);
335 	}
336 
337 	for (count = 0; nl[count].n_name != NULL && nl[count].n_name[0] != '\0';
338 	     count++)
339 		;
340 	if (count == 0)
341 		return (0);
342 	kl = calloc(count + 1, sizeof(*kl));
343 	for (i = 0; i < count; i++)
344 		kl[i].n_name = nl[i].n_name;
345 	nfail = kvm_nlist2(kd, kl);
346 	for (i = 0; i < count; i++) {
347 		nl[i].n_type = kl[i].n_type;
348 		nl[i].n_other = 0;
349 		nl[i].n_desc = 0;
350 		nl[i].n_value = kl[i].n_value;
351 	}
352 	return (nfail);
353 }
354 
355 ssize_t
356 kvm_read(kvm_t *kd, u_long kva, void *buf, size_t len)
357 {
358 
359 	return (kvm_read2(kd, kva, buf, len));
360 }
361 
362 ssize_t
363 kvm_read2(kvm_t *kd, kvaddr_t kva, void *buf, size_t len)
364 {
365 	int cc;
366 	ssize_t cr;
367 	off_t pa;
368 	char *cp;
369 
370 	if (ISALIVE(kd)) {
371 		/*
372 		 * We're using /dev/kmem.  Just read straight from the
373 		 * device and let the active kernel do the address translation.
374 		 */
375 		errno = 0;
376 		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
377 			_kvm_err(kd, 0, "invalid address (0x%jx)",
378 			    (uintmax_t)kva);
379 			return (-1);
380 		}
381 		cr = read(kd->vmfd, buf, len);
382 		if (cr < 0) {
383 			_kvm_syserr(kd, 0, "kvm_read");
384 			return (-1);
385 		} else if (cr < (ssize_t)len)
386 			_kvm_err(kd, kd->program, "short read");
387 		return (cr);
388 	}
389 
390 	cp = buf;
391 	while (len > 0) {
392 		cc = kd->arch->ka_kvatop(kd, kva, &pa);
393 		if (cc == 0)
394 			return (-1);
395 		if (cc > (ssize_t)len)
396 			cc = len;
397 		errno = 0;
398 		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
399 			_kvm_syserr(kd, 0, _PATH_MEM);
400 			break;
401 		}
402 		cr = read(kd->pmfd, cp, cc);
403 		if (cr < 0) {
404 			_kvm_syserr(kd, kd->program, "kvm_read");
405 			break;
406 		}
407 		/*
408 		 * If ka_kvatop returns a bogus value or our core file is
409 		 * truncated, we might wind up seeking beyond the end of the
410 		 * core file in which case the read will return 0 (EOF).
411 		 */
412 		if (cr == 0)
413 			break;
414 		cp += cr;
415 		kva += cr;
416 		len -= cr;
417 	}
418 
419 	return (cp - (char *)buf);
420 }
421 
422 ssize_t
423 kvm_write(kvm_t *kd, u_long kva, const void *buf, size_t len)
424 {
425 	int cc;
426 	ssize_t cw;
427 	off_t pa;
428 	const char *cp;
429 
430 	if (!ISALIVE(kd) && !kd->writable) {
431 		_kvm_err(kd, kd->program,
432 		    "kvm_write not implemented for dead kernels");
433 		return (-1);
434 	}
435 
436 	if (ISALIVE(kd)) {
437 		/*
438 		 * Just like kvm_read, only we write.
439 		 */
440 		errno = 0;
441 		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
442 			_kvm_err(kd, 0, "invalid address (%lx)", kva);
443 			return (-1);
444 		}
445 		cc = write(kd->vmfd, buf, len);
446 		if (cc < 0) {
447 			_kvm_syserr(kd, 0, "kvm_write");
448 			return (-1);
449 		} else if ((size_t)cc < len)
450 			_kvm_err(kd, kd->program, "short write");
451 		return (cc);
452 	}
453 
454 	cp = buf;
455 	while (len > 0) {
456 		cc = kd->arch->ka_kvatop(kd, kva, &pa);
457 		if (cc == 0)
458 			return (-1);
459 		if (cc > (ssize_t)len)
460 			cc = len;
461 		errno = 0;
462 		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
463 			_kvm_syserr(kd, 0, _PATH_MEM);
464 			break;
465 		}
466 		cw = write(kd->pmfd, cp, cc);
467 		if (cw < 0) {
468 			_kvm_syserr(kd, kd->program, "kvm_write");
469 			break;
470 		}
471 		/*
472 		 * If ka_kvatop returns a bogus value or our core file is
473 		 * truncated, we might wind up seeking beyond the end of the
474 		 * core file in which case the read will return 0 (EOF).
475 		 */
476 		if (cw == 0)
477 			break;
478 		cp += cw;
479 		kva += cw;
480 		len -= cw;
481 	}
482 
483 	return (cp - (const char *)buf);
484 }
485 
486 int
487 kvm_native(kvm_t *kd)
488 {
489 
490 	if (ISALIVE(kd))
491 		return (1);
492 	return (kd->arch->ka_native(kd));
493 }
494 
495 int
496 kvm_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *closure)
497 {
498 
499 	if (kd->arch->ka_walk_pages == NULL)
500 		return (0);
501 
502 	return (kd->arch->ka_walk_pages(kd, cb, closure));
503 }
504 
505 kssize_t
506 kvm_kerndisp(kvm_t *kd)
507 {
508 	unsigned long kernbase, rel_kernbase;
509 	size_t kernbase_len = sizeof(kernbase);
510 	size_t rel_kernbase_len = sizeof(rel_kernbase);
511 
512 	if (ISALIVE(kd)) {
513 		if (sysctlbyname("kern.base_address", &kernbase,
514 		    &kernbase_len, NULL, 0) == -1) {
515 			_kvm_syserr(kd, kd->program,
516 				"failed to get kernel base address");
517 			return (0);
518 		}
519 		if (sysctlbyname("kern.relbase_address", &rel_kernbase,
520 		    &rel_kernbase_len, NULL, 0) == -1) {
521 			_kvm_syserr(kd, kd->program,
522 				"failed to get relocated kernel base address");
523 			return (0);
524 		}
525 		return (rel_kernbase - kernbase);
526 	}
527 
528 	if (kd->arch->ka_kerndisp == NULL)
529 		return (0);
530 
531 	return (kd->arch->ka_kerndisp(kd));
532 }
533