xref: /freebsd/lib/libkvm/kvm_private.c (revision c7a063741720ef81d4caa4613242579d12f1d605)
1 /*-
2  * Copyright (c) 1989, 1992, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software developed by the Computer Systems
6  * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
7  * BG 91-66 and contributed to Berkeley.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/fnv_hash.h>
39 
40 #define	_WANT_VNET
41 
42 #include <sys/user.h>
43 #include <sys/linker.h>
44 #include <sys/pcpu.h>
45 #include <sys/stat.h>
46 #include <sys/mman.h>
47 
48 #include <stdbool.h>
49 #include <net/vnet.h>
50 
51 #include <assert.h>
52 #include <fcntl.h>
53 #include <vm/vm.h>
54 #include <kvm.h>
55 #include <limits.h>
56 #include <paths.h>
57 #include <stdint.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62 #include <stdarg.h>
63 #include <inttypes.h>
64 
65 #include "kvm_private.h"
66 
67 /*
68  * Routines private to libkvm.
69  */
70 
71 /* from src/lib/libc/gen/nlist.c */
72 int __fdnlist(int, struct nlist *);
73 
74 /*
75  * Report an error using printf style arguments.  "program" is kd->program
76  * on hard errors, and 0 on soft errors, so that under sun error emulation,
77  * only hard errors are printed out (otherwise, programs like gdb will
78  * generate tons of error messages when trying to access bogus pointers).
79  */
80 void
81 _kvm_err(kvm_t *kd, const char *program, const char *fmt, ...)
82 {
83 	va_list ap;
84 
85 	va_start(ap, fmt);
86 	if (program != NULL) {
87 		(void)fprintf(stderr, "%s: ", program);
88 		(void)vfprintf(stderr, fmt, ap);
89 		(void)fputc('\n', stderr);
90 	} else
91 		(void)vsnprintf(kd->errbuf,
92 		    sizeof(kd->errbuf), fmt, ap);
93 
94 	va_end(ap);
95 }
96 
97 void
98 _kvm_syserr(kvm_t *kd, const char *program, const char *fmt, ...)
99 {
100 	va_list ap;
101 	int n;
102 
103 	va_start(ap, fmt);
104 	if (program != NULL) {
105 		(void)fprintf(stderr, "%s: ", program);
106 		(void)vfprintf(stderr, fmt, ap);
107 		(void)fprintf(stderr, ": %s\n", strerror(errno));
108 	} else {
109 		char *cp = kd->errbuf;
110 
111 		(void)vsnprintf(cp, sizeof(kd->errbuf), fmt, ap);
112 		n = strlen(cp);
113 		(void)snprintf(&cp[n], sizeof(kd->errbuf) - n, ": %s",
114 		    strerror(errno));
115 	}
116 	va_end(ap);
117 }
118 
119 void *
120 _kvm_malloc(kvm_t *kd, size_t n)
121 {
122 	void *p;
123 
124 	if ((p = calloc(n, sizeof(char))) == NULL)
125 		_kvm_err(kd, kd->program, "can't allocate %zu bytes: %s",
126 			 n, strerror(errno));
127 	return (p);
128 }
129 
130 int
131 _kvm_probe_elf_kernel(kvm_t *kd, int class, int machine)
132 {
133 
134 	return (kd->nlehdr.e_ident[EI_CLASS] == class &&
135 	    ((machine == EM_PPC || machine == EM_PPC64) ?
136 	     kd->nlehdr.e_type == ET_DYN : kd->nlehdr.e_type == ET_EXEC) &&
137 	    kd->nlehdr.e_machine == machine);
138 }
139 
140 int
141 _kvm_is_minidump(kvm_t *kd)
142 {
143 	char minihdr[8];
144 
145 	if (kd->rawdump)
146 		return (0);
147 	if (pread(kd->pmfd, &minihdr, 8, 0) == 8 &&
148 	    memcmp(&minihdr, "minidump", 8) == 0)
149 		return (1);
150 	return (0);
151 }
152 
153 /*
154  * The powerpc backend has a hack to strip a leading kerneldump
155  * header from the core before treating it as an ELF header.
156  *
157  * We can add that here if we can get a change to libelf to support
158  * an initial offset into the file.  Alternatively we could patch
159  * savecore to extract cores from a regular file instead.
160  */
161 int
162 _kvm_read_core_phdrs(kvm_t *kd, size_t *phnump, GElf_Phdr **phdrp)
163 {
164 	GElf_Ehdr ehdr;
165 	GElf_Phdr *phdr;
166 	Elf *elf;
167 	size_t i, phnum;
168 
169 	elf = elf_begin(kd->pmfd, ELF_C_READ, NULL);
170 	if (elf == NULL) {
171 		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
172 		return (-1);
173 	}
174 	if (elf_kind(elf) != ELF_K_ELF) {
175 		_kvm_err(kd, kd->program, "invalid core");
176 		goto bad;
177 	}
178 	if (gelf_getclass(elf) != kd->nlehdr.e_ident[EI_CLASS]) {
179 		_kvm_err(kd, kd->program, "invalid core");
180 		goto bad;
181 	}
182 	if (gelf_getehdr(elf, &ehdr) == NULL) {
183 		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
184 		goto bad;
185 	}
186 	if (ehdr.e_type != ET_CORE) {
187 		_kvm_err(kd, kd->program, "invalid core");
188 		goto bad;
189 	}
190 	if (ehdr.e_machine != kd->nlehdr.e_machine) {
191 		_kvm_err(kd, kd->program, "invalid core");
192 		goto bad;
193 	}
194 
195 	if (elf_getphdrnum(elf, &phnum) == -1) {
196 		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
197 		goto bad;
198 	}
199 
200 	phdr = calloc(phnum, sizeof(*phdr));
201 	if (phdr == NULL) {
202 		_kvm_err(kd, kd->program, "failed to allocate phdrs");
203 		goto bad;
204 	}
205 
206 	for (i = 0; i < phnum; i++) {
207 		if (gelf_getphdr(elf, i, &phdr[i]) == NULL) {
208 			free(phdr);
209 			_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
210 			goto bad;
211 		}
212 	}
213 	elf_end(elf);
214 	*phnump = phnum;
215 	*phdrp = phdr;
216 	return (0);
217 
218 bad:
219 	elf_end(elf);
220 	return (-1);
221 }
222 
/*
 * Mask v so that only bits in the half-open range [bit0, bitN) may remain
 * set.  A mask (bitN - bit0) bits wide is generated and then shifted left
 * so that its lowest bit lands on +bit0+.  The case that selects every bit
 * (bit0 == 0 and bitN == BITS_IN(v)) returns v untouched.
 */
static uint64_t
bitmask_range(uint64_t v, uint64_t bit0, uint64_t bitN)
{
	uint64_t width, mask;

	if (bit0 == 0 && bitN == BITS_IN(v))
		return (v);

	width = bitN - bit0;
	mask = ((1ULL << width) - 1ULL) << bit0;
	return (v & mask);
}
235 
/*
 * Returns the number of bits that are set in bit positions [bit0, bitN) of
 * the array of 64-bit words starting at addr.  bit0 may be non-zero in the
 * case of counting backwards from bitN; the partial first word is then
 * handled on its own before the rest is counted a word at a time.
 */
static uint64_t
popcount_bytes(uint64_t *addr, uint32_t bit0, uint32_t bitN)
{
	uint64_t total = 0;
	uint32_t remaining = bitN - bit0;
	uint32_t end;

	/* Consume the unaligned leading part of the first word, if any. */
	if ((bit0 % BITS_IN(*addr)) != 0) {
		end = MIN(bitN, roundup2(bit0, BITS_IN(*addr)));
		total += __bitcount64(bitmask_range(*addr, bit0, end));
		remaining -= (end - bit0);
		addr++;
	}

	/* Count whole words, then whatever partial word is left over. */
	for (; remaining > 0; remaining -= end, addr++) {
		end = MIN(remaining, BITS_IN(*addr));
		total += __bitcount64(bitmask_range(*addr, 0, end));
	}

	return (total);
}
265 
266 void *
267 _kvm_pmap_get(kvm_t *kd, u_long idx, size_t len)
268 {
269 	uintptr_t off = idx * len;
270 
271 	if ((off_t)off >= kd->pt_sparse_off)
272 		return (NULL);
273 	return (void *)((uintptr_t)kd->page_map + off);
274 }
275 
276 void *
277 _kvm_map_get(kvm_t *kd, u_long pa, unsigned int page_size)
278 {
279 	off_t off;
280 	uintptr_t addr;
281 
282 	off = _kvm_pt_find(kd, pa, page_size);
283 	if (off == -1)
284 		return NULL;
285 
286 	addr = (uintptr_t)kd->page_map + off;
287 	if (off >= kd->pt_sparse_off)
288 		addr = (uintptr_t)kd->sparse_map + (off - kd->pt_sparse_off);
289 	return (void *)addr;
290 }
291 
292 int
293 _kvm_pt_init(kvm_t *kd, size_t dump_avail_size, off_t dump_avail_off,
294     size_t map_len, off_t map_off, off_t sparse_off, int page_size)
295 {
296 	uint64_t *addr;
297 	uint32_t *popcount_bin;
298 	int bin_popcounts = 0;
299 	uint64_t pc_bins, res;
300 	ssize_t rd;
301 
302 	kd->dump_avail_size = dump_avail_size;
303 	if (dump_avail_size > 0) {
304 		kd->dump_avail = mmap(NULL, kd->dump_avail_size, PROT_READ,
305 		    MAP_PRIVATE, kd->pmfd, dump_avail_off);
306 	} else {
307 		/*
308 		 * Older version minidumps don't provide dump_avail[],
309 		 * so the bitmap is fully populated from 0 to
310 		 * last_pa. Create an implied dump_avail that
311 		 * expresses this.
312 		 */
313 		kd->dump_avail = calloc(4, sizeof(uint64_t));
314 		kd->dump_avail[1] = _kvm64toh(kd, map_len * 8 * page_size);
315 	}
316 
317 	/*
318 	 * Map the bitmap specified by the arguments.
319 	 */
320 	kd->pt_map = _kvm_malloc(kd, map_len);
321 	if (kd->pt_map == NULL) {
322 		_kvm_err(kd, kd->program, "cannot allocate %zu bytes for bitmap",
323 		    map_len);
324 		return (-1);
325 	}
326 	rd = pread(kd->pmfd, kd->pt_map, map_len, map_off);
327 	if (rd < 0 || rd != (ssize_t)map_len) {
328 		_kvm_err(kd, kd->program, "cannot read %zu bytes for bitmap",
329 		    map_len);
330 		return (-1);
331 	}
332 	kd->pt_map_size = map_len;
333 
334 	/*
335 	 * Generate a popcount cache for every POPCOUNT_BITS in the bitmap,
336 	 * so lookups only have to calculate the number of bits set between
337 	 * a cache point and their bit.  This reduces lookups to O(1),
338 	 * without significantly increasing memory requirements.
339 	 *
340 	 * Round up the number of bins so that 'upper half' lookups work for
341 	 * the final bin, if needed.  The first popcount is 0, since no bits
342 	 * precede bit 0, so add 1 for that also.  Without this, extra work
343 	 * would be needed to handle the first PTEs in _kvm_pt_find().
344 	 */
345 	addr = kd->pt_map;
346 	res = map_len;
347 	pc_bins = 1 + (res * NBBY + POPCOUNT_BITS / 2) / POPCOUNT_BITS;
348 	kd->pt_popcounts = calloc(pc_bins, sizeof(uint32_t));
349 	if (kd->pt_popcounts == NULL) {
350 		_kvm_err(kd, kd->program, "cannot allocate popcount bins");
351 		return (-1);
352 	}
353 
354 	for (popcount_bin = &kd->pt_popcounts[1]; res > 0;
355 	    addr++, res -= sizeof(*addr)) {
356 		*popcount_bin += popcount_bytes(addr, 0,
357 		    MIN(res * NBBY, BITS_IN(*addr)));
358 		if (++bin_popcounts == POPCOUNTS_IN(*addr)) {
359 			popcount_bin++;
360 			*popcount_bin = *(popcount_bin - 1);
361 			bin_popcounts = 0;
362 		}
363 	}
364 
365 	assert(pc_bins * sizeof(*popcount_bin) ==
366 	    ((uintptr_t)popcount_bin - (uintptr_t)kd->pt_popcounts));
367 
368 	kd->pt_sparse_off = sparse_off;
369 	kd->pt_sparse_size = (uint64_t)*popcount_bin * page_size;
370 	kd->pt_page_size = page_size;
371 
372 	/*
373 	 * Map the sparse page array.  This is useful for performing point
374 	 * lookups of specific pages, e.g. for kvm_walk_pages.  Generally,
375 	 * this is much larger than is reasonable to read in up front, so
376 	 * mmap it in instead.
377 	 */
378 	kd->sparse_map = mmap(NULL, kd->pt_sparse_size, PROT_READ,
379 	    MAP_PRIVATE, kd->pmfd, kd->pt_sparse_off);
380 	if (kd->sparse_map == MAP_FAILED) {
381 		_kvm_err(kd, kd->program, "cannot map %" PRIu64
382 		    " bytes from fd %d offset %jd for sparse map: %s",
383 		    kd->pt_sparse_size, kd->pmfd,
384 		    (intmax_t)kd->pt_sparse_off, strerror(errno));
385 		return (-1);
386 	}
387 	return (0);
388 }
389 
390 int
391 _kvm_pmap_init(kvm_t *kd, uint32_t pmap_size, off_t pmap_off)
392 {
393 	ssize_t exp_len = pmap_size;
394 
395 	kd->page_map_size = pmap_size;
396 	kd->page_map_off = pmap_off;
397 	kd->page_map = _kvm_malloc(kd, pmap_size);
398 	if (kd->page_map == NULL) {
399 		_kvm_err(kd, kd->program, "cannot allocate %u bytes "
400 		    "for page map", pmap_size);
401 		return (-1);
402 	}
403 	if (pread(kd->pmfd, kd->page_map, pmap_size, pmap_off) != exp_len) {
404 		_kvm_err(kd, kd->program, "cannot read %d bytes from "
405 		    "offset %jd for page map", pmap_size, (intmax_t)pmap_off);
406 		return (-1);
407 	}
408 	return (0);
409 }
410 
411 static inline uint64_t
412 dump_avail_n(kvm_t *kd, long i)
413 {
414 	return (_kvm64toh(kd, kd->dump_avail[i]));
415 }
416 
417 uint64_t
418 _kvm_pa_bit_id(kvm_t *kd, uint64_t pa, unsigned int page_size)
419 {
420 	uint64_t adj;
421 	long i;
422 
423 	adj = 0;
424 	for (i = 0; dump_avail_n(kd, i + 1) != 0; i += 2) {
425 		if (pa >= dump_avail_n(kd, i + 1)) {
426 			adj += howmany(dump_avail_n(kd, i + 1), page_size) -
427 			    dump_avail_n(kd, i) / page_size;
428 		} else {
429 			return (pa / page_size -
430 			    dump_avail_n(kd, i) / page_size + adj);
431 		}
432 	}
433 	return (_KVM_BIT_ID_INVALID);
434 }
435 
436 uint64_t
437 _kvm_bit_id_pa(kvm_t *kd, uint64_t bit_id, unsigned int page_size)
438 {
439 	uint64_t sz;
440 	long i;
441 
442 	for (i = 0; dump_avail_n(kd, i + 1) != 0; i += 2) {
443 		sz = howmany(dump_avail_n(kd, i + 1), page_size) -
444 		    dump_avail_n(kd, i) / page_size;
445 		if (bit_id < sz) {
446 			return (rounddown2(dump_avail_n(kd, i), page_size) +
447 			    bit_id * page_size);
448 		}
449 		bit_id -= sz;
450 	}
451 	return (_KVM_PA_INVALID);
452 }
453 
454 /*
455  * Find the offset for the given physical page address; returns -1 otherwise.
456  *
457  * A page's offset is represented by the sparse page base offset plus the
458  * number of bits set before its bit multiplied by page size.  This means
459  * that if a page exists in the dump, it's necessary to know how many pages
460  * in the dump precede it.  Reduce this O(n) counting to O(1) by caching the
461  * number of bits set at POPCOUNT_BITS intervals.
462  *
463  * Then to find the number of pages before the requested address, simply
464  * index into the cache and count the number of bits set between that cache
465  * bin and the page's bit.  Halve the number of bytes that have to be
466  * checked by also counting down from the next higher bin if it's closer.
467  */
468 off_t
469 _kvm_pt_find(kvm_t *kd, uint64_t pa, unsigned int page_size)
470 {
471 	uint64_t *bitmap = kd->pt_map;
472 	uint64_t pte_bit_id = _kvm_pa_bit_id(kd, pa, page_size);
473 	uint64_t pte_u64 = pte_bit_id / BITS_IN(*bitmap);
474 	uint64_t popcount_id = pte_bit_id / POPCOUNT_BITS;
475 	uint64_t pte_mask = 1ULL << (pte_bit_id % BITS_IN(*bitmap));
476 	uint64_t bitN;
477 	uint32_t count;
478 
479 	/* Check whether the page address requested is in the dump. */
480 	if (pte_bit_id == _KVM_BIT_ID_INVALID ||
481 	    pte_bit_id >= (kd->pt_map_size * NBBY) ||
482 	    (bitmap[pte_u64] & pte_mask) == 0)
483 		return (-1);
484 
485 	/*
486 	 * Add/sub popcounts from the bitmap until the PTE's bit is reached.
487 	 * For bits that are in the upper half between the calculated
488 	 * popcount id and the next one, use the next one and subtract to
489 	 * minimize the number of popcounts required.
490 	 */
491 	if ((pte_bit_id % POPCOUNT_BITS) < (POPCOUNT_BITS / 2)) {
492 		count = kd->pt_popcounts[popcount_id] + popcount_bytes(
493 		    bitmap + popcount_id * POPCOUNTS_IN(*bitmap),
494 		    0, pte_bit_id - popcount_id * POPCOUNT_BITS);
495 	} else {
496 		/*
497 		 * Counting in reverse is trickier, since we must avoid
498 		 * reading from bytes that are not in range, and invert.
499 		 */
500 		uint64_t pte_u64_bit_off = pte_u64 * BITS_IN(*bitmap);
501 
502 		popcount_id++;
503 		bitN = MIN(popcount_id * POPCOUNT_BITS,
504 		    kd->pt_map_size * BITS_IN(uint8_t));
505 		count = kd->pt_popcounts[popcount_id] - popcount_bytes(
506 		    bitmap + pte_u64,
507 		    pte_bit_id - pte_u64_bit_off, bitN - pte_u64_bit_off);
508 	}
509 
510 	/*
511 	 * This can only happen if the core is truncated.  Treat these
512 	 * entries as if they don't exist, since their backing doesn't.
513 	 */
514 	if (count >= (kd->pt_sparse_size / page_size))
515 		return (-1);
516 
517 	return (kd->pt_sparse_off + (uint64_t)count * page_size);
518 }
519 
520 static int
521 kvm_fdnlist(kvm_t *kd, struct kvm_nlist *list)
522 {
523 	kvaddr_t addr;
524 	int error, nfail;
525 
526 	if (kd->resolve_symbol == NULL) {
527 		struct nlist *nl;
528 		int count, i;
529 
530 		for (count = 0; list[count].n_name != NULL &&
531 		     list[count].n_name[0] != '\0'; count++)
532 			;
533 		nl = calloc(count + 1, sizeof(*nl));
534 		for (i = 0; i < count; i++)
535 			nl[i].n_name = list[i].n_name;
536 		nfail = __fdnlist(kd->nlfd, nl);
537 		for (i = 0; i < count; i++) {
538 			list[i].n_type = nl[i].n_type;
539 			list[i].n_value = nl[i].n_value;
540 		}
541 		free(nl);
542 		return (nfail);
543 	}
544 
545 	nfail = 0;
546 	while (list->n_name != NULL && list->n_name[0] != '\0') {
547 		error = kd->resolve_symbol(list->n_name, &addr);
548 		if (error != 0) {
549 			nfail++;
550 			list->n_value = 0;
551 			list->n_type = 0;
552 		} else {
553 			list->n_value = addr;
554 			list->n_type = N_DATA | N_EXT;
555 		}
556 		list++;
557 	}
558 	return (nfail);
559 }
560 
561 /*
562  * Walk the list of unresolved symbols, generate a new list and prefix the
563  * symbol names, try again, and merge back what we could resolve.
564  */
565 static int
566 kvm_fdnlist_prefix(kvm_t *kd, struct kvm_nlist *nl, int missing,
567     const char *prefix, kvaddr_t (*validate_fn)(kvm_t *, kvaddr_t))
568 {
569 	struct kvm_nlist *n, *np, *p;
570 	char *cp, *ce;
571 	const char *ccp;
572 	size_t len;
573 	int slen, unresolved;
574 
575 	/*
576 	 * Calculate the space we need to malloc for nlist and names.
577 	 * We are going to store the name twice for later lookups: once
578 	 * with the prefix and once the unmodified name delmited by \0.
579 	 */
580 	len = 0;
581 	unresolved = 0;
582 	for (p = nl; p->n_name && p->n_name[0]; ++p) {
583 		if (p->n_type != N_UNDF)
584 			continue;
585 		len += sizeof(struct kvm_nlist) + strlen(prefix) +
586 		    2 * (strlen(p->n_name) + 1);
587 		unresolved++;
588 	}
589 	if (unresolved == 0)
590 		return (unresolved);
591 	/* Add space for the terminating nlist entry. */
592 	len += sizeof(struct kvm_nlist);
593 	unresolved++;
594 
595 	/* Alloc one chunk for (nlist, [names]) and setup pointers. */
596 	n = np = malloc(len);
597 	bzero(n, len);
598 	if (n == NULL)
599 		return (missing);
600 	cp = ce = (char *)np;
601 	cp += unresolved * sizeof(struct kvm_nlist);
602 	ce += len;
603 
604 	/* Generate shortened nlist with special prefix. */
605 	unresolved = 0;
606 	for (p = nl; p->n_name && p->n_name[0]; ++p) {
607 		if (p->n_type != N_UNDF)
608 			continue;
609 		*np = *p;
610 		/* Save the new\0orig. name so we can later match it again. */
611 		slen = snprintf(cp, ce - cp, "%s%s%c%s", prefix,
612 		    (prefix[0] != '\0' && p->n_name[0] == '_') ?
613 			(p->n_name + 1) : p->n_name, '\0', p->n_name);
614 		if (slen < 0 || slen >= ce - cp)
615 			continue;
616 		np->n_name = cp;
617 		cp += slen + 1;
618 		np++;
619 		unresolved++;
620 	}
621 
622 	/* Do lookup on the reduced list. */
623 	np = n;
624 	unresolved = kvm_fdnlist(kd, np);
625 
626 	/* Check if we could resolve further symbols and update the list. */
627 	if (unresolved >= 0 && unresolved < missing) {
628 		/* Find the first freshly resolved entry. */
629 		for (; np->n_name && np->n_name[0]; np++)
630 			if (np->n_type != N_UNDF)
631 				break;
632 		/*
633 		 * The lists are both in the same order,
634 		 * so we can walk them in parallel.
635 		 */
636 		for (p = nl; np->n_name && np->n_name[0] &&
637 		    p->n_name && p->n_name[0]; ++p) {
638 			if (p->n_type != N_UNDF)
639 				continue;
640 			/* Skip expanded name and compare to orig. one. */
641 			ccp = np->n_name + strlen(np->n_name) + 1;
642 			if (strcmp(ccp, p->n_name) != 0)
643 				continue;
644 			/* Update nlist with new, translated results. */
645 			p->n_type = np->n_type;
646 			if (validate_fn)
647 				p->n_value = (*validate_fn)(kd, np->n_value);
648 			else
649 				p->n_value = np->n_value;
650 			missing--;
651 			/* Find next freshly resolved entry. */
652 			for (np++; np->n_name && np->n_name[0]; np++)
653 				if (np->n_type != N_UNDF)
654 					break;
655 		}
656 	}
657 	/* We could assert missing = unresolved here. */
658 
659 	free(n);
660 	return (unresolved);
661 }
662 
663 int
664 _kvm_nlist(kvm_t *kd, struct kvm_nlist *nl, int initialize)
665 {
666 	struct kvm_nlist *p;
667 	int nvalid;
668 	struct kld_sym_lookup lookup;
669 	int error;
670 	const char *prefix = "";
671 	char symname[1024]; /* XXX-BZ symbol name length limit? */
672 	int tried_vnet, tried_dpcpu;
673 
674 	/*
675 	 * If we can't use the kld symbol lookup, revert to the
676 	 * slow library call.
677 	 */
678 	if (!ISALIVE(kd)) {
679 		error = kvm_fdnlist(kd, nl);
680 		if (error <= 0)			/* Hard error or success. */
681 			return (error);
682 
683 		if (_kvm_vnet_initialized(kd, initialize))
684 			error = kvm_fdnlist_prefix(kd, nl, error,
685 			    VNET_SYMPREFIX, _kvm_vnet_validaddr);
686 
687 		if (error > 0 && _kvm_dpcpu_initialized(kd, initialize))
688 			error = kvm_fdnlist_prefix(kd, nl, error,
689 			    DPCPU_SYMPREFIX, _kvm_dpcpu_validaddr);
690 
691 		return (error);
692 	}
693 
694 	/*
695 	 * We can use the kld lookup syscall.  Go through each nlist entry
696 	 * and look it up with a kldsym(2) syscall.
697 	 */
698 	nvalid = 0;
699 	tried_vnet = 0;
700 	tried_dpcpu = 0;
701 again:
702 	for (p = nl; p->n_name && p->n_name[0]; ++p) {
703 		if (p->n_type != N_UNDF)
704 			continue;
705 
706 		lookup.version = sizeof(lookup);
707 		lookup.symvalue = 0;
708 		lookup.symsize = 0;
709 
710 		error = snprintf(symname, sizeof(symname), "%s%s", prefix,
711 		    (prefix[0] != '\0' && p->n_name[0] == '_') ?
712 			(p->n_name + 1) : p->n_name);
713 		if (error < 0 || error >= (int)sizeof(symname))
714 			continue;
715 		lookup.symname = symname;
716 		if (lookup.symname[0] == '_')
717 			lookup.symname++;
718 
719 		if (kldsym(0, KLDSYM_LOOKUP, &lookup) != -1) {
720 			p->n_type = N_TEXT;
721 			if (_kvm_vnet_initialized(kd, initialize) &&
722 			    strcmp(prefix, VNET_SYMPREFIX) == 0)
723 				p->n_value =
724 				    _kvm_vnet_validaddr(kd, lookup.symvalue);
725 			else if (_kvm_dpcpu_initialized(kd, initialize) &&
726 			    strcmp(prefix, DPCPU_SYMPREFIX) == 0)
727 				p->n_value =
728 				    _kvm_dpcpu_validaddr(kd, lookup.symvalue);
729 			else
730 				p->n_value = lookup.symvalue;
731 			++nvalid;
732 			/* lookup.symsize */
733 		}
734 	}
735 
736 	/*
737 	 * Check the number of entries that weren't found. If they exist,
738 	 * try again with a prefix for virtualized or DPCPU symbol names.
739 	 */
740 	error = ((p - nl) - nvalid);
741 	if (error && _kvm_vnet_initialized(kd, initialize) && !tried_vnet) {
742 		tried_vnet = 1;
743 		prefix = VNET_SYMPREFIX;
744 		goto again;
745 	}
746 	if (error && _kvm_dpcpu_initialized(kd, initialize) && !tried_dpcpu) {
747 		tried_dpcpu = 1;
748 		prefix = DPCPU_SYMPREFIX;
749 		goto again;
750 	}
751 
752 	/*
753 	 * Return the number of entries that weren't found. If they exist,
754 	 * also fill internal error buffer.
755 	 */
756 	error = ((p - nl) - nvalid);
757 	if (error)
758 		_kvm_syserr(kd, kd->program, "kvm_nlist");
759 	return (error);
760 }
761 
762 int
763 _kvm_bitmap_init(struct kvm_bitmap *bm, u_long bitmapsize, u_long *idx)
764 {
765 
766 	*idx = ULONG_MAX;
767 	bm->map = calloc(bitmapsize, sizeof *bm->map);
768 	if (bm->map == NULL)
769 		return (0);
770 	bm->size = bitmapsize;
771 	return (1);
772 }
773 
774 void
775 _kvm_bitmap_set(struct kvm_bitmap *bm, u_long bm_index)
776 {
777 	uint8_t *byte = &bm->map[bm_index / 8];
778 
779 	if (bm_index / 8 < bm->size)
780 		*byte |= (1UL << (bm_index % 8));
781 }
782 
783 int
784 _kvm_bitmap_next(struct kvm_bitmap *bm, u_long *idx)
785 {
786 	u_long first_invalid = bm->size * CHAR_BIT;
787 
788 	if (*idx == ULONG_MAX)
789 		*idx = 0;
790 	else
791 		(*idx)++;
792 
793 	/* Find the next valid idx. */
794 	for (; *idx < first_invalid; (*idx)++) {
795 		unsigned int mask = 1U << (*idx % CHAR_BIT);
796 		if ((bm->map[*idx / CHAR_BIT] & mask) != 0)
797 			break;
798 	}
799 
800 	return (*idx < first_invalid);
801 }
802 
803 void
804 _kvm_bitmap_deinit(struct kvm_bitmap *bm)
805 {
806 
807 	free(bm->map);
808 }
809 
810 int
811 _kvm_visit_cb(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *arg, u_long pa,
812     u_long kmap_vaddr, u_long dmap_vaddr, vm_prot_t prot, size_t len,
813     unsigned int page_size)
814 {
815 	unsigned int pgsz = page_size ? page_size : len;
816 	struct kvm_page p = {
817 		.kp_version = LIBKVM_WALK_PAGES_VERSION,
818 		.kp_paddr = pa,
819 		.kp_kmap_vaddr = kmap_vaddr,
820 		.kp_dmap_vaddr = dmap_vaddr,
821 		.kp_prot = prot,
822 		.kp_offset = _kvm_pt_find(kd, pa, pgsz),
823 		.kp_len = len,
824 	};
825 
826 	return cb(&p, arg);
827 }
828