xref: /freebsd/lib/libkvm/kvm_private.c (revision 64c2a712d661db9be31f02fe97c3b59710290ae3)
/*-
 * Copyright (c) 1989, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software developed by the Computer Systems
 * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
 * BG 91-66 and contributed to Berkeley.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/fnv_hash.h>

#define	_WANT_VNET

#include <sys/user.h>
#include <sys/linker.h>
#include <sys/pcpu.h>
#include <sys/stat.h>
#include <sys/mman.h>

#include <stdbool.h>
#include <net/vnet.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <vm/vm.h>
#include <kvm.h>
#include <limits.h>
#include <paths.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <inttypes.h>

#include "kvm_private.h"

/*
 * Routines private to libkvm.
 */

/* from src/lib/libc/gen/nlist.c */
int __fdnlist(int, struct nlist *);

/*
 * Report an error using printf-style arguments.  "program" is kd->program
 * on hard errors, and NULL on soft errors, so that under Sun error
 * emulation only hard errors are printed out (otherwise, programs like
 * gdb will generate tons of error messages when trying to access bogus
 * pointers).
 */
void
_kvm_err(kvm_t *kd, const char *program, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if (program != NULL) {
		(void)fprintf(stderr, "%s: ", program);
		(void)vfprintf(stderr, fmt, ap);
		(void)fputc('\n', stderr);
	} else
		(void)vsnprintf(kd->errbuf,
		    sizeof(kd->errbuf), fmt, ap);

	va_end(ap);
}

void
_kvm_syserr(kvm_t *kd, const char *program, const char *fmt, ...)
{
	va_list ap;
	int n;

	va_start(ap, fmt);
	if (program != NULL) {
		(void)fprintf(stderr, "%s: ", program);
		(void)vfprintf(stderr, fmt, ap);
		(void)fprintf(stderr, ": %s\n", strerror(errno));
	} else {
		char *cp = kd->errbuf;

		(void)vsnprintf(cp, sizeof(kd->errbuf), fmt, ap);
		n = strlen(cp);
		(void)snprintf(&cp[n], sizeof(kd->errbuf) - n, ": %s",
		    strerror(errno));
	}
	va_end(ap);
}

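/*
 * Illustrative sketch (not compiled): the two error conventions above.
 * Passing a program name (kd->program) reports a hard error on stderr
 * right away; passing NULL records a soft error in kd->errbuf, where
 * callers can fetch it with kvm_geterr(3).  "buf", "len", "off" and
 * "path" are hypothetical locals.
 */
#if 0
	if (pread(kd->pmfd, buf, len, off) != (ssize_t)len) {
		/* Soft error: stash the message for kvm_geterr(). */
		_kvm_err(kd, NULL, "short read at offset %jd", (intmax_t)off);
		return (-1);
	}
	/* Hard error: prints "<program>: <message>: <errno string>". */
	_kvm_syserr(kd, kd->program, "cannot open %s", path);
#endif
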
void *
_kvm_malloc(kvm_t *kd, size_t n)
{
	void *p;

	if ((p = calloc(n, sizeof(char))) == NULL)
		_kvm_err(kd, kd->program, "can't allocate %zu bytes: %s",
			 n, strerror(errno));
	return (p);
}

int
_kvm_probe_elf_kernel(kvm_t *kd, int class, int machine)
{

	return (kd->nlehdr.e_ident[EI_CLASS] == class &&
	    ((machine == EM_PPC || machine == EM_PPC64) ?
	     kd->nlehdr.e_type == ET_DYN : kd->nlehdr.e_type == ET_EXEC) &&
	    kd->nlehdr.e_machine == machine);
}

int
_kvm_is_minidump(kvm_t *kd)
{
	char minihdr[8];

	if (kd->rawdump)
		return (0);
	if (pread(kd->pmfd, &minihdr, 8, 0) == 8 &&
	    memcmp(&minihdr, "minidump", 8) == 0)
		return (1);
	return (0);
}

/*
 * The powerpc backend has a hack to strip a leading kerneldump
 * header from the core before treating it as an ELF header.
 *
 * We can add that here if we can get a change to libelf to support
 * an initial offset into the file.  Alternatively we could patch
 * savecore to extract cores from a regular file instead.
 */
int
_kvm_read_core_phdrs(kvm_t *kd, size_t *phnump, GElf_Phdr **phdrp)
{
	GElf_Ehdr ehdr;
	GElf_Phdr *phdr;
	Elf *elf;
	size_t i, phnum;

	elf = elf_begin(kd->pmfd, ELF_C_READ, NULL);
	if (elf == NULL) {
		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
		return (-1);
	}
	if (elf_kind(elf) != ELF_K_ELF) {
		_kvm_err(kd, kd->program, "invalid core");
		goto bad;
	}
	if (gelf_getclass(elf) != kd->nlehdr.e_ident[EI_CLASS]) {
		_kvm_err(kd, kd->program, "invalid core");
		goto bad;
	}
	if (gelf_getehdr(elf, &ehdr) == NULL) {
		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
		goto bad;
	}
	if (ehdr.e_type != ET_CORE) {
		_kvm_err(kd, kd->program, "invalid core");
		goto bad;
	}
	if (ehdr.e_machine != kd->nlehdr.e_machine) {
		_kvm_err(kd, kd->program, "invalid core");
		goto bad;
	}

	if (elf_getphdrnum(elf, &phnum) == -1) {
		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
		goto bad;
	}

	phdr = calloc(phnum, sizeof(*phdr));
	if (phdr == NULL) {
		_kvm_err(kd, kd->program, "failed to allocate phdrs");
		goto bad;
	}

	for (i = 0; i < phnum; i++) {
		if (gelf_getphdr(elf, i, &phdr[i]) == NULL) {
			free(phdr);
			_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
			goto bad;
		}
	}
	elf_end(elf);
	*phnump = phnum;
	*phdrp = phdr;
	return (0);

bad:
	elf_end(elf);
	return (-1);
}

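/*
 * Illustrative sketch (not compiled): how a caller might consume the
 * program headers returned above, scanning PT_LOAD segments to learn
 * where each physical range lives in the core file.  "hdrs" and
 * "nhdrs" are hypothetical locals.
 */
#if 0
	GElf_Phdr *hdrs;
	size_t i, nhdrs;

	if (_kvm_read_core_phdrs(kd, &nhdrs, &hdrs) == -1)
		return (-1);
	for (i = 0; i < nhdrs; i++) {
		if (hdrs[i].p_type != PT_LOAD)
			continue;
		/*
		 * Physical range [p_paddr, p_paddr + p_memsz) is stored
		 * at file offset p_offset in the core.
		 */
	}
	free(hdrs);
#endif
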
/*
 * Transform v such that only bits [bit0, bitN) may be set.  Generates a
 * bitmask covering the number of bits, then shifts it so that bit0 is the
 * first bit of the mask.
 */
static uint64_t
bitmask_range(uint64_t v, uint64_t bit0, uint64_t bitN)
{
	if (bit0 == 0 && bitN == BITS_IN(v))
		return (v);

	return (v & (((1ULL << (bitN - bit0)) - 1ULL) << bit0));
}

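/*
 * Worked example: bitmask_range(0xffff, 4, 12) builds the mask
 * ((1 << (12 - 4)) - 1) << 4 == 0xff0, so only bits [4, 12) survive:
 * 0xffff & 0xff0 == 0xff0.
 */
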
/*
 * Returns the number of bits set in a given 64-bit word array between
 * bit0 (inclusive) and bitN (exclusive).  bit0 may be non-zero in the
 * case of counting backwards from bitN.
 */
static uint64_t
popcount_bytes(uint64_t *addr, uint32_t bit0, uint32_t bitN)
{
	uint32_t res = bitN - bit0;
	uint64_t count = 0;
	uint32_t bound;

	/* Align to 64-bit boundary on the left side if needed. */
	if ((bit0 % BITS_IN(*addr)) != 0) {
		bound = MIN(bitN, roundup2(bit0, BITS_IN(*addr)));
		count += __bitcount64(bitmask_range(*addr, bit0, bound));
		res -= (bound - bit0);
		addr++;
	}

	while (res > 0) {
		bound = MIN(res, BITS_IN(*addr));
		count += __bitcount64(bitmask_range(*addr, 0, bound));
		res -= bound;
		addr++;
	}

	return (count);
}

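/*
 * Worked example (illustrative, not compiled): counting across a word
 * boundary.  With words[0] == ~0ULL and words[1] == 0x3f, the call
 * popcount_bytes(words, 60, 70) counts bits [60, 64) of words[0]
 * (4 bits set) plus bits [0, 6) of words[1] (6 bits set) and returns 10.
 */
#if 0
	uint64_t words[2] = { ~0ULL, 0x3f };

	assert(popcount_bytes(words, 60, 70) == 10);
#endif
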
void *
_kvm_pmap_get(kvm_t *kd, u_long idx, size_t len)
{
	uintptr_t off = idx * len;

	if ((off_t)off >= kd->pt_sparse_off)
		return (NULL);
	return (void *)((uintptr_t)kd->page_map + off);
}

void *
_kvm_map_get(kvm_t *kd, u_long pa, unsigned int page_size)
{
	off_t off;
	uintptr_t addr;

	off = _kvm_pt_find(kd, pa, page_size);
	if (off == -1)
		return (NULL);

	addr = (uintptr_t)kd->page_map + off;
	if (off >= kd->pt_sparse_off)
		addr = (uintptr_t)kd->sparse_map + (off - kd->pt_sparse_off);
	return (void *)addr;
}

int
_kvm_pt_init(kvm_t *kd, size_t dump_avail_size, off_t dump_avail_off,
    size_t map_len, off_t map_off, off_t sparse_off, int page_size)
{
	uint64_t *addr;
	uint32_t *popcount_bin;
	int bin_popcounts = 0;
	uint64_t pc_bins, res;
	ssize_t rd;

	kd->dump_avail_size = dump_avail_size;
	if (dump_avail_size > 0) {
		kd->dump_avail = mmap(NULL, kd->dump_avail_size, PROT_READ,
		    MAP_PRIVATE, kd->pmfd, dump_avail_off);
		if (kd->dump_avail == MAP_FAILED) {
			_kvm_err(kd, kd->program,
			    "cannot map dump_avail: %s", strerror(errno));
			return (-1);
		}
	} else {
		/*
		 * Older minidump versions don't provide dump_avail[],
		 * so the bitmap is fully populated from 0 to
		 * last_pa.  Create an implied dump_avail that
		 * expresses this.
		 */
		kd->dump_avail = calloc(4, sizeof(uint64_t));
		if (kd->dump_avail == NULL) {
			_kvm_err(kd, kd->program,
			    "cannot allocate dump_avail");
			return (-1);
		}
		kd->dump_avail[1] = _kvm64toh(kd, map_len * 8 * page_size);
	}

	/*
	 * Map the bitmap specified by the arguments.
	 */
	kd->pt_map = _kvm_malloc(kd, map_len);
	if (kd->pt_map == NULL) {
		_kvm_err(kd, kd->program, "cannot allocate %zu bytes for bitmap",
		    map_len);
		return (-1);
	}
	rd = pread(kd->pmfd, kd->pt_map, map_len, map_off);
	if (rd < 0 || rd != (ssize_t)map_len) {
		_kvm_err(kd, kd->program, "cannot read %zu bytes for bitmap",
		    map_len);
		return (-1);
	}
	kd->pt_map_size = map_len;

	/*
	 * Generate a popcount cache for every POPCOUNT_BITS in the bitmap,
	 * so lookups only have to calculate the number of bits set between
	 * a cache point and their bit.  This reduces lookups to O(1),
	 * without significantly increasing memory requirements.
	 *
	 * Round up the number of bins so that 'upper half' lookups work for
	 * the final bin, if needed.  The first popcount is 0, since no bits
	 * precede bit 0, so add 1 for that also.  Without this, extra work
	 * would be needed to handle the first PTEs in _kvm_pt_find().
	 */
	addr = kd->pt_map;
	res = map_len;
	pc_bins = 1 + (res * NBBY + POPCOUNT_BITS / 2) / POPCOUNT_BITS;
	kd->pt_popcounts = calloc(pc_bins, sizeof(uint32_t));
	if (kd->pt_popcounts == NULL) {
		_kvm_err(kd, kd->program, "cannot allocate popcount bins");
		return (-1);
	}

	for (popcount_bin = &kd->pt_popcounts[1]; res > 0;
	    addr++, res -= sizeof(*addr)) {
		*popcount_bin += popcount_bytes(addr, 0,
		    MIN(res * NBBY, BITS_IN(*addr)));
		if (++bin_popcounts == POPCOUNTS_IN(*addr)) {
			popcount_bin++;
			*popcount_bin = *(popcount_bin - 1);
			bin_popcounts = 0;
		}
	}

	assert(pc_bins * sizeof(*popcount_bin) ==
	    ((uintptr_t)popcount_bin - (uintptr_t)kd->pt_popcounts));

	kd->pt_sparse_off = sparse_off;
	kd->pt_sparse_size = (uint64_t)*popcount_bin * page_size;
	kd->pt_page_size = page_size;

	/*
	 * Map the sparse page array.  This is useful for performing point
	 * lookups of specific pages, e.g. for kvm_walk_pages.  Generally,
	 * this is much larger than is reasonable to read in up front, so
	 * mmap it in instead.
	 */
	kd->sparse_map = mmap(NULL, kd->pt_sparse_size, PROT_READ,
	    MAP_PRIVATE, kd->pmfd, kd->pt_sparse_off);
	if (kd->sparse_map == MAP_FAILED) {
		_kvm_err(kd, kd->program, "cannot map %" PRIu64
		    " bytes from fd %d offset %jd for sparse map: %s",
		    kd->pt_sparse_size, kd->pmfd,
		    (intmax_t)kd->pt_sparse_off, strerror(errno));
		return (-1);
	}
	return (0);
}

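/*
 * Illustrative invariant check (not compiled): after _kvm_pt_init(),
 * pt_popcounts[i] holds the number of bits set in the bitmap strictly
 * before bit i * POPCOUNT_BITS, with pt_popcounts[0] == 0.
 */
#if 0
	uint64_t bin;

	for (bin = 0; bin * POPCOUNT_BITS < kd->pt_map_size * NBBY; bin++)
		assert(kd->pt_popcounts[bin] ==
		    popcount_bytes(kd->pt_map, 0, bin * POPCOUNT_BITS));
#endif
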
int
_kvm_pmap_init(kvm_t *kd, uint32_t pmap_size, off_t pmap_off)
{
	ssize_t exp_len = pmap_size;

	kd->page_map_size = pmap_size;
	kd->page_map_off = pmap_off;
	kd->page_map = _kvm_malloc(kd, pmap_size);
	if (kd->page_map == NULL) {
		_kvm_err(kd, kd->program, "cannot allocate %u bytes "
		    "for page map", pmap_size);
		return (-1);
	}
	if (pread(kd->pmfd, kd->page_map, pmap_size, pmap_off) != exp_len) {
		_kvm_err(kd, kd->program, "cannot read %u bytes from "
		    "offset %jd for page map", pmap_size, (intmax_t)pmap_off);
		return (-1);
	}
	return (0);
}

static inline uint64_t
dump_avail_n(kvm_t *kd, long i)
{
	return (_kvm64toh(kd, kd->dump_avail[i]));
}

uint64_t
_kvm_pa_bit_id(kvm_t *kd, uint64_t pa, unsigned int page_size)
{
	uint64_t adj;
	long i;

	adj = 0;
	for (i = 0; dump_avail_n(kd, i + 1) != 0; i += 2) {
		if (pa >= dump_avail_n(kd, i + 1)) {
			adj += howmany(dump_avail_n(kd, i + 1), page_size) -
			    dump_avail_n(kd, i) / page_size;
		} else {
			return (pa / page_size -
			    dump_avail_n(kd, i) / page_size + adj);
		}
	}
	return (_KVM_BIT_ID_INVALID);
}

uint64_t
_kvm_bit_id_pa(kvm_t *kd, uint64_t bit_id, unsigned int page_size)
{
	uint64_t sz;
	long i;

	for (i = 0; dump_avail_n(kd, i + 1) != 0; i += 2) {
		sz = howmany(dump_avail_n(kd, i + 1), page_size) -
		    dump_avail_n(kd, i) / page_size;
		if (bit_id < sz) {
			return (rounddown2(dump_avail_n(kd, i), page_size) +
			    bit_id * page_size);
		}
		bit_id -= sz;
	}
	return (_KVM_PA_INVALID);
}

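/*
 * Worked example: with 4 KB pages and a dump_avail of
 * { 0x1000, 0x5000, 0x9000, 0xb000, 0, 0 }, the first range covers
 * pages 1-4 (4 bits) and the second pages 9-10 (2 bits).  So
 * _kvm_pa_bit_id(kd, 0x9000, 0x1000) skips the first range's 4 bits
 * and returns 4, while _kvm_bit_id_pa(kd, 4, 0x1000) inverts that and
 * returns 0x9000.  Addresses outside every range map to
 * _KVM_BIT_ID_INVALID / _KVM_PA_INVALID.
 */
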
/*
 * Find the file offset for the given physical page address, or -1 if the
 * page is not present in the dump.
 *
 * A page's offset is represented by the sparse page base offset plus the
 * number of bits set before its bit multiplied by page size.  This means
 * that if a page exists in the dump, it's necessary to know how many pages
 * in the dump precede it.  Reduce this O(n) counting to O(1) by caching the
 * number of bits set at POPCOUNT_BITS intervals.
 *
 * Then to find the number of pages before the requested address, simply
 * index into the cache and count the number of bits set between that cache
 * bin and the page's bit.  Halve the number of bytes that have to be
 * checked by also counting down from the next higher bin if it's closer.
 */
off_t
_kvm_pt_find(kvm_t *kd, uint64_t pa, unsigned int page_size)
{
	uint64_t *bitmap = kd->pt_map;
	uint64_t pte_bit_id = _kvm_pa_bit_id(kd, pa, page_size);
	uint64_t pte_u64 = pte_bit_id / BITS_IN(*bitmap);
	uint64_t popcount_id = pte_bit_id / POPCOUNT_BITS;
	uint64_t pte_mask = 1ULL << (pte_bit_id % BITS_IN(*bitmap));
	uint64_t bitN;
	uint32_t count;

	/* Check whether the page address requested is in the dump. */
	if (pte_bit_id == _KVM_BIT_ID_INVALID ||
	    pte_bit_id >= (kd->pt_map_size * NBBY) ||
	    (bitmap[pte_u64] & pte_mask) == 0)
		return (-1);

	/*
	 * Add/sub popcounts from the bitmap until the PTE's bit is reached.
	 * For bits that are in the upper half between the calculated
	 * popcount id and the next one, use the next one and subtract to
	 * minimize the number of popcounts required.
	 */
	if ((pte_bit_id % POPCOUNT_BITS) < (POPCOUNT_BITS / 2)) {
		count = kd->pt_popcounts[popcount_id] + popcount_bytes(
		    bitmap + popcount_id * POPCOUNTS_IN(*bitmap),
		    0, pte_bit_id - popcount_id * POPCOUNT_BITS);
	} else {
		/*
		 * Counting in reverse is trickier, since we must avoid
		 * reading from bytes that are not in range, and invert.
		 */
		uint64_t pte_u64_bit_off = pte_u64 * BITS_IN(*bitmap);

		popcount_id++;
		bitN = MIN(popcount_id * POPCOUNT_BITS,
		    kd->pt_map_size * BITS_IN(uint8_t));
		count = kd->pt_popcounts[popcount_id] - popcount_bytes(
		    bitmap + pte_u64,
		    pte_bit_id - pte_u64_bit_off, bitN - pte_u64_bit_off);
	}

	/*
	 * This can only happen if the core is truncated.  Treat these
	 * entries as if they don't exist, since their backing doesn't.
	 */
	if (count >= (kd->pt_sparse_size / page_size))
		return (-1);

	return (kd->pt_sparse_off + (uint64_t)count * page_size);
}

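/*
 * Illustrative sketch (not compiled): translating a physical address
 * to a file offset and reading the page contents from the sparse
 * region.  "buf" is a hypothetical page-sized buffer.
 */
#if 0
	off_t off;

	off = _kvm_pt_find(kd, pa, kd->pt_page_size);
	if (off == -1)
		return (0);	/* Page is not present in the minidump. */
	if (pread(kd->pmfd, buf, kd->pt_page_size, off) !=
	    (ssize_t)kd->pt_page_size)
		return (0);
#endif
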
static int
kvm_fdnlist(kvm_t *kd, struct kvm_nlist *list)
{
	kvaddr_t addr;
	int error, nfail;

	if (kd->resolve_symbol == NULL) {
		struct nlist *nl;
		int count, i;

		for (count = 0; list[count].n_name != NULL &&
		     list[count].n_name[0] != '\0'; count++)
			;
		nl = calloc(count + 1, sizeof(*nl));
		if (nl == NULL)
			return (-1);
		for (i = 0; i < count; i++)
			nl[i].n_name = list[i].n_name;
		nfail = __fdnlist(kd->nlfd, nl);
		for (i = 0; i < count; i++) {
			list[i].n_type = nl[i].n_type;
			list[i].n_value = nl[i].n_value;
		}
		free(nl);
		return (nfail);
	}

	nfail = 0;
	while (list->n_name != NULL && list->n_name[0] != '\0') {
		error = kd->resolve_symbol(list->n_name, &addr);
		if (error != 0) {
			nfail++;
			list->n_value = 0;
			list->n_type = 0;
		} else {
			list->n_value = addr;
			list->n_type = N_DATA | N_EXT;
		}
		list++;
	}
	return (nfail);
}

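/*
 * Illustrative sketch (not compiled): a resolve_symbol callback such
 * as the one installed via kvm_open2(3).  It must fill in the address
 * and return 0 on success, or nonzero on failure, matching the
 * contract kvm_fdnlist() relies on above.  "my_symbol_table_lookup"
 * is hypothetical.
 */
#if 0
static int
my_resolver(const char *name, kvaddr_t *addr)
{

	return (my_symbol_table_lookup(name, addr) ? 0 : -1);
}
#endif
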
/*
 * Walk the list of unresolved symbols, generate a new list with the
 * symbol names prefixed, try again, and merge back what we could resolve.
 */
static int
kvm_fdnlist_prefix(kvm_t *kd, struct kvm_nlist *nl, int missing,
    const char *prefix, kvaddr_t (*validate_fn)(kvm_t *, kvaddr_t))
{
	struct kvm_nlist *n, *np, *p;
	char *cp, *ce;
	const char *ccp;
	size_t len;
	int slen, unresolved;

	/*
	 * Calculate the space we need to malloc for nlist and names.
	 * We are going to store the name twice for later lookups: once
	 * with the prefix and once as the unmodified name, delimited by
	 * a '\0'.
	 */
	len = 0;
	unresolved = 0;
	for (p = nl; p->n_name && p->n_name[0]; ++p) {
		if (p->n_type != N_UNDF)
			continue;
		len += sizeof(struct kvm_nlist) + strlen(prefix) +
		    2 * (strlen(p->n_name) + 1);
		unresolved++;
	}
	if (unresolved == 0)
		return (unresolved);
	/* Add space for the terminating nlist entry. */
	len += sizeof(struct kvm_nlist);
	unresolved++;

	/* Alloc one chunk for (nlist, [names]) and setup pointers. */
	n = np = calloc(1, len);
	if (n == NULL)
		return (missing);
	cp = ce = (char *)np;
	cp += unresolved * sizeof(struct kvm_nlist);
	ce += len;

	/* Generate shortened nlist with special prefix. */
	unresolved = 0;
	for (p = nl; p->n_name && p->n_name[0]; ++p) {
		if (p->n_type != N_UNDF)
			continue;
		*np = *p;
		/* Save the new\0orig. name so we can later match it again. */
		slen = snprintf(cp, ce - cp, "%s%s%c%s", prefix,
		    (prefix[0] != '\0' && p->n_name[0] == '_') ?
			(p->n_name + 1) : p->n_name, '\0', p->n_name);
		if (slen < 0 || slen >= ce - cp)
			continue;
		np->n_name = cp;
		cp += slen + 1;
		np++;
		unresolved++;
	}

	/* Do lookup on the reduced list. */
	np = n;
	unresolved = kvm_fdnlist(kd, np);

	/* Check if we could resolve further symbols and update the list. */
	if (unresolved >= 0 && unresolved < missing) {
		/* Find the first freshly resolved entry. */
		for (; np->n_name && np->n_name[0]; np++)
			if (np->n_type != N_UNDF)
				break;
		/*
		 * The lists are both in the same order,
		 * so we can walk them in parallel.
		 */
		for (p = nl; np->n_name && np->n_name[0] &&
		    p->n_name && p->n_name[0]; ++p) {
			if (p->n_type != N_UNDF)
				continue;
			/* Skip expanded name and compare to orig. one. */
			ccp = np->n_name + strlen(np->n_name) + 1;
			if (strcmp(ccp, p->n_name) != 0)
				continue;
			/* Update nlist with new, translated results. */
			p->n_type = np->n_type;
			if (validate_fn)
				p->n_value = (*validate_fn)(kd, np->n_value);
			else
				p->n_value = np->n_value;
			missing--;
			/* Find next freshly resolved entry. */
			for (np++; np->n_name && np->n_name[0]; np++)
				if (np->n_type != N_UNDF)
					break;
		}
	}
	/* We could assert missing == unresolved here. */

	free(n);
	return (unresolved);
}

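/*
 * Worked example: with prefix VNET_SYMPREFIX ("vnet_entry_") and the
 * unresolved name "_ifnet", the buffer stores
 * "vnet_entry_ifnet\0_ifnet".  The nlist lookup sees only the prefixed
 * name; the merge loop above then steps past it to the original name
 * after the '\0' to match results back to the caller's list.
 */
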
int
_kvm_nlist(kvm_t *kd, struct kvm_nlist *nl, int initialize)
{
	struct kvm_nlist *p;
	int nvalid;
	struct kld_sym_lookup lookup;
	int error;
	const char *prefix = "";
	char symname[1024]; /* XXX-BZ symbol name length limit? */
	int tried_vnet, tried_dpcpu;

	/*
	 * If we can't use the kld symbol lookup, revert to the
	 * slow library call.
	 */
	if (!ISALIVE(kd)) {
		error = kvm_fdnlist(kd, nl);
		if (error <= 0)			/* Hard error or success. */
			return (error);

		if (_kvm_vnet_initialized(kd, initialize))
			error = kvm_fdnlist_prefix(kd, nl, error,
			    VNET_SYMPREFIX, _kvm_vnet_validaddr);

		if (error > 0 && _kvm_dpcpu_initialized(kd, initialize))
			error = kvm_fdnlist_prefix(kd, nl, error,
			    DPCPU_SYMPREFIX, _kvm_dpcpu_validaddr);

		return (error);
	}

	/*
	 * We can use the kld lookup syscall.  Go through each nlist entry
	 * and look it up with a kldsym(2) syscall.
	 */
	nvalid = 0;
	tried_vnet = 0;
	tried_dpcpu = 0;
again:
	for (p = nl; p->n_name && p->n_name[0]; ++p) {
		if (p->n_type != N_UNDF)
			continue;

		lookup.version = sizeof(lookup);
		lookup.symvalue = 0;
		lookup.symsize = 0;

		error = snprintf(symname, sizeof(symname), "%s%s", prefix,
		    (prefix[0] != '\0' && p->n_name[0] == '_') ?
			(p->n_name + 1) : p->n_name);
		if (error < 0 || error >= (int)sizeof(symname))
			continue;
		lookup.symname = symname;
		if (lookup.symname[0] == '_')
			lookup.symname++;

		if (kldsym(0, KLDSYM_LOOKUP, &lookup) != -1) {
			p->n_type = N_TEXT;
			if (_kvm_vnet_initialized(kd, initialize) &&
			    strcmp(prefix, VNET_SYMPREFIX) == 0)
				p->n_value =
				    _kvm_vnet_validaddr(kd, lookup.symvalue);
			else if (_kvm_dpcpu_initialized(kd, initialize) &&
			    strcmp(prefix, DPCPU_SYMPREFIX) == 0)
				p->n_value =
				    _kvm_dpcpu_validaddr(kd, lookup.symvalue);
			else
				p->n_value = lookup.symvalue;
			++nvalid;
			/* lookup.symsize */
		}
	}

	/*
	 * Count the entries that were not found.  If there are any, retry
	 * with the virtualized (VNET) or DPCPU symbol name prefixes.
	 */
	error = ((p - nl) - nvalid);
	if (error && _kvm_vnet_initialized(kd, initialize) && !tried_vnet) {
		tried_vnet = 1;
		prefix = VNET_SYMPREFIX;
		goto again;
	}
	if (error && _kvm_dpcpu_initialized(kd, initialize) && !tried_dpcpu) {
		tried_dpcpu = 1;
		prefix = DPCPU_SYMPREFIX;
		goto again;
	}

	/*
	 * Return the number of entries that were not found.  If any remain
	 * unresolved, also fill in the internal error buffer.
	 */
	error = ((p - nl) - nvalid);
	if (error)
		_kvm_syserr(kd, kd->program, "kvm_nlist");
	return (error);
}

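/*
 * Illustrative sketch (not compiled): this function backs the public
 * kvm_nlist2(3) interface.  A caller might resolve a kernel symbol
 * like this; "allproc" is just an example name.
 */
#if 0
	struct kvm_nlist names[] = {
		{ .n_name = "allproc" },
		{ .n_name = NULL },
	};

	if (kvm_nlist2(kd, names) != 0)
		errx(1, "cannot resolve allproc: %s", kvm_geterr(kd));
	/* names[0].n_value now holds the symbol's kernel address. */
#endif
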
int
_kvm_bitmap_init(struct kvm_bitmap *bm, u_long bitmapsize, u_long *idx)
{

	*idx = ULONG_MAX;
	bm->map = calloc(bitmapsize, sizeof *bm->map);
	if (bm->map == NULL)
		return (0);
	bm->size = bitmapsize;
	return (1);
}

void
_kvm_bitmap_set(struct kvm_bitmap *bm, u_long bm_index)
{

	if (bm_index / 8 < bm->size)
		bm->map[bm_index / 8] |= (1U << (bm_index % 8));
}

int
_kvm_bitmap_next(struct kvm_bitmap *bm, u_long *idx)
{
	u_long first_invalid = bm->size * CHAR_BIT;

	if (*idx == ULONG_MAX)
		*idx = 0;
	else
		(*idx)++;

	/* Find the next valid idx. */
	for (; *idx < first_invalid; (*idx)++) {
		unsigned int mask = 1U << (*idx % CHAR_BIT);
		if ((bm->map[*idx / CHAR_BIT] & mask) != 0)
			break;
	}

	return (*idx < first_invalid);
}

void
_kvm_bitmap_deinit(struct kvm_bitmap *bm)
{

	free(bm->map);
}

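/*
 * Illustrative sketch (not compiled): typical use of the bitmap
 * helpers above, as done by the minidump walk_pages code to visit
 * each populated bit exactly once.  "bitmapsize" and "some_index"
 * are hypothetical.
 */
#if 0
	struct kvm_bitmap bm;
	u_long idx;

	if (!_kvm_bitmap_init(&bm, bitmapsize, &idx))
		return (0);
	_kvm_bitmap_set(&bm, some_index);
	while (_kvm_bitmap_next(&bm, &idx)) {
		/* Visit bit idx. */
	}
	_kvm_bitmap_deinit(&bm);
#endif
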
int
_kvm_visit_cb(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *arg, u_long pa,
    u_long kmap_vaddr, u_long dmap_vaddr, vm_prot_t prot, size_t len,
    unsigned int page_size)
{
	unsigned int pgsz = page_size ? page_size : len;
	struct kvm_page p = {
		.kp_version = LIBKVM_WALK_PAGES_VERSION,
		.kp_paddr = pa,
		.kp_kmap_vaddr = kmap_vaddr,
		.kp_dmap_vaddr = dmap_vaddr,
		.kp_prot = prot,
		.kp_offset = _kvm_pt_find(kd, pa, pgsz),
		.kp_len = len,
	};

	return (cb(&p, arg));
}
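
/*
 * Illustrative sketch (not compiled): the callback shape invoked via
 * _kvm_visit_cb(), as seen by kvm_walk_pages(3) consumers.
 */
#if 0
static int
print_page(struct kvm_page *p, void *arg)
{

	printf("pa %#lx len %zu offset %jd\n",
	    p->kp_paddr, p->kp_len, (intmax_t)p->kp_offset);
	return (1);	/* Nonzero continues the walk. */
}

/* ... later: kvm_walk_pages(kd, print_page, NULL) drives the callback. */
#endif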