/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 */

#include <kvm.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
#include <limits.h>
#include <fcntl.h>
#include <strings.h>
#include <errno.h>
#include <sys/mem.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/dumphdr.h>
#include <sys/sysmacros.h>

struct _kvmd {
        struct dumphdr kvm_dump;
        char *kvm_debug;
        int kvm_openflag;
        int kvm_corefd;
        int kvm_kmemfd;
        int kvm_memfd;
        size_t kvm_coremapsize;
        char *kvm_core;
        dump_map_t *kvm_map;
        pfn_t *kvm_pfn;
        struct as *kvm_kas;
        proc_t *kvm_practive;
        pid_t kvm_pid;
        char kvm_namelist[MAXNAMELEN + 1];
        boolean_t kvm_namelist_core;
        proc_t kvm_proc;
};

#define PREAD (ssize_t (*)(int, void *, size_t, offset_t))pread64
#define PWRITE (ssize_t (*)(int, void *, size_t, offset_t))pwrite64

static int kvm_nlist_core(kvm_t *kd, struct nlist nl[], const char *err);

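/*
 * Common failure path: report the error to stderr (when an error prefix is
 * supplied, or when KVM_DEBUG is set), tear down any partially constructed
 * handle, and return NULL so callers can simply "return (fail(...))".
 */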
static kvm_t *
fail(kvm_t *kd, const char *err, const char *message, ...)
{
        va_list args;

        va_start(args, message);
        if (err || (kd && kd->kvm_debug)) {
                (void) fprintf(stderr, "%s: ", err ? err : "KVM_DEBUG");
                (void) vfprintf(stderr, message, args);
                (void) fprintf(stderr, "\n");
        }
        va_end(args);
        if (kd != NULL)
                (void) kvm_close(kd);
        return (NULL);
}

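/*
 * Open a kernel image for examination: either the live kernel (via
 * /dev/kmem, /dev/allkmem and /dev/mem) or a libkvm-compatible crash dump.
 * For dumps, the header is validated and the core file is mapped so that
 * subsequent reads can be satisfied with bcopy(3C); the "kas" and
 * "practive" symbols are then resolved from the namelist.
 */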
/*ARGSUSED*/
kvm_t *
kvm_open(const char *namelist, const char *corefile, const char *swapfile,
    int flag, const char *err)
{
        kvm_t *kd;
        struct stat64 memstat, kmemstat, allkmemstat, corestat;
        struct nlist nl[3] = { { "kas" }, { "practive" }, { "" } };

        if ((kd = calloc(1, sizeof (kvm_t))) == NULL)
                return (fail(NULL, err, "cannot allocate space for kvm_t"));

        kd->kvm_corefd = kd->kvm_kmemfd = kd->kvm_memfd = -1;
        kd->kvm_debug = getenv("KVM_DEBUG");

        if ((kd->kvm_openflag = flag) != O_RDONLY && flag != O_RDWR)
                return (fail(kd, err, "illegal flag 0x%x to kvm_open()", flag));

        if (corefile == NULL)
                corefile = "/dev/kmem";

        if (stat64(corefile, &corestat) == -1)
                return (fail(kd, err, "cannot stat %s", corefile));

        if (S_ISCHR(corestat.st_mode)) {
                if (stat64("/dev/mem", &memstat) == -1)
                        return (fail(kd, err, "cannot stat /dev/mem"));

                if (stat64("/dev/kmem", &kmemstat) == -1)
                        return (fail(kd, err, "cannot stat /dev/kmem"));

                if (stat64("/dev/allkmem", &allkmemstat) == -1)
                        return (fail(kd, err, "cannot stat /dev/allkmem"));
                if (corestat.st_rdev == memstat.st_rdev ||
                    corestat.st_rdev == kmemstat.st_rdev ||
                    corestat.st_rdev == allkmemstat.st_rdev) {
                        char *kmem = (corestat.st_rdev == allkmemstat.st_rdev ?
                            "/dev/allkmem" : "/dev/kmem");

                        if ((kd->kvm_kmemfd = open64(kmem, flag)) == -1)
                                return (fail(kd, err, "cannot open %s", kmem));
                        if ((kd->kvm_memfd = open64("/dev/mem", flag)) == -1)
                                return (fail(kd, err, "cannot open /dev/mem"));
                }
        } else {
                if ((kd->kvm_corefd = open64(corefile, flag)) == -1)
                        return (fail(kd, err, "cannot open %s", corefile));
                if (pread64(kd->kvm_corefd, &kd->kvm_dump,
                    sizeof (kd->kvm_dump), 0) != sizeof (kd->kvm_dump))
                        return (fail(kd, err, "cannot read dump header"));
                if (kd->kvm_dump.dump_magic != DUMP_MAGIC)
                        return (fail(kd, err, "%s is not a kernel core file "
                            "(bad magic number %x)", corefile,
                            kd->kvm_dump.dump_magic));
                if (kd->kvm_dump.dump_version != DUMP_VERSION)
                        return (fail(kd, err,
                            "libkvm version (%u) != corefile version (%u)",
                            DUMP_VERSION, kd->kvm_dump.dump_version));
                if (kd->kvm_dump.dump_wordsize != DUMP_WORDSIZE)
                        return (fail(kd, err, "%s is a %d-bit core file - "
                            "cannot examine with %d-bit libkvm", corefile,
                            kd->kvm_dump.dump_wordsize, DUMP_WORDSIZE));
                /*
                 * We try to mmap(2) the entire corefile for performance
                 * (so we can use bcopy(3C) rather than pread(2)). Failing
                 * that, we insist on at least mmap(2)ing the dump map.
                 */
                kd->kvm_coremapsize = (size_t)corestat.st_size;
                if (corestat.st_size > LONG_MAX ||
                    (kd->kvm_core = mmap64(0, kd->kvm_coremapsize,
                    PROT_READ, MAP_SHARED, kd->kvm_corefd, 0)) == MAP_FAILED) {
                        kd->kvm_coremapsize = kd->kvm_dump.dump_data;
                        if ((kd->kvm_core = mmap64(0, kd->kvm_coremapsize,
                            PROT_READ, MAP_SHARED, kd->kvm_corefd, 0)) ==
                            MAP_FAILED)
                                return (fail(kd, err, "cannot mmap corefile"));
                }
                kd->kvm_map = (void *)(kd->kvm_core + kd->kvm_dump.dump_map);
                kd->kvm_pfn = (void *)(kd->kvm_core + kd->kvm_dump.dump_pfn);
        }

        if (namelist == NULL)
                namelist = "/dev/ksyms";

        (void) strncpy(kd->kvm_namelist, namelist, MAXNAMELEN);

        if (kvm_nlist(kd, nl) == -1) {
                if (kd->kvm_corefd == -1) {
                        return (fail(kd, err, "%s is not a %d-bit "
                            "kernel namelist", namelist, DUMP_WORDSIZE));
                }

                if (kvm_nlist_core(kd, nl, err) == -1)
                        return (NULL); /* fail() already called */
        }

        kd->kvm_kas = (struct as *)nl[0].n_value;
        kd->kvm_practive = (proc_t *)nl[1].n_value;

        (void) kvm_setproc(kd);
        return (kd);
}

int
kvm_close(kvm_t *kd)
{
        if (kd->kvm_core != NULL && kd->kvm_core != MAP_FAILED)
                (void) munmap(kd->kvm_core, kd->kvm_coremapsize);
        if (kd->kvm_corefd != -1)
                (void) close(kd->kvm_corefd);
        if (kd->kvm_kmemfd != -1)
                (void) close(kd->kvm_kmemfd);
        if (kd->kvm_memfd != -1)
                (void) close(kd->kvm_memfd);
        if (kd->kvm_namelist_core)
                (void) unlink(kd->kvm_namelist);
        free(kd);
        return (0);
}

const char *
kvm_namelist(kvm_t *kd)
{
        return (kd->kvm_namelist);
}

int
kvm_nlist(kvm_t *kd, struct nlist nl[])
{
        return (nlist(kd->kvm_namelist, nl));
}

/*
 * If we don't have a name list, try to dig it out of the kernel crash dump.
 * (The symbols have been present in the dump, uncompressed, for nearly a
 * decade as of this writing -- and it is frankly surprising that the archaic
 * notion of a disjoint symbol table managed to survive that change.)
 */
static int
kvm_nlist_core(kvm_t *kd, struct nlist nl[], const char *err)
{
        dumphdr_t *dump = &kd->kvm_dump;
        char *msg = "couldn't extract symbols from dump";
        char *template = "/tmp/.libkvm.kvm_nlist_core.pid%d.XXXXXX";
        int fd, rval;

        if (dump->dump_ksyms_size != dump->dump_ksyms_csize) {
                (void) fail(kd, err, "%s: kernel symbols are compressed", msg);
                return (-1);
        }

        if (dump->dump_ksyms + dump->dump_ksyms_size > kd->kvm_coremapsize) {
                (void) fail(kd, err, "%s: kernel symbols not mapped", msg);
                return (-1);
        }

        /*
         * Because this temporary file may be left as a turd if the caller
         * does not properly call kvm_close(), we make sure that it clearly
         * indicates its origins.
         */
        (void) snprintf(kd->kvm_namelist, MAXNAMELEN, template, getpid());

        if ((fd = mkstemp(kd->kvm_namelist)) == -1) {
                (void) fail(kd, err, "%s: couldn't create temporary "
                    "symbols file: %s", msg, strerror(errno));
                return (-1);
        }

        kd->kvm_namelist_core = B_TRUE;

        do {
                rval = write(fd, (caddr_t)((uintptr_t)kd->kvm_core +
                    (uintptr_t)dump->dump_ksyms), dump->dump_ksyms_size);
        } while (rval < dump->dump_ksyms_size && errno == EINTR);

        if (rval < dump->dump_ksyms_size) {
                (void) fail(kd, err, "%s: couldn't write to temporary "
                    "symbols file: %s", msg, strerror(errno));
                (void) close(fd);
                return (-1);
        }

        (void) close(fd);

        if (kvm_nlist(kd, nl) == -1) {
                (void) fail(kd, err, "%s: symbols not valid", msg);
                return (-1);
        }

        return (0);
}

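/*
 * Translate an address in the given address space (or a physical address,
 * when "as" is NULL) to its offset within the crash dump.  Physical
 * addresses are resolved by binary search of the PFN array; virtual
 * addresses by walking the dump map hash chain.  Returns 0 if the page is
 * not present in the dump.
 */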
static offset_t
kvm_lookup(kvm_t *kd, struct as *as, uint64_t addr)
{
        uintptr_t pageoff = addr & (kd->kvm_dump.dump_pagesize - 1);
        uint64_t page = addr - pageoff;
        offset_t off = 0;

        if (kd->kvm_debug)
                fprintf(stderr, "kvm_lookup(%p, %llx):", (void *)as, addr);

        if (as == NULL) {       /* physical addressing mode */
                long first = 0;
                long last = kd->kvm_dump.dump_npages - 1;
                pfn_t target = (pfn_t)(page >> kd->kvm_dump.dump_pageshift);
                while (last >= first) {
                        long middle = (first + last) / 2;
                        pfn_t pfn = kd->kvm_pfn[middle];
                        if (kd->kvm_debug)
                                fprintf(stderr, " %ld ->", middle);
                        if (pfn == target) {
                                off = kd->kvm_dump.dump_data + pageoff +
                                    ((uint64_t)middle <<
                                    kd->kvm_dump.dump_pageshift);
                                break;
                        }
                        if (pfn < target)
                                first = middle + 1;
                        else
                                last = middle - 1;
                }
        } else {
                long hash = DUMP_HASH(&kd->kvm_dump, as, page);
                off = kd->kvm_map[hash].dm_first;
                while (off != 0) {
                        dump_map_t *dmp = (void *)(kd->kvm_core + off);
                        if (kd->kvm_debug)
                                fprintf(stderr, " %llx ->", off);
                        if (dmp < kd->kvm_map ||
                            dmp > kd->kvm_map + kd->kvm_dump.dump_hashmask ||
                            (off & (sizeof (offset_t) - 1)) != 0 ||
                            DUMP_HASH(&kd->kvm_dump, dmp->dm_as, dmp->dm_va) !=
                            hash) {
                                if (kd->kvm_debug)
                                        fprintf(stderr, " dump map corrupt\n");
                                return (0);
                        }
                        if (dmp->dm_va == page && dmp->dm_as == as) {
                                off = dmp->dm_data + pageoff;
                                break;
                        }
                        off = dmp->dm_next;
                }
        }
        if (kd->kvm_debug)
                fprintf(stderr, "%s found: %llx\n", off ? "" : " not", off);
        return (off);
}

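/*
 * Common read/write engine.  On a live kernel the request is forwarded to
 * /dev/kmem, /dev/mem, or /proc/<pid>/as of the current process, as
 * appropriate.  On a crash dump each page is located via kvm_lookup();
 * reads covered by the core mapping are bcopy'd, everything else goes
 * through pread64()/pwrite64() on the core file descriptor.  Returns the
 * number of bytes transferred, or -1 if nothing could be transferred.
 */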
static ssize_t
kvm_rw(kvm_t *kd, uint64_t addr, void *buf, size_t size,
    struct as *as, ssize_t (*prw)(int, void *, size_t, offset_t))
{
        offset_t off;
        size_t resid = size;

        /*
         * read/write of zero bytes always succeeds
         */
        if (size == 0)
                return (0);

        if (kd->kvm_core == NULL) {
                char procbuf[100];
                int procfd;
                ssize_t rval;

                if (as == kd->kvm_kas)
                        return (prw(kd->kvm_kmemfd, buf, size, addr));
                if (as == NULL)
                        return (prw(kd->kvm_memfd, buf, size, addr));

                (void) sprintf(procbuf, "/proc/%ld/as", kd->kvm_pid);
                if ((procfd = open64(procbuf, kd->kvm_openflag)) == -1)
                        return (-1);
                rval = prw(procfd, buf, size, addr);
                (void) close(procfd);
                return (rval);
        }

        while (resid != 0) {
                uintptr_t pageoff = addr & (kd->kvm_dump.dump_pagesize - 1);
                ssize_t len = MIN(resid, kd->kvm_dump.dump_pagesize - pageoff);

                if ((off = kvm_lookup(kd, as, addr)) == 0)
                        break;

                if (prw == PREAD && off < kd->kvm_coremapsize)
                        bcopy(kd->kvm_core + off, buf, len);
                else if ((len = prw(kd->kvm_corefd, buf, len, off)) <= 0)
                        break;
                resid -= len;
                addr += len;
                buf = (char *)buf + len;
        }
        return (resid < size ? size - resid : -1);
}

ssize_t
kvm_read(kvm_t *kd, uintptr_t addr, void *buf, size_t size)
{
        return (kvm_rw(kd, addr, buf, size, kd->kvm_kas, PREAD));
}

ssize_t
kvm_kread(kvm_t *kd, uintptr_t addr, void *buf, size_t size)
{
        return (kvm_rw(kd, addr, buf, size, kd->kvm_kas, PREAD));
}

ssize_t
kvm_uread(kvm_t *kd, uintptr_t addr, void *buf, size_t size)
{
        return (kvm_rw(kd, addr, buf, size, kd->kvm_proc.p_as, PREAD));
}

ssize_t
kvm_aread(kvm_t *kd, uintptr_t addr, void *buf, size_t size, struct as *as)
{
        return (kvm_rw(kd, addr, buf, size, as, PREAD));
}

ssize_t
kvm_pread(kvm_t *kd, uint64_t addr, void *buf, size_t size)
{
        return (kvm_rw(kd, addr, buf, size, NULL, PREAD));
}

ssize_t
kvm_write(kvm_t *kd, uintptr_t addr, const void *buf, size_t size)
{
        return (kvm_rw(kd, addr, (void *)buf, size, kd->kvm_kas, PWRITE));
}

ssize_t
kvm_kwrite(kvm_t *kd, uintptr_t addr, const void *buf, size_t size)
{
        return (kvm_rw(kd, addr, (void *)buf, size, kd->kvm_kas, PWRITE));
}

ssize_t
kvm_uwrite(kvm_t *kd, uintptr_t addr, const void *buf, size_t size)
{
        return (kvm_rw(kd, addr, (void *)buf, size, kd->kvm_proc.p_as, PWRITE));
}

ssize_t
kvm_awrite(kvm_t *kd, uintptr_t addr, const void *buf, size_t size,
    struct as *as)
{
        return (kvm_rw(kd, addr, (void *)buf, size, as, PWRITE));
}

ssize_t
kvm_pwrite(kvm_t *kd, uint64_t addr, const void *buf, size_t size)
{
        return (kvm_rw(kd, addr, (void *)buf, size, NULL, PWRITE));
}

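/*
 * Translate a virtual address in the given address space to a physical
 * address: on a live kernel via the MEM_VTOP ioctl on /dev/kmem, on a
 * crash dump via the PFN array.  Returns -1ULL if no translation exists.
 */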
uint64_t
kvm_physaddr(kvm_t *kd, struct as *as, uintptr_t addr)
{
        mem_vtop_t mem_vtop;
        offset_t off;

        if (kd->kvm_core == NULL) {
                mem_vtop.m_as = as;
                mem_vtop.m_va = (void *)addr;
                if (ioctl(kd->kvm_kmemfd, MEM_VTOP, &mem_vtop) == 0)
                        return ((uint64_t)mem_vtop.m_pfn * getpagesize() +
                            (addr & (getpagesize() - 1)));
        } else {
                if ((off = kvm_lookup(kd, as, addr)) != 0) {
                        long pfn_index =
                            (u_offset_t)(off - kd->kvm_dump.dump_data) >>
                            kd->kvm_dump.dump_pageshift;
                        return (((uint64_t)kd->kvm_pfn[pfn_index] <<
                            kd->kvm_dump.dump_pageshift) +
                            (addr & (kd->kvm_dump.dump_pagesize - 1)));
                }
        }
        return (-1ULL);
}

struct proc *
kvm_getproc(kvm_t *kd, pid_t pid)
{
        (void) kvm_setproc(kd);
        while (kvm_nextproc(kd) != NULL)
                if (kd->kvm_pid == pid)
                        return (&kd->kvm_proc);
        return (NULL);
}

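/*
 * Process iteration: kvm_setproc() resets the cursor to the head of the
 * practive list; kvm_nextproc() reads the next proc_t (and its pid) into
 * the handle and returns a pointer to that private copy, or NULL at the
 * end of the list or on a read error.
 */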
struct proc *
kvm_nextproc(kvm_t *kd)
{
        if (kd->kvm_proc.p_next == NULL ||
            kvm_kread(kd, (uintptr_t)kd->kvm_proc.p_next,
            &kd->kvm_proc, sizeof (proc_t)) != sizeof (proc_t) ||
            kvm_kread(kd, (uintptr_t)&kd->kvm_proc.p_pidp->pid_id,
            &kd->kvm_pid, sizeof (pid_t)) != sizeof (pid_t))
                return (NULL);

        return (&kd->kvm_proc);
}

int
kvm_setproc(kvm_t *kd)
{
        (void) kvm_kread(kd, (uintptr_t)kd->kvm_practive,
            &kd->kvm_proc.p_next, sizeof (proc_t *));
        kd->kvm_pid = -1;
        return (0);
}

/*ARGSUSED*/
struct user *
kvm_getu(kvm_t *kd, struct proc *p)
{
        return (&p->p_user);
}