xref: /freebsd/usr.sbin/bhyve/gdb.c (revision f6a3b357e9be4c6423c85eff9a847163a0d307c8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/socket.h>
39 #include <machine/atomic.h>
40 #include <machine/specialreg.h>
41 #include <machine/vmm.h>
42 #include <netinet/in.h>
43 #include <assert.h>
44 #ifndef WITHOUT_CAPSICUM
45 #include <capsicum_helpers.h>
46 #endif
47 #include <err.h>
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include <stdbool.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <sysexits.h>
57 #include <unistd.h>
58 #include <vmmapi.h>
59 
60 #include "bhyverun.h"
61 #include "mem.h"
62 #include "mevent.h"
63 
/*
 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
 * use SIGTRAP.
 */
#define	GDB_SIGNAL_TRAP		5

/* Forward declarations for helpers that are used before their definitions. */
static void gdb_resume_vcpus(void);
static void check_command(int fd);

/*
 * mevent handles for the read and write sides of the debugger socket.
 * Both are created in new_connection() and are deleted and reset to
 * NULL in close_connection().
 */
static struct mevent *read_event, *write_event;

/*
 * vCPU bookkeeping:
 *  - vcpus_active: vCPUs registered through gdb_cpu_add().
 *  - vcpus_suspended: vCPUs the stub has asked to stop; copied from
 *    vcpus_active in gdb_suspend_vcpus() and cleared in
 *    gdb_resume_vcpus().
 *  - vcpus_waiting: vCPUs currently parked inside _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Mutex held around the vCPU state above and around command processing. */
static pthread_mutex_t gdb_lock;
/* Condition variable (paired with gdb_lock) that parked vCPUs sleep on. */
static pthread_cond_t idle_vcpus;
/*
 * stop_pending: a stop reply is owed but was deferred because an
 * earlier response had not been acknowledged yet.
 * first_stop: set on attach so that the initial suspend does not
 * generate an unsolicited stop reply.
 */
static bool stop_pending, first_stop;
/*
 * stepping_vcpu: vCPU currently being single-stepped, or -1.
 * stopped_vcpu: vCPU to name in the next stop reply, or -1.
 */
static int stepping_vcpu, stopped_vcpu;
80 
/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
	uint8_t *data;		/* heap storage, resized by io_buffer_grow() */
	size_t capacity;	/* total bytes allocated at 'data' */
	size_t start;		/* offset of the first valid byte */
	size_t len;		/* number of valid bytes from 'start' */
};
94 
/*
 * State of the single debugger connection:
 *  - cur_comm: bytes received from the debugger that are not yet parsed.
 *  - cur_resp: bytes queued (or partly sent) to the debugger.
 *  - cur_csum: running checksum of the packet being assembled.
 *  - cur_vcpu: vCPU that register/memory commands operate on; selected
 *    by the 'H' packet and reset to 0 for each new connection.
 *  - ctx: VM context handed to init_gdb().
 *  - cur_fd: socket of the current connection, or -1 when none.
 */
static struct io_buffer cur_comm, cur_resp;
static uint8_t cur_csum;
static int cur_vcpu;
static struct vmctx *ctx;
static int cur_fd = -1;
100 
/*
 * Guest registers reported by the 'g' packet, in the order in which
 * gdb_read_regs() emits them.  Entry i is paired with gdb_regsize[i],
 * so the two tables must stay the same length and order.
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
127 
/*
 * Number of bytes emitted for each entry of gdb_regset[] in a 'g'
 * reply: the first seventeen entries (RAX through RIP) are sent as
 * 8 bytes, the remaining seven (RFLAGS and the segment registers) as
 * 4 bytes.
 */
const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
154 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace helper, only compiled in when GDB_LOG is defined.
 * The log file is opened lazily on the first call; if it cannot be
 * opened (or, with Capsicum, cannot be limited to writes) the message
 * is dropped and the open is retried on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *log_fp;
	FILE *fp;
	va_list args;

	if (log_fp == NULL) {
		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		log_fp = fp;
	}

	va_start(args, fmt);
	vfprintf(log_fp, fmt, args);
	va_end(args);
}
#else
/* Logging disabled: debug() calls compile away to nothing. */
#define debug(...)
#endif
185 
186 static int
187 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
188 {
189 	uint64_t regs[4];
190 	const int regset[4] = {
191 		VM_REG_GUEST_CR0,
192 		VM_REG_GUEST_CR3,
193 		VM_REG_GUEST_CR4,
194 		VM_REG_GUEST_EFER
195 	};
196 
197 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
198 		return (-1);
199 
200 	/*
201 	 * For the debugger, always pretend to be the kernel (CPL 0),
202 	 * and if long-mode is enabled, always parse addresses as if
203 	 * in 64-bit mode.
204 	 */
205 	paging->cr3 = regs[1];
206 	paging->cpl = 0;
207 	if (regs[3] & EFER_LMA)
208 		paging->cpu_mode = CPU_MODE_64BIT;
209 	else if (regs[0] & CR0_PE)
210 		paging->cpu_mode = CPU_MODE_PROTECTED;
211 	else
212 		paging->cpu_mode = CPU_MODE_REAL;
213 	if (!(regs[0] & CR0_PG))
214 		paging->paging_mode = PAGING_MODE_FLAT;
215 	else if (!(regs[2] & CR4_PAE))
216 		paging->paging_mode = PAGING_MODE_32;
217 	else if (regs[3] & EFER_LME)
218 		paging->paging_mode = PAGING_MODE_64;
219 	else
220 		paging->paging_mode = PAGING_MODE_PAE;
221 	return (0);
222 }
223 
224 /*
225  * Map a guest virtual address to a physical address (for a given vcpu).
226  * If a guest virtual address is valid, return 1.  If the address is
227  * not valid, return 0.  If an error occurs obtaining the mapping,
228  * return -1.
229  */
230 static int
231 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
232 {
233 	struct vm_guest_paging paging;
234 	int fault;
235 
236 	if (guest_paging_info(vcpu, &paging) == -1)
237 		return (-1);
238 
239 	/*
240 	 * Always use PROT_READ.  We really care if the VA is
241 	 * accessible, not if the current vCPU can write.
242 	 */
243 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
244 	    &fault) == -1)
245 		return (-1);
246 	if (fault)
247 		return (0);
248 	return (1);
249 }
250 
251 static void
252 io_buffer_reset(struct io_buffer *io)
253 {
254 
255 	io->start = 0;
256 	io->len = 0;
257 }
258 
259 /* Available room for adding data. */
260 static size_t
261 io_buffer_avail(struct io_buffer *io)
262 {
263 
264 	return (io->capacity - (io->start + io->len));
265 }
266 
267 static uint8_t *
268 io_buffer_head(struct io_buffer *io)
269 {
270 
271 	return (io->data + io->start);
272 }
273 
274 static uint8_t *
275 io_buffer_tail(struct io_buffer *io)
276 {
277 
278 	return (io->data + io->start + io->len);
279 }
280 
281 static void
282 io_buffer_advance(struct io_buffer *io, size_t amount)
283 {
284 
285 	assert(amount <= io->len);
286 	io->start += amount;
287 	io->len -= amount;
288 }
289 
290 static void
291 io_buffer_consume(struct io_buffer *io, size_t amount)
292 {
293 
294 	io_buffer_advance(io, amount);
295 	if (io->len == 0) {
296 		io->start = 0;
297 		return;
298 	}
299 
300 	/*
301 	 * XXX: Consider making this move optional and compacting on a
302 	 * future read() before realloc().
303 	 */
304 	memmove(io->data, io_buffer_head(io), io->len);
305 	io->start = 0;
306 }
307 
308 static void
309 io_buffer_grow(struct io_buffer *io, size_t newsize)
310 {
311 	uint8_t *new_data;
312 	size_t avail, new_cap;
313 
314 	avail = io_buffer_avail(io);
315 	if (newsize <= avail)
316 		return;
317 
318 	new_cap = io->capacity + (newsize - avail);
319 	new_data = realloc(io->data, new_cap);
320 	if (new_data == NULL)
321 		err(1, "Failed to grow GDB I/O buffer");
322 	io->data = new_data;
323 	io->capacity = new_cap;
324 }
325 
326 static bool
327 response_pending(void)
328 {
329 
330 	if (cur_resp.start == 0 && cur_resp.len == 0)
331 		return (false);
332 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
333 		return (false);
334 	return (true);
335 }
336 
337 static void
338 close_connection(void)
339 {
340 
341 	/*
342 	 * XXX: This triggers a warning because mevent does the close
343 	 * before the EV_DELETE.
344 	 */
345 	pthread_mutex_lock(&gdb_lock);
346 	mevent_delete(write_event);
347 	mevent_delete_close(read_event);
348 	write_event = NULL;
349 	read_event = NULL;
350 	io_buffer_reset(&cur_comm);
351 	io_buffer_reset(&cur_resp);
352 	cur_fd = -1;
353 
354 	/* Resume any stopped vCPUs. */
355 	gdb_resume_vcpus();
356 	pthread_mutex_unlock(&gdb_lock);
357 }
358 
/* Convert a value in the range 0-15 to its lowercase hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
368 
/*
 * Convert one ASCII hex character (either case) to its value.  Any
 * other character yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t result;

	if (v >= 'a' && v <= 'f')
		result = 10 + (v - 'a');
	else if (v >= 'A' && v <= 'F')
		result = 10 + (v - 'A');
	else if (v >= '0' && v <= '9')
		result = v - '0';
	else
		result = 0xF;
	return (result);
}
381 
/*
 * Parse 'len' hex characters at 'p', most significant digit first,
 * into an integer.  An empty string yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t acc;
	size_t i;

	acc = 0;
	for (i = 0; i < len; i++)
		acc = (acc << 4) | parse_digit(p[i]);
	return (acc);
}
397 
/* Parse the two hex characters at 'p' into one byte (high nibble first). */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
404 
405 static void
406 send_pending_data(int fd)
407 {
408 	ssize_t nwritten;
409 
410 	if (cur_resp.len == 0) {
411 		mevent_disable(write_event);
412 		return;
413 	}
414 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
415 	if (nwritten == -1) {
416 		warn("Write to GDB socket failed");
417 		close_connection();
418 	} else {
419 		io_buffer_advance(&cur_resp, nwritten);
420 		if (cur_resp.len == 0)
421 			mevent_disable(write_event);
422 		else
423 			mevent_enable(write_event);
424 	}
425 }
426 
427 /* Append a single character to the output buffer. */
428 static void
429 send_char(uint8_t data)
430 {
431 	io_buffer_grow(&cur_resp, 1);
432 	*io_buffer_tail(&cur_resp) = data;
433 	cur_resp.len++;
434 }
435 
436 /* Append an array of bytes to the output buffer. */
437 static void
438 send_data(const uint8_t *data, size_t len)
439 {
440 
441 	io_buffer_grow(&cur_resp, len);
442 	memcpy(io_buffer_tail(&cur_resp), data, len);
443 	cur_resp.len += len;
444 }
445 
/* Write 'v' as two lowercase hex characters into buf[0] and buf[1]. */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t hi, lo;

	hi = v >> 4;
	lo = v & 0xf;
	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
453 
/*
 * Queue 'v' as two hex characters without adding it to the checksum
 * (used for the checksum trailer itself).
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
466 
467 static void
468 start_packet(void)
469 {
470 
471 	send_char('$');
472 	cur_csum = 0;
473 }
474 
/*
 * Terminate the packet begun by start_packet(): queue the '#' marker
 * followed by the accumulated checksum as two hex digits, then log the
 * bytes currently queued for sending.
 */
static void
finish_packet(void)
{

	send_char('#');
	/* send_byte() rather than append_byte(): the trailer isn't summed. */
	send_byte(cur_csum);
	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
}
483 
484 /*
485  * Append a single character (for the packet payload) and update the
486  * checksum.
487  */
488 static void
489 append_char(uint8_t v)
490 {
491 
492 	send_char(v);
493 	cur_csum += v;
494 }
495 
496 /*
497  * Append an array of bytes (for the packet payload) and update the
498  * checksum.
499  */
500 static void
501 append_packet_data(const uint8_t *data, size_t len)
502 {
503 
504 	send_data(data, len);
505 	while (len > 0) {
506 		cur_csum += *data;
507 		data++;
508 		len--;
509 	}
510 }
511 
/* Add a NUL-terminated string (without the NUL) to the packet payload. */
static void
append_string(const char *str)
{
	size_t n;

	n = strlen(str);
	append_packet_data(str, n);
}
518 
/* Add 'v' to the packet payload as two checksummed hex characters. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
527 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		append_byte(value);
		value >>= 8;
	}
}
538 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	char buf[len * 2];
	size_t pos;

	/* Fill from the end so the least significant byte lands last. */
	for (pos = len; pos > 0; pos--) {
		format_byte(value, buf + (pos - 1) * 2);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
551 
/*
 * Add 'value' to the packet payload as big-endian hex, using the
 * smallest whole number of bytes that holds it.  Zero is sent as the
 * single character "0".
 */
static void
append_integer(unsigned int value)
{

	if (value == 0)
		append_char('0');
	else
		/*
		 * fls() gives the index of the highest set bit; round
		 * that bit count up to whole bytes.  The parentheses
		 * matter: "fls(value) + 7 / 8" is just fls(value) and
		 * would emit one byte per significant bit.
		 */
		append_unsigned_be(value, (fls(value) + 7) / 8);
}
561 
/*
 * Add a NUL-terminated string to the packet payload with every
 * character encoded as two hex digits.
 */
static void
append_asciihex(const char *str)
{
	const char *cp;

	for (cp = str; *cp != '\0'; cp++)
		append_byte(*cp);
}
571 
/*
 * Queue a packet with an empty payload ("$#00").  The callers in this
 * file use it to answer packets they do not handle.
 */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
579 
/*
 * Queue an error reply: 'E' followed by the low 8 bits of 'error' as
 * two hex digits.  Callers pass errno-style values.
 */
static void
send_error(int error)
{

	start_packet();
	append_char('E');
	/* append_byte() takes a uint8_t, so larger values are truncated. */
	append_byte(error);
	finish_packet();
}
589 
/* Queue the generic success reply, a packet whose payload is "OK". */
static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}
598 
/*
 * Decode a thread-id field of 'len' characters.
 *
 * Returns 0 for "0", -1 for "-1", -2 for an empty field, and otherwise
 * the field parsed as hexadecimal.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (*data == '0')
			return (0);
		break;
	case 2:
		if (memcmp(data, "-1", 2) == 0)
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
611 
612 static void
613 report_stop(void)
614 {
615 
616 	start_packet();
617 	if (stopped_vcpu == -1)
618 		append_char('S');
619 	else
620 		append_char('T');
621 	append_byte(GDB_SIGNAL_TRAP);
622 	if (stopped_vcpu != -1) {
623 		append_string("thread:");
624 		append_integer(stopped_vcpu + 1);
625 		append_char(';');
626 	}
627 	stopped_vcpu = -1;
628 	finish_packet();
629 }
630 
631 static void
632 gdb_finish_suspend_vcpus(void)
633 {
634 
635 	if (first_stop) {
636 		first_stop = false;
637 		stopped_vcpu = -1;
638 	} else if (response_pending())
639 		stop_pending = true;
640 	else {
641 		report_stop();
642 		send_pending_data(cur_fd);
643 	}
644 }
645 
/*
 * Park the calling vCPU thread until it is resumed or chosen for
 * single-stepping.  The caller must hold gdb_lock; it is released
 * while sleeping on idle_vcpus and re-acquired before returning.
 *
 * When 'report_stop' is true and this vCPU is the last of the
 * suspended set to arrive, the stop is finalized (which may send a
 * stop reply to the debugger).  Note that the parameter shadows the
 * report_stop() function inside this body.
 */
static void
_gdb_cpu_suspend(int vcpu, bool report_stop)
{

	debug("$vCPU %d suspending\n", vcpu);
	CPU_SET(vcpu, &vcpus_waiting);
	/* All suspended vCPUs are now waiting: the stop is complete. */
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	/* Loop to tolerate wakeups that are not meant for this vCPU. */
	while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpu, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpu);
}
659 
/*
 * Register 'vcpu' with the stub by adding it to vcpus_active.  If any
 * vCPU is currently marked suspended, the new vCPU is marked suspended
 * as well and the caller blocks here until it is resumed.
 */
void
gdb_cpu_add(int vcpu)
{

	debug("$vCPU %d starting\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	CPU_SET(vcpu, &vcpus_active);

	/*
	 * If a vcpu is added while vcpus are stopped, suspend the new
	 * vcpu so that it will pop back out with a debug exit before
	 * executing the first instruction.
	 */
	if (!CPU_EMPTY(&vcpus_suspended)) {
		CPU_SET(vcpu, &vcpus_suspended);
		/* 'false': a newly added vCPU never triggers a stop reply. */
		_gdb_cpu_suspend(vcpu, false);
	}
	pthread_mutex_unlock(&gdb_lock);
}
679 
/*
 * Entry point for a vCPU thread that has been asked to stop: takes
 * gdb_lock and parks in _gdb_cpu_suspend(), allowing it to complete
 * the stop and notify the debugger if it is the last one to arrive.
 */
void
gdb_cpu_suspend(int vcpu)
{

	pthread_mutex_lock(&gdb_lock);
	_gdb_cpu_suspend(vcpu, true);
	pthread_mutex_unlock(&gdb_lock);
}
688 
/*
 * Handle an MTRAP exit on 'vcpu'.  If this is the vCPU being
 * single-stepped, the step is finished: MTRAP exits are switched off,
 * the vCPU is suspended again, recorded as the stop's source, and
 * parked.  MTRAP exits on any other vCPU are ignored.
 */
void
gdb_cpu_mtrap(int vcpu)
{

	debug("$vCPU %d MTRAP\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	if (vcpu == stepping_vcpu) {
		stepping_vcpu = -1;
		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
		vm_suspend_cpu(ctx, vcpu);
		/* No other stop may be awaiting a report at this point. */
		assert(stopped_vcpu == -1);
		stopped_vcpu = vcpu;
		_gdb_cpu_suspend(vcpu, true);
	}
	pthread_mutex_unlock(&gdb_lock);
}
705 
/*
 * Ask every active vCPU to stop.  Must be called with gdb_lock held.
 * If all of them are already parked the stop is finalized immediately;
 * otherwise the last vCPU to park finalizes it from
 * _gdb_cpu_suspend().
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	/* A vcpu argument of -1 is used here to address all vCPUs. */
	vm_suspend_cpu(ctx, -1);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
717 
718 static bool
719 gdb_step_vcpu(int vcpu)
720 {
721 	int error, val;
722 
723 	debug("$vCPU %d step\n", vcpu);
724 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
725 	if (error < 0)
726 		return (false);
727 	error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
728 	vm_resume_cpu(ctx, vcpu);
729 	stepping_vcpu = vcpu;
730 	pthread_cond_broadcast(&idle_vcpus);
731 	return (true);
732 }
733 
/*
 * Let every vCPU run again: resume them in the hypervisor, clear the
 * suspended set and wake all threads parked in _gdb_cpu_suspend().
 * Must be called with gdb_lock held.
 */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	/* A vcpu argument of -1 is used here to address all vCPUs. */
	vm_resume_cpu(ctx, -1);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
744 
745 static void
746 gdb_read_regs(void)
747 {
748 	uint64_t regvals[nitems(gdb_regset)];
749 	int i;
750 
751 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
752 	    gdb_regset, regvals) == -1) {
753 		send_error(errno);
754 		return;
755 	}
756 	start_packet();
757 	for (i = 0; i < nitems(regvals); i++)
758 		append_unsigned_native(regvals[i], gdb_regsize[i]);
759 	finish_packet();
760 }
761 
/*
 * Answer an 'm' packet ("m<addr>,<length>", both hex): read guest
 * memory at a guest virtual address on the current vCPU and reply with
 * the bytes as hex.
 *
 * The read is processed one page at a time.  If a failure happens
 * before any byte has been produced an error reply is sent; if it
 * happens later, the packet is finished with the bytes read so far
 * (a short reply).
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	/* 'started' records whether the reply packet has been opened. */
	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			/* Translation could not be attempted. */
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			/* Address is not mapped. */
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				/* Pick the widest naturally aligned access. */
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(ctx, cur_vcpu, gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit low byte first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		/* Either done, or exactly at the next page boundary. */
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	/* A zero-length read still gets an (empty) reply packet. */
	if (!started)
		start_packet();
	finish_packet();
}
873 
/*
 * Answer an 'M' packet ("M<addr>,<length>:<hex bytes>"): write the
 * supplied bytes to guest memory at a guest virtual address on the
 * current vCPU.  Replies "OK" once everything has been written, or an
 * error reply on the first failure.  Bytes written before a failure
 * are not rolled back.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length. */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			/* Translation could not be attempted. */
			send_error(errno);
			return;
		}
		if (error == 0) {
			/* Address is not mapped. */
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 */
			while (todo > 0) {
				/*
				 * The hex text lists bytes in memory
				 * order, so the big-endian parse result
				 * is converted with be16toh()/be32toh().
				 */
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(ctx, cur_vcpu, gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		/* Either done, or exactly at the next page boundary. */
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	/* Every hex character supplied must have been consumed. */
	assert(len == 0);
	send_ok();
}
982 
/*
 * Report whether the 'len' bytes at 'data' begin with the string 'cmd'.
 * This is a prefix match: trailing bytes after 'cmd' are ignored.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmd_len;

	cmd_len = strlen(cmd);
	if (len < cmd_len)
		return (false);
	return (memcmp(data, cmd, cmd_len) == 0);
}
991 
/*
 * Handle the argument part of a "qSupported" packet.  'data'/'len'
 * cover everything after the "qSupported" keyword.
 *
 * The debugger's feature list is split on ';' and each entry is
 * classified as "name=value", "name+" or "name-", but no feature is
 * acted upon yet.  The reply only advertises the packet size.  If the
 * scratch copy of the list cannot be allocated an ENOMEM error reply
 * is sent instead.
 */
static void
check_features(const uint8_t *data, size_t len)
{
	char *feature, *next_feature, *str, *value;
	bool supported;

	/* Work on a NUL-terminated copy because strsep() modifies it. */
	str = malloc(len + 1);
	if (str == NULL) {
		send_error(ENOMEM);
		return;
	}
	memcpy(str, data, len);
	str[len] = '\0';
	next_feature = str;

	while ((feature = strsep(&next_feature, ";")) != NULL) {
		/*
		 * Null features shouldn't exist, but skip if they
		 * do.
		 */
		if (strcmp(feature, "") == 0)
			continue;

		/*
		 * Look for the value or supported / not supported
		 * flag.
		 */
		value = strchr(feature, '=');
		if (value != NULL) {
			*value = '\0';
			value++;
			supported = true;
		} else {
			/* 'feature' is non-empty, so this stays in bounds. */
			value = feature + strlen(feature) - 1;
			switch (*value) {
			case '+':
				supported = true;
				break;
			case '-':
				supported = false;
				break;
			default:
				/*
				 * This is really a protocol error,
				 * but we just ignore malformed
				 * features for ease of
				 * implementation.
				 */
				continue;
			}
			value = NULL;
		}

		/* No currently supported features. */
	}
	free(str);

	start_packet();

	/* This is an arbitrary limit. */
	append_string("PacketSize=4096");
	finish_packet();
}
1051 
/*
 * Handle a 'q' (general query) packet.  'data'/'len' cover the whole
 * packet payload including the leading 'q'.  Queries that are not
 * recognized get an empty reply.
 *
 * Thread ids on the wire are vCPU numbers plus one.
 */
static void
gdb_query(const uint8_t *data, size_t len)
{

	/*
	 * TODO:
	 * - qSearch
	 */
	if (command_equals(data, len, "qAttached")) {
		/* Always answers "1". */
		start_packet();
		append_char('1');
		finish_packet();
	} else if (command_equals(data, len, "qC")) {
		/*
		 * Report the current thread.
		 * NOTE(review): command_equals() is a prefix match, so any
		 * packet that merely starts with "qC" (for instance a
		 * "qCRC:" request) also lands here -- confirm intended.
		 */
		start_packet();
		append_string("QC");
		append_integer(cur_vcpu + 1);
		finish_packet();
	} else if (command_equals(data, len, "qfThreadInfo")) {
		cpuset_t mask;
		bool first;
		int vcpu;

		if (CPU_EMPTY(&vcpus_active)) {
			send_error(EINVAL);
			return;
		}
		/* Reply 'm' followed by a comma-separated thread id list. */
		mask = vcpus_active;
		start_packet();
		append_char('m');
		first = true;
		while (!CPU_EMPTY(&mask)) {
			vcpu = CPU_FFS(&mask) - 1;
			CPU_CLR(vcpu, &mask);
			if (first)
				first = false;
			else
				append_char(',');
			append_integer(vcpu + 1);
		}
		finish_packet();
	} else if (command_equals(data, len, "qsThreadInfo")) {
		/* The whole list was sent by qfThreadInfo; 'l' ends it. */
		start_packet();
		append_char('l');
		finish_packet();
	} else if (command_equals(data, len, "qSupported")) {
		data += strlen("qSupported");
		len -= strlen("qSupported");
		check_features(data, len);
	} else if (command_equals(data, len, "qThreadExtraInfo")) {
		char buf[16];
		int tid;

		data += strlen("qThreadExtraInfo");
		len -= strlen("qThreadExtraInfo");
		if (*data != ',') {
			send_error(EINVAL);
			return;
		}
		tid = parse_threadid(data + 1, len - 1);
		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
			send_error(EINVAL);
			return;
		}

		/* The description is sent hex-encoded. */
		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
		start_packet();
		append_asciihex(buf);
		finish_packet();
	} else
		send_empty_response();
}
1123 
1124 static void
1125 handle_command(const uint8_t *data, size_t len)
1126 {
1127 
1128 	/* Reject packets with a sequence-id. */
1129 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1130 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1131 		send_empty_response();
1132 		return;
1133 	}
1134 
1135 	switch (*data) {
1136 	case 'c':
1137 		if (len != 1) {
1138 			send_error(EINVAL);
1139 			break;
1140 		}
1141 
1142 		/* Don't send a reply until a stop occurs. */
1143 		gdb_resume_vcpus();
1144 		break;
1145 	case 'D':
1146 		send_ok();
1147 
1148 		/* TODO: Resume any stopped CPUs. */
1149 		break;
1150 	case 'g': {
1151 		gdb_read_regs();
1152 		break;
1153 	}
1154 	case 'H': {
1155 		int tid;
1156 
1157 		if (data[1] != 'g' && data[1] != 'c') {
1158 			send_error(EINVAL);
1159 			break;
1160 		}
1161 		tid = parse_threadid(data + 2, len - 2);
1162 		if (tid == -2) {
1163 			send_error(EINVAL);
1164 			break;
1165 		}
1166 
1167 		if (CPU_EMPTY(&vcpus_active)) {
1168 			send_error(EINVAL);
1169 			break;
1170 		}
1171 		if (tid == -1 || tid == 0)
1172 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1173 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1174 			cur_vcpu = tid - 1;
1175 		else {
1176 			send_error(EINVAL);
1177 			break;
1178 		}
1179 		send_ok();
1180 		break;
1181 	}
1182 	case 'm':
1183 		gdb_read_mem(data, len);
1184 		break;
1185 	case 'M':
1186 		gdb_write_mem(data, len);
1187 		break;
1188 	case 'T': {
1189 		int tid;
1190 
1191 		tid = parse_threadid(data + 1, len - 1);
1192 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1193 			send_error(EINVAL);
1194 			return;
1195 		}
1196 		send_ok();
1197 		break;
1198 	}
1199 	case 'q':
1200 		gdb_query(data, len);
1201 		break;
1202 	case 's':
1203 		if (len != 1) {
1204 			send_error(EINVAL);
1205 			break;
1206 		}
1207 
1208 		/* Don't send a reply until a stop occurs. */
1209 		if (!gdb_step_vcpu(cur_vcpu)) {
1210 			send_error(EOPNOTSUPP);
1211 			break;
1212 		}
1213 		break;
1214 	case '?':
1215 		/* XXX: Only if stopped? */
1216 		/* For now, just report that we are always stopped. */
1217 		start_packet();
1218 		append_char('S');
1219 		append_byte(GDB_SIGNAL_TRAP);
1220 		finish_packet();
1221 		break;
1222 	case 'G': /* TODO */
1223 	case 'v':
1224 		/* Handle 'vCont' */
1225 		/* 'vCtrlC' */
1226 	case 'p': /* TODO */
1227 	case 'P': /* TODO */
1228 	case 'Q': /* TODO */
1229 	case 't': /* TODO */
1230 	case 'X': /* TODO */
1231 	case 'z': /* TODO */
1232 	case 'Z': /* TODO */
1233 	default:
1234 		send_empty_response();
1235 	}
1236 }
1237 
/*
 * Process as much of the command buffer as possible.  Each iteration
 * looks at the first buffered byte:
 *   0x03  interrupt request: suspend all vCPUs.
 *   '+'   acknowledgement of the previous response.
 *   '-'   negative acknowledgement: retransmit the previous response.
 *   '$'   start of a packet: verify the checksum and dispatch it.
 * Any other byte is dropped.  Returns when the buffer is empty or
 * holds only an incomplete packet.  'fd' is the debugger socket used
 * for sending replies.
 */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			/* A stop deferred behind that response can go now. */
			if (stop_pending) {
				stop_pending = false;
				report_stop();
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/*
				 * Rewind to the start of the buffer so
				 * that the response is sent again, but
				 * skip a leading '+' so the ACK is not
				 * repeated.
				 */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* '$' through '#' plus two checksum digits. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			/* Queue the ACK ahead of whatever reply follows. */
			send_char('+');

			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1327 
1328 static void
1329 gdb_readable(int fd, enum ev_type event, void *arg)
1330 {
1331 	ssize_t nread;
1332 	int pending;
1333 
1334 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1335 		warn("FIONREAD on GDB socket");
1336 		return;
1337 	}
1338 
1339 	/*
1340 	 * 'pending' might be zero due to EOF.  We need to call read
1341 	 * with a non-zero length to detect EOF.
1342 	 */
1343 	if (pending == 0)
1344 		pending = 1;
1345 
1346 	/* Ensure there is room in the command buffer. */
1347 	io_buffer_grow(&cur_comm, pending);
1348 	assert(io_buffer_avail(&cur_comm) >= pending);
1349 
1350 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1351 	if (nread == 0) {
1352 		close_connection();
1353 	} else if (nread == -1) {
1354 		if (errno == EAGAIN)
1355 			return;
1356 
1357 		warn("Read from GDB socket");
1358 		close_connection();
1359 	} else {
1360 		cur_comm.len += nread;
1361 		pthread_mutex_lock(&gdb_lock);
1362 		check_command(fd);
1363 		pthread_mutex_unlock(&gdb_lock);
1364 	}
1365 }
1366 
/*
 * mevent callback run when the debugger socket 'fd' is writable:
 * continue sending any queued response bytes.  'event' and 'arg' are
 * unused.  Note that gdb_lock is not taken here.
 */
static void
gdb_writable(int fd, enum ev_type event, void *arg)
{

	send_pending_data(fd);
}
1373 
1374 static void
1375 new_connection(int fd, enum ev_type event, void *arg)
1376 {
1377 	int optval, s;
1378 
1379 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1380 	if (s == -1) {
1381 		if (arg != NULL)
1382 			err(1, "Failed accepting initial GDB connection");
1383 
1384 		/* Silently ignore errors post-startup. */
1385 		return;
1386 	}
1387 
1388 	optval = 1;
1389 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1390 	    -1) {
1391 		warn("Failed to disable SIGPIPE for GDB connection");
1392 		close(s);
1393 		return;
1394 	}
1395 
1396 	pthread_mutex_lock(&gdb_lock);
1397 	if (cur_fd != -1) {
1398 		close(s);
1399 		warnx("Ignoring additional GDB connection.");
1400 	}
1401 
1402 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1403 	if (read_event == NULL) {
1404 		if (arg != NULL)
1405 			err(1, "Failed to setup initial GDB connection");
1406 		pthread_mutex_unlock(&gdb_lock);
1407 		return;
1408 	}
1409 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1410 	if (write_event == NULL) {
1411 		if (arg != NULL)
1412 			err(1, "Failed to setup initial GDB connection");
1413 		mevent_delete_close(read_event);
1414 		read_event = NULL;
1415 	}
1416 
1417 	cur_fd = s;
1418 	cur_vcpu = 0;
1419 	stepping_vcpu = -1;
1420 	stopped_vcpu = -1;
1421 	stop_pending = false;
1422 
1423 	/* Break on attach. */
1424 	first_stop = true;
1425 	gdb_suspend_vcpus();
1426 	pthread_mutex_unlock(&gdb_lock);
1427 }
1428 
1429 #ifndef WITHOUT_CAPSICUM
1430 void
1431 limit_gdb_socket(int s)
1432 {
1433 	cap_rights_t rights;
1434 	unsigned long ioctls[] = { FIONREAD };
1435 
1436 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1437 	    CAP_SETSOCKOPT, CAP_IOCTL);
1438 	if (caph_rights_limit(s, &rights) == -1)
1439 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1440 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1441 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1442 }
1443 #endif
1444 
1445 void
1446 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1447 {
1448 	struct sockaddr_in sin;
1449 	int error, flags, s;
1450 
1451 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1452 
1453 	error = pthread_mutex_init(&gdb_lock, NULL);
1454 	if (error != 0)
1455 		errc(1, error, "gdb mutex init");
1456 	error = pthread_cond_init(&idle_vcpus, NULL);
1457 	if (error != 0)
1458 		errc(1, error, "gdb cv init");
1459 
1460 	ctx = _ctx;
1461 	s = socket(PF_INET, SOCK_STREAM, 0);
1462 	if (s < 0)
1463 		err(1, "gdb socket create");
1464 
1465 	sin.sin_len = sizeof(sin);
1466 	sin.sin_family = AF_INET;
1467 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1468 	sin.sin_port = htons(sport);
1469 
1470 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1471 		err(1, "gdb socket bind");
1472 
1473 	if (listen(s, 1) < 0)
1474 		err(1, "gdb socket listen");
1475 
1476 	if (wait) {
1477 		/*
1478 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1479 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1480 		 * it starts execution.  The vcpu will remain suspended
1481 		 * until a debugger connects.
1482 		 */
1483 		stepping_vcpu = -1;
1484 		stopped_vcpu = -1;
1485 		CPU_SET(0, &vcpus_suspended);
1486 	}
1487 
1488 	flags = fcntl(s, F_GETFL);
1489 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1490 		err(1, "Failed to mark gdb socket non-blocking");
1491 
1492 #ifndef WITHOUT_CAPSICUM
1493 	limit_gdb_socket(s);
1494 #endif
1495 	mevent_add(s, EVF_READ, new_connection, NULL);
1496 }
1497