xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 0a8314e06cf69b7a9c73de807ee5890329301224)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/ioctl.h>
36 #include <sys/mman.h>
37 #include <sys/socket.h>
38 #include <machine/atomic.h>
39 #include <machine/specialreg.h>
40 #include <machine/vmm.h>
41 #include <netinet/in.h>
42 #include <assert.h>
43 #ifndef WITHOUT_CAPSICUM
44 #include <capsicum_helpers.h>
45 #endif
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <stdbool.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <sysexits.h>
56 #include <unistd.h>
57 #include <vmmapi.h>
58 
59 #include "bhyverun.h"
60 #include "mem.h"
61 #include "mevent.h"
62 
63 /*
64  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
65  * use SIGTRAP.
66  */
67 #define	GDB_SIGNAL_TRAP		5
68 
69 static void gdb_resume_vcpus(void);
70 static void check_command(int fd);
71 
72 static struct mevent *read_event, *write_event;
73 
74 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
75 static pthread_mutex_t gdb_lock;
76 static pthread_cond_t idle_vcpus;
77 static bool stop_pending, first_stop;
78 static int stepping_vcpu, stopped_vcpu;
79 
/*
 * A growable byte buffer used for both directions of the connection.
 *
 * Read buffer:  'start' is unused and 'len' counts the valid bytes.
 * Write buffer: 'start' indexes the next byte of 'data' to send and
 *               'len' counts the valid bytes still to be sent.
 */
struct io_buffer {
	uint8_t *data;		/* backing storage */
	size_t capacity;	/* bytes of room at 'data' */
	size_t start;		/* index of the next byte to send */
	size_t len;		/* number of valid bytes */
};
93 
94 static struct io_buffer cur_comm, cur_resp;
95 static uint8_t cur_csum;
96 static int cur_vcpu;
97 static struct vmctx *ctx;
98 static int cur_fd = -1;
99 
100 const int gdb_regset[] = {
101 	VM_REG_GUEST_RAX,
102 	VM_REG_GUEST_RBX,
103 	VM_REG_GUEST_RCX,
104 	VM_REG_GUEST_RDX,
105 	VM_REG_GUEST_RSI,
106 	VM_REG_GUEST_RDI,
107 	VM_REG_GUEST_RBP,
108 	VM_REG_GUEST_RSP,
109 	VM_REG_GUEST_R8,
110 	VM_REG_GUEST_R9,
111 	VM_REG_GUEST_R10,
112 	VM_REG_GUEST_R11,
113 	VM_REG_GUEST_R12,
114 	VM_REG_GUEST_R13,
115 	VM_REG_GUEST_R14,
116 	VM_REG_GUEST_R15,
117 	VM_REG_GUEST_RIP,
118 	VM_REG_GUEST_RFLAGS,
119 	VM_REG_GUEST_CS,
120 	VM_REG_GUEST_SS,
121 	VM_REG_GUEST_DS,
122 	VM_REG_GUEST_ES,
123 	VM_REG_GUEST_FS,
124 	VM_REG_GUEST_GS
125 };
126 
/*
 * Number of bytes sent for each gdb_regset[] entry: the first
 * seventeen entries are sent as 8 bytes, the last seven as 4 bytes.
 */
const int gdb_regsize[] = {
	8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8,
	8,
	4, 4, 4, 4, 4, 4, 4
};
153 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace output to /tmp/bhyve_gdb.log, compiled in only
 * when GDB_LOG is defined.  The log is opened on first use.  If it
 * cannot be opened (or, with Capsicum, cannot be limited to writes)
 * the message is dropped and the open is attempted again next time.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		FILE *fp;

		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		logfile = fp;
	}

	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
/* Tracing disabled: debug() calls expand to nothing. */
#define debug(...)
#endif
184 
185 static int
186 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
187 {
188 	uint64_t regs[4];
189 	const int regset[4] = {
190 		VM_REG_GUEST_CR0,
191 		VM_REG_GUEST_CR3,
192 		VM_REG_GUEST_CR4,
193 		VM_REG_GUEST_EFER
194 	};
195 
196 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
197 		return (-1);
198 
199 	/*
200 	 * For the debugger, always pretend to be the kernel (CPL 0),
201 	 * and if long-mode is enabled, always parse addresses as if
202 	 * in 64-bit mode.
203 	 */
204 	paging->cr3 = regs[1];
205 	paging->cpl = 0;
206 	if (regs[3] & EFER_LMA)
207 		paging->cpu_mode = CPU_MODE_64BIT;
208 	else if (regs[0] & CR0_PE)
209 		paging->cpu_mode = CPU_MODE_PROTECTED;
210 	else
211 		paging->cpu_mode = CPU_MODE_REAL;
212 	if (!(regs[0] & CR0_PG))
213 		paging->paging_mode = PAGING_MODE_FLAT;
214 	else if (!(regs[2] & CR4_PAE))
215 		paging->paging_mode = PAGING_MODE_32;
216 	else if (regs[3] & EFER_LME)
217 		paging->paging_mode = PAGING_MODE_64;
218 	else
219 		paging->paging_mode = PAGING_MODE_PAE;
220 	return (0);
221 }
222 
223 /*
224  * Map a guest virtual address to a physical address (for a given vcpu).
225  * If a guest virtual address is valid, return 1.  If the address is
226  * not valid, return 0.  If an error occurs obtaining the mapping,
227  * return -1.
228  */
229 static int
230 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
231 {
232 	struct vm_guest_paging paging;
233 	int fault;
234 
235 	if (guest_paging_info(vcpu, &paging) == -1)
236 		return (-1);
237 
238 	/*
239 	 * Always use PROT_READ.  We really care if the VA is
240 	 * accessible, not if the current vCPU can write.
241 	 */
242 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
243 	    &fault) == -1)
244 		return (-1);
245 	if (fault)
246 		return (0);
247 	return (1);
248 }
249 
250 static void
251 io_buffer_reset(struct io_buffer *io)
252 {
253 
254 	io->start = 0;
255 	io->len = 0;
256 }
257 
258 /* Available room for adding data. */
259 static size_t
260 io_buffer_avail(struct io_buffer *io)
261 {
262 
263 	return (io->capacity - (io->start + io->len));
264 }
265 
266 static uint8_t *
267 io_buffer_head(struct io_buffer *io)
268 {
269 
270 	return (io->data + io->start);
271 }
272 
273 static uint8_t *
274 io_buffer_tail(struct io_buffer *io)
275 {
276 
277 	return (io->data + io->start + io->len);
278 }
279 
280 static void
281 io_buffer_advance(struct io_buffer *io, size_t amount)
282 {
283 
284 	assert(amount <= io->len);
285 	io->start += amount;
286 	io->len -= amount;
287 }
288 
289 static void
290 io_buffer_consume(struct io_buffer *io, size_t amount)
291 {
292 
293 	io_buffer_advance(io, amount);
294 	if (io->len == 0) {
295 		io->start = 0;
296 		return;
297 	}
298 
299 	/*
300 	 * XXX: Consider making this move optional and compacting on a
301 	 * future read() before realloc().
302 	 */
303 	memmove(io->data, io_buffer_head(io), io->len);
304 	io->start = 0;
305 }
306 
307 static void
308 io_buffer_grow(struct io_buffer *io, size_t newsize)
309 {
310 	uint8_t *new_data;
311 	size_t avail, new_cap;
312 
313 	avail = io_buffer_avail(io);
314 	if (newsize <= avail)
315 		return;
316 
317 	new_cap = io->capacity + (newsize - avail);
318 	new_data = realloc(io->data, new_cap);
319 	if (new_data == NULL)
320 		err(1, "Failed to grow GDB I/O buffer");
321 	io->data = new_data;
322 	io->capacity = new_cap;
323 }
324 
325 static bool
326 response_pending(void)
327 {
328 
329 	if (cur_resp.start == 0 && cur_resp.len == 0)
330 		return (false);
331 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
332 		return (false);
333 	return (true);
334 }
335 
336 static void
337 close_connection(void)
338 {
339 
340 	/*
341 	 * XXX: This triggers a warning because mevent does the close
342 	 * before the EV_DELETE.
343 	 */
344 	pthread_mutex_lock(&gdb_lock);
345 	mevent_delete(write_event);
346 	mevent_delete_close(read_event);
347 	write_event = NULL;
348 	read_event = NULL;
349 	io_buffer_reset(&cur_comm);
350 	io_buffer_reset(&cur_resp);
351 	cur_fd = -1;
352 
353 	/* Resume any stopped vCPUs. */
354 	gdb_resume_vcpus();
355 	pthread_mutex_unlock(&gdb_lock);
356 }
357 
/* Convert a value in the range 0-15 to its lower-case hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
367 
/*
 * Convert one hex character (either case) to its value.  Characters
 * that are not hex digits are not rejected; they yield 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t digit;

	if ('0' <= v && v <= '9')
		digit = v - '0';
	else if ('a' <= v && v <= 'f')
		digit = v - 'a' + 10;
	else if ('A' <= v && v <= 'F')
		digit = v - 'A' + 10;
	else
		digit = 0xF;
	return (digit);
}
380 
/*
 * Parse 'len' hex characters at 'p' as one number, most significant
 * digit first.  An empty string yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t v;
	size_t i;

	v = 0;
	for (i = 0; i < len; i++)
		v = (v << 4) | parse_digit(p[i]);
	return (v);
}
396 
/* Parse the two hex characters at 'p' as a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
403 
404 static void
405 send_pending_data(int fd)
406 {
407 	ssize_t nwritten;
408 
409 	if (cur_resp.len == 0) {
410 		mevent_disable(write_event);
411 		return;
412 	}
413 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
414 	if (nwritten == -1) {
415 		warn("Write to GDB socket failed");
416 		close_connection();
417 	} else {
418 		io_buffer_advance(&cur_resp, nwritten);
419 		if (cur_resp.len == 0)
420 			mevent_disable(write_event);
421 		else
422 			mevent_enable(write_event);
423 	}
424 }
425 
426 /* Append a single character to the output buffer. */
427 static void
428 send_char(uint8_t data)
429 {
430 	io_buffer_grow(&cur_resp, 1);
431 	*io_buffer_tail(&cur_resp) = data;
432 	cur_resp.len++;
433 }
434 
435 /* Append an array of bytes to the output buffer. */
436 static void
437 send_data(const uint8_t *data, size_t len)
438 {
439 
440 	io_buffer_grow(&cur_resp, len);
441 	memcpy(io_buffer_tail(&cur_resp), data, len);
442 	cur_resp.len += len;
443 }
444 
/*
 * Write 'v' as two hex characters, high nibble first, to buf[0] and
 * buf[1].  No terminator is stored.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t high, low;

	high = v >> 4;
	low = v & 0xf;
	buf[0] = hex_digit(high);
	buf[1] = hex_digit(low);
}
452 
/*
 * Queue one byte as two hex characters in the response buffer.  The
 * packet checksum is not touched.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
465 
466 static void
467 start_packet(void)
468 {
469 
470 	send_char('$');
471 	cur_csum = 0;
472 }
473 
474 static void
475 finish_packet(void)
476 {
477 
478 	send_char('#');
479 	send_byte(cur_csum);
480 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
481 }
482 
483 /*
484  * Append a single character (for the packet payload) and update the
485  * checksum.
486  */
487 static void
488 append_char(uint8_t v)
489 {
490 
491 	send_char(v);
492 	cur_csum += v;
493 }
494 
495 /*
496  * Append an array of bytes (for the packet payload) and update the
497  * checksum.
498  */
499 static void
500 append_packet_data(const uint8_t *data, size_t len)
501 {
502 
503 	send_data(data, len);
504 	while (len > 0) {
505 		cur_csum += *data;
506 		data++;
507 		len--;
508 	}
509 }
510 
/* Add a NUL-terminated string (without the NUL) to the packet payload. */
static void
append_string(const char *str)
{
	size_t n;

	n = strlen(str);
	append_packet_data((const uint8_t *)str, n);
}
517 
/* Add one byte, as two hex characters, to the packet payload. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
526 
/*
 * Add the low 'len' bytes of 'value' to the packet payload as hex,
 * least significant byte first.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

	while (len > 0) {
		append_byte((uint8_t)value);
		value >>= 8;
		len--;
	}
}
537 
/*
 * Add the low 'len' bytes of 'value' to the packet payload as hex,
 * most significant byte first.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	uint8_t buf[len * 2];
	size_t pos;

	/* Fill the buffer from the back so the low byte ends up last. */
	for (pos = len; pos > 0; pos--) {
		format_byte((uint8_t)value, &buf[(pos - 1) * 2]);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
550 
/*
 * Add 'value' to the packet payload as a big-endian hex number using
 * the smallest number of whole bytes that can hold it.  Zero is sent
 * as the single character '0'.
 */
static void
append_integer(unsigned int value)
{

	if (value == 0)
		append_char('0');
	else {
		/*
		 * fls() is the bit position of the highest set bit;
		 * round that up to a whole number of bytes.  The
		 * parentheses matter: "fls(value) + 7 / 8" is just
		 * fls(value).
		 */
		append_unsigned_be(value, (fls(value) + 7) / 8);
	}
}
560 
/*
 * Add a NUL-terminated string to the packet payload with every
 * character encoded as two hex digits.
 */
static void
append_asciihex(const char *str)
{
	const char *cp;

	for (cp = str; *cp != '\0'; cp++)
		append_byte(*cp);
}
570 
/*
 * Queue a packet with an empty payload.  In this file it is the reply
 * to every command that is not implemented.
 */
static void
send_empty_response(void)
{

	start_packet();
	/* No payload. */
	finish_packet();
}
578 
/*
 * Queue an error reply: 'E' followed by the low byte of 'error' as two
 * hex digits.
 */
static void
send_error(int error)
{
	uint8_t code;

	code = (uint8_t)error;
	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
588 
/* Queue the "OK" reply that acknowledges a successful command. */
static void
send_ok(void)
{
	static const char ok[] = "OK";

	start_packet();
	append_string(ok);
	finish_packet();
}
597 
598 static int
599 parse_threadid(const uint8_t *data, size_t len)
600 {
601 
602 	if (len == 1 && *data == '0')
603 		return (0);
604 	if (len == 2 && memcmp(data, "-1", 2) == 0)
605 		return (-1);
606 	if (len == 0)
607 		return (-2);
608 	return (parse_integer(data, len));
609 }
610 
611 static void
612 report_stop(void)
613 {
614 
615 	start_packet();
616 	if (stopped_vcpu == -1)
617 		append_char('S');
618 	else
619 		append_char('T');
620 	append_byte(GDB_SIGNAL_TRAP);
621 	if (stopped_vcpu != -1) {
622 		append_string("thread:");
623 		append_integer(stopped_vcpu + 1);
624 		append_char(';');
625 	}
626 	stopped_vcpu = -1;
627 	finish_packet();
628 }
629 
630 static void
631 gdb_finish_suspend_vcpus(void)
632 {
633 
634 	if (first_stop) {
635 		first_stop = false;
636 		stopped_vcpu = -1;
637 	} else if (response_pending())
638 		stop_pending = true;
639 	else {
640 		report_stop();
641 		send_pending_data(cur_fd);
642 	}
643 }
644 
645 static void
646 _gdb_cpu_suspend(int vcpu, bool report_stop)
647 {
648 
649 	debug("$vCPU %d suspending\n", vcpu);
650 	CPU_SET(vcpu, &vcpus_waiting);
651 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
652 		gdb_finish_suspend_vcpus();
653 	while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
654 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
655 	CPU_CLR(vcpu, &vcpus_waiting);
656 	debug("$vCPU %d resuming\n", vcpu);
657 }
658 
659 void
660 gdb_cpu_add(int vcpu)
661 {
662 
663 	debug("$vCPU %d starting\n", vcpu);
664 	pthread_mutex_lock(&gdb_lock);
665 	CPU_SET(vcpu, &vcpus_active);
666 
667 	/*
668 	 * If a vcpu is added while vcpus are stopped, suspend the new
669 	 * vcpu so that it will pop back out with a debug exit before
670 	 * executing the first instruction.
671 	 */
672 	if (!CPU_EMPTY(&vcpus_suspended)) {
673 		CPU_SET(vcpu, &vcpus_suspended);
674 		_gdb_cpu_suspend(vcpu, false);
675 	}
676 	pthread_mutex_unlock(&gdb_lock);
677 }
678 
679 void
680 gdb_cpu_suspend(int vcpu)
681 {
682 
683 	pthread_mutex_lock(&gdb_lock);
684 	_gdb_cpu_suspend(vcpu, true);
685 	pthread_mutex_unlock(&gdb_lock);
686 }
687 
688 void
689 gdb_cpu_mtrap(int vcpu)
690 {
691 
692 	debug("$vCPU %d MTRAP\n", vcpu);
693 	pthread_mutex_lock(&gdb_lock);
694 	if (vcpu == stepping_vcpu) {
695 		stepping_vcpu = -1;
696 		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
697 		vm_suspend_cpu(ctx, vcpu);
698 		assert(stopped_vcpu == -1);
699 		stopped_vcpu = vcpu;
700 		_gdb_cpu_suspend(vcpu, true);
701 	}
702 	pthread_mutex_unlock(&gdb_lock);
703 }
704 
705 static void
706 gdb_suspend_vcpus(void)
707 {
708 
709 	assert(pthread_mutex_isowned_np(&gdb_lock));
710 	debug("suspending all CPUs\n");
711 	vcpus_suspended = vcpus_active;
712 	vm_suspend_cpu(ctx, -1);
713 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
714 		gdb_finish_suspend_vcpus();
715 }
716 
717 static bool
718 gdb_step_vcpu(int vcpu)
719 {
720 	int error, val;
721 
722 	debug("$vCPU %d step\n", vcpu);
723 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
724 	if (error < 0)
725 		return (false);
726 	error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
727 	vm_resume_cpu(ctx, vcpu);
728 	stepping_vcpu = vcpu;
729 	pthread_cond_broadcast(&idle_vcpus);
730 	return (true);
731 }
732 
733 static void
734 gdb_resume_vcpus(void)
735 {
736 
737 	assert(pthread_mutex_isowned_np(&gdb_lock));
738 	vm_resume_cpu(ctx, -1);
739 	debug("resuming all CPUs\n");
740 	CPU_ZERO(&vcpus_suspended);
741 	pthread_cond_broadcast(&idle_vcpus);
742 }
743 
744 static void
745 gdb_read_regs(void)
746 {
747 	uint64_t regvals[nitems(gdb_regset)];
748 	int i;
749 
750 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
751 	    gdb_regset, regvals) == -1) {
752 		send_error(errno);
753 		return;
754 	}
755 	start_packet();
756 	for (i = 0; i < nitems(regvals); i++)
757 		append_unsigned_native(regvals[i], gdb_regsize[i]);
758 	finish_packet();
759 }
760 
761 static void
762 gdb_read_mem(const uint8_t *data, size_t len)
763 {
764 	uint64_t gpa, gva, val;
765 	uint8_t *cp;
766 	size_t resid, todo, bytes;
767 	bool started;
768 	int error;
769 
770 	/* Skip 'm' */
771 	data += 1;
772 	len -= 1;
773 
774 	/* Parse and consume address. */
775 	cp = memchr(data, ',', len);
776 	if (cp == NULL || cp == data) {
777 		send_error(EINVAL);
778 		return;
779 	}
780 	gva = parse_integer(data, cp - data);
781 	len -= (cp - data) + 1;
782 	data += (cp - data) + 1;
783 
784 	/* Parse length. */
785 	resid = parse_integer(data, len);
786 
787 	started = false;
788 	while (resid > 0) {
789 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
790 		if (error == -1) {
791 			if (started)
792 				finish_packet();
793 			else
794 				send_error(errno);
795 			return;
796 		}
797 		if (error == 0) {
798 			if (started)
799 				finish_packet();
800 			else
801 				send_error(EFAULT);
802 			return;
803 		}
804 
805 		/* Read bytes from current page. */
806 		todo = getpagesize() - gpa % getpagesize();
807 		if (todo > resid)
808 			todo = resid;
809 
810 		cp = paddr_guest2host(ctx, gpa, todo);
811 		if (cp != NULL) {
812 			/*
813 			 * If this page is guest RAM, read it a byte
814 			 * at a time.
815 			 */
816 			if (!started) {
817 				start_packet();
818 				started = true;
819 			}
820 			while (todo > 0) {
821 				append_byte(*cp);
822 				cp++;
823 				gpa++;
824 				gva++;
825 				resid--;
826 				todo--;
827 			}
828 		} else {
829 			/*
830 			 * If this page isn't guest RAM, try to handle
831 			 * it via MMIO.  For MMIO requests, use
832 			 * aligned reads of words when possible.
833 			 */
834 			while (todo > 0) {
835 				if (gpa & 1 || todo == 1)
836 					bytes = 1;
837 				else if (gpa & 2 || todo == 2)
838 					bytes = 2;
839 				else
840 					bytes = 4;
841 				error = read_mem(ctx, cur_vcpu, gpa, &val,
842 				    bytes);
843 				if (error == 0) {
844 					if (!started) {
845 						start_packet();
846 						started = true;
847 					}
848 					gpa += bytes;
849 					gva += bytes;
850 					resid -= bytes;
851 					todo -= bytes;
852 					while (bytes > 0) {
853 						append_byte(val);
854 						val >>= 8;
855 						bytes--;
856 					}
857 				} else {
858 					if (started)
859 						finish_packet();
860 					else
861 						send_error(EFAULT);
862 					return;
863 				}
864 			}
865 		}
866 		assert(resid == 0 || gpa % getpagesize() == 0);
867 	}
868 	if (!started)
869 		start_packet();
870 	finish_packet();
871 }
872 
873 static void
874 gdb_write_mem(const uint8_t *data, size_t len)
875 {
876 	uint64_t gpa, gva, val;
877 	uint8_t *cp;
878 	size_t resid, todo, bytes;
879 	int error;
880 
881 	/* Skip 'M' */
882 	data += 1;
883 	len -= 1;
884 
885 	/* Parse and consume address. */
886 	cp = memchr(data, ',', len);
887 	if (cp == NULL || cp == data) {
888 		send_error(EINVAL);
889 		return;
890 	}
891 	gva = parse_integer(data, cp - data);
892 	len -= (cp - data) + 1;
893 	data += (cp - data) + 1;
894 
895 	/* Parse and consume length. */
896 	cp = memchr(data, ':', len);
897 	if (cp == NULL || cp == data) {
898 		send_error(EINVAL);
899 		return;
900 	}
901 	resid = parse_integer(data, cp - data);
902 	len -= (cp - data) + 1;
903 	data += (cp - data) + 1;
904 
905 	/* Verify the available bytes match the length. */
906 	if (len != resid * 2) {
907 		send_error(EINVAL);
908 		return;
909 	}
910 
911 	while (resid > 0) {
912 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
913 		if (error == -1) {
914 			send_error(errno);
915 			return;
916 		}
917 		if (error == 0) {
918 			send_error(EFAULT);
919 			return;
920 		}
921 
922 		/* Write bytes to current page. */
923 		todo = getpagesize() - gpa % getpagesize();
924 		if (todo > resid)
925 			todo = resid;
926 
927 		cp = paddr_guest2host(ctx, gpa, todo);
928 		if (cp != NULL) {
929 			/*
930 			 * If this page is guest RAM, write it a byte
931 			 * at a time.
932 			 */
933 			while (todo > 0) {
934 				assert(len >= 2);
935 				*cp = parse_byte(data);
936 				data += 2;
937 				len -= 2;
938 				cp++;
939 				gpa++;
940 				gva++;
941 				resid--;
942 				todo--;
943 			}
944 		} else {
945 			/*
946 			 * If this page isn't guest RAM, try to handle
947 			 * it via MMIO.  For MMIO requests, use
948 			 * aligned writes of words when possible.
949 			 */
950 			while (todo > 0) {
951 				if (gpa & 1 || todo == 1) {
952 					bytes = 1;
953 					val = parse_byte(data);
954 				} else if (gpa & 2 || todo == 2) {
955 					bytes = 2;
956 					val = parse_byte(data) |
957 					    (parse_byte(data + 2) << 8);
958 				} else {
959 					bytes = 4;
960 					val = parse_byte(data) |
961 					    (parse_byte(data + 2) << 8) |
962 					    (parse_byte(data + 4) << 16) |
963 					    (parse_byte(data + 6) << 24);
964 				}
965 				error = write_mem(ctx, cur_vcpu, gpa, val,
966 				    bytes);
967 				if (error == 0) {
968 					gpa += bytes;
969 					gva += bytes;
970 					resid -= bytes;
971 					todo -= bytes;
972 					data += 2 * bytes;
973 					len -= 2 * bytes;
974 				} else {
975 					send_error(EFAULT);
976 					return;
977 				}
978 			}
979 		}
980 		assert(resid == 0 || gpa % getpagesize() == 0);
981 	}
982 	assert(len == 0);
983 	send_ok();
984 }
985 
/*
 * Report whether the 'len' bytes at 'data' begin with the string
 * 'cmd'.  This is a prefix match: anything following 'cmd' is ignored.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (cmdlen <= len && memcmp(data, cmd, cmdlen) == 0);
}
994 
/*
 * Handle the feature list that follows "qSupported".
 *
 * 'data'/'len' is the rest of the packet, a ';'-separated list of
 * "name=value", "name+" or "name-" items.  The list is parsed but, as
 * no features are supported yet, nothing is done with it.  The reply
 * advertises the packet size only.  An ENOMEM error reply is sent if
 * the scratch copy of the list cannot be allocated.
 */
static void
check_features(const uint8_t *data, size_t len)
{
	char *feature, *next_feature, *str, *value;
	bool supported;

	/* strsep() modifies its input, so work on a terminated copy. */
	str = malloc(len + 1);
	if (str == NULL) {
		send_error(ENOMEM);
		return;
	}
	memcpy(str, data, len);
	str[len] = '\0';
	next_feature = str;

	while ((feature = strsep(&next_feature, ";")) != NULL) {
		/*
		 * Null features shouldn't exist, but skip if they
		 * do.
		 */
		if (strcmp(feature, "") == 0)
			continue;

		/*
		 * Look for the value or supported / not supported
		 * flag.
		 */
		value = strchr(feature, '=');
		if (value != NULL) {
			*value = '\0';
			value++;
			supported = true;
		} else {
			value = feature + strlen(feature) - 1;
			switch (*value) {
			case '+':
				supported = true;
				break;
			case '-':
				supported = false;
				break;
			default:
				/*
				 * This is really a protocol error,
				 * but we just ignore malformed
				 * features for ease of
				 * implementation.
				 */
				continue;
			}
			value = NULL;
		}

		/* No currently supported features. */
	}
	free(str);

	start_packet();

	/* This is an arbitrary limit. */
	append_string("PacketSize=4096");
	finish_packet();
}
1054 
1055 static void
1056 gdb_query(const uint8_t *data, size_t len)
1057 {
1058 
1059 	/*
1060 	 * TODO:
1061 	 * - qSearch
1062 	 */
1063 	if (command_equals(data, len, "qAttached")) {
1064 		start_packet();
1065 		append_char('1');
1066 		finish_packet();
1067 	} else if (command_equals(data, len, "qC")) {
1068 		start_packet();
1069 		append_string("QC");
1070 		append_integer(cur_vcpu + 1);
1071 		finish_packet();
1072 	} else if (command_equals(data, len, "qfThreadInfo")) {
1073 		cpuset_t mask;
1074 		bool first;
1075 		int vcpu;
1076 
1077 		if (CPU_EMPTY(&vcpus_active)) {
1078 			send_error(EINVAL);
1079 			return;
1080 		}
1081 		mask = vcpus_active;
1082 		start_packet();
1083 		append_char('m');
1084 		first = true;
1085 		while (!CPU_EMPTY(&mask)) {
1086 			vcpu = CPU_FFS(&mask) - 1;
1087 			CPU_CLR(vcpu, &mask);
1088 			if (first)
1089 				first = false;
1090 			else
1091 				append_char(',');
1092 			append_integer(vcpu + 1);
1093 		}
1094 		finish_packet();
1095 	} else if (command_equals(data, len, "qsThreadInfo")) {
1096 		start_packet();
1097 		append_char('l');
1098 		finish_packet();
1099 	} else if (command_equals(data, len, "qSupported")) {
1100 		data += strlen("qSupported");
1101 		len -= strlen("qSupported");
1102 		check_features(data, len);
1103 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1104 		char buf[16];
1105 		int tid;
1106 
1107 		data += strlen("qThreadExtraInfo");
1108 		len -= strlen("qThreadExtraInfo");
1109 		if (*data != ',') {
1110 			send_error(EINVAL);
1111 			return;
1112 		}
1113 		tid = parse_threadid(data + 1, len - 1);
1114 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1115 			send_error(EINVAL);
1116 			return;
1117 		}
1118 
1119 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1120 		start_packet();
1121 		append_asciihex(buf);
1122 		finish_packet();
1123 	} else
1124 		send_empty_response();
1125 }
1126 
1127 static void
1128 handle_command(const uint8_t *data, size_t len)
1129 {
1130 
1131 	/* Reject packets with a sequence-id. */
1132 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1133 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1134 		send_empty_response();
1135 		return;
1136 	}
1137 
1138 	switch (*data) {
1139 	case 'c':
1140 		if (len != 1) {
1141 			send_error(EINVAL);
1142 			break;
1143 		}
1144 
1145 		/* Don't send a reply until a stop occurs. */
1146 		gdb_resume_vcpus();
1147 		break;
1148 	case 'D':
1149 		send_ok();
1150 
1151 		/* TODO: Resume any stopped CPUs. */
1152 		break;
1153 	case 'g': {
1154 		gdb_read_regs();
1155 		break;
1156 	}
1157 	case 'H': {
1158 		int tid;
1159 
1160 		if (data[1] != 'g' && data[1] != 'c') {
1161 			send_error(EINVAL);
1162 			break;
1163 		}
1164 		tid = parse_threadid(data + 2, len - 2);
1165 		if (tid == -2) {
1166 			send_error(EINVAL);
1167 			break;
1168 		}
1169 
1170 		if (CPU_EMPTY(&vcpus_active)) {
1171 			send_error(EINVAL);
1172 			break;
1173 		}
1174 		if (tid == -1 || tid == 0)
1175 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1176 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1177 			cur_vcpu = tid - 1;
1178 		else {
1179 			send_error(EINVAL);
1180 			break;
1181 		}
1182 		send_ok();
1183 		break;
1184 	}
1185 	case 'm':
1186 		gdb_read_mem(data, len);
1187 		break;
1188 	case 'M':
1189 		gdb_write_mem(data, len);
1190 		break;
1191 	case 'T': {
1192 		int tid;
1193 
1194 		tid = parse_threadid(data + 1, len - 1);
1195 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1196 			send_error(EINVAL);
1197 			return;
1198 		}
1199 		send_ok();
1200 		break;
1201 	}
1202 	case 'q':
1203 		gdb_query(data, len);
1204 		break;
1205 	case 's':
1206 		if (len != 1) {
1207 			send_error(EINVAL);
1208 			break;
1209 		}
1210 
1211 		/* Don't send a reply until a stop occurs. */
1212 		if (!gdb_step_vcpu(cur_vcpu)) {
1213 			send_error(EOPNOTSUPP);
1214 			break;
1215 		}
1216 		break;
1217 	case '?':
1218 		/* XXX: Only if stopped? */
1219 		/* For now, just report that we are always stopped. */
1220 		start_packet();
1221 		append_char('S');
1222 		append_byte(GDB_SIGNAL_TRAP);
1223 		finish_packet();
1224 		break;
1225 	case 'G': /* TODO */
1226 	case 'v':
1227 		/* Handle 'vCont' */
1228 		/* 'vCtrlC' */
1229 	case 'p': /* TODO */
1230 	case 'P': /* TODO */
1231 	case 'Q': /* TODO */
1232 	case 't': /* TODO */
1233 	case 'X': /* TODO */
1234 	case 'z': /* TODO */
1235 	case 'Z': /* TODO */
1236 	default:
1237 		send_empty_response();
1238 	}
1239 }
1240 
1241 /* Check for a valid packet in the command buffer. */
1242 static void
1243 check_command(int fd)
1244 {
1245 	uint8_t *head, *hash, *p, sum;
1246 	size_t avail, plen;
1247 
1248 	for (;;) {
1249 		avail = cur_comm.len;
1250 		if (avail == 0)
1251 			return;
1252 		head = io_buffer_head(&cur_comm);
1253 		switch (*head) {
1254 		case 0x03:
1255 			debug("<- Ctrl-C\n");
1256 			io_buffer_consume(&cur_comm, 1);
1257 
1258 			gdb_suspend_vcpus();
1259 			break;
1260 		case '+':
1261 			/* ACK of previous response. */
1262 			debug("<- +\n");
1263 			if (response_pending())
1264 				io_buffer_reset(&cur_resp);
1265 			io_buffer_consume(&cur_comm, 1);
1266 			if (stop_pending) {
1267 				stop_pending = false;
1268 				report_stop();
1269 				send_pending_data(fd);
1270 			}
1271 			break;
1272 		case '-':
1273 			/* NACK of previous response. */
1274 			debug("<- -\n");
1275 			if (response_pending()) {
1276 				cur_resp.len += cur_resp.start;
1277 				cur_resp.start = 0;
1278 				if (cur_resp.data[0] == '+')
1279 					io_buffer_advance(&cur_resp, 1);
1280 				debug("-> %.*s\n", (int)cur_resp.len,
1281 				    io_buffer_head(&cur_resp));
1282 			}
1283 			io_buffer_consume(&cur_comm, 1);
1284 			send_pending_data(fd);
1285 			break;
1286 		case '$':
1287 			/* Packet. */
1288 
1289 			if (response_pending()) {
1290 				warnx("New GDB command while response in "
1291 				    "progress");
1292 				io_buffer_reset(&cur_resp);
1293 			}
1294 
1295 			/* Is packet complete? */
1296 			hash = memchr(head, '#', avail);
1297 			if (hash == NULL)
1298 				return;
1299 			plen = (hash - head + 1) + 2;
1300 			if (avail < plen)
1301 				return;
1302 			debug("<- %.*s\n", (int)plen, head);
1303 
1304 			/* Verify checksum. */
1305 			for (sum = 0, p = head + 1; p < hash; p++)
1306 				sum += *p;
1307 			if (sum != parse_byte(hash + 1)) {
1308 				io_buffer_consume(&cur_comm, plen);
1309 				debug("-> -\n");
1310 				send_char('-');
1311 				send_pending_data(fd);
1312 				break;
1313 			}
1314 			send_char('+');
1315 
1316 			handle_command(head + 1, hash - (head + 1));
1317 			io_buffer_consume(&cur_comm, plen);
1318 			if (!response_pending())
1319 				debug("-> +\n");
1320 			send_pending_data(fd);
1321 			break;
1322 		default:
1323 			/* XXX: Possibly drop connection instead. */
1324 			debug("-> %02x\n", *head);
1325 			io_buffer_consume(&cur_comm, 1);
1326 			break;
1327 		}
1328 	}
1329 }
1330 
1331 static void
1332 gdb_readable(int fd, enum ev_type event, void *arg)
1333 {
1334 	ssize_t nread;
1335 	int pending;
1336 
1337 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1338 		warn("FIONREAD on GDB socket");
1339 		return;
1340 	}
1341 
1342 	/*
1343 	 * 'pending' might be zero due to EOF.  We need to call read
1344 	 * with a non-zero length to detect EOF.
1345 	 */
1346 	if (pending == 0)
1347 		pending = 1;
1348 
1349 	/* Ensure there is room in the command buffer. */
1350 	io_buffer_grow(&cur_comm, pending);
1351 	assert(io_buffer_avail(&cur_comm) >= pending);
1352 
1353 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1354 	if (nread == 0) {
1355 		close_connection();
1356 	} else if (nread == -1) {
1357 		if (errno == EAGAIN)
1358 			return;
1359 
1360 		warn("Read from GDB socket");
1361 		close_connection();
1362 	} else {
1363 		cur_comm.len += nread;
1364 		pthread_mutex_lock(&gdb_lock);
1365 		check_command(fd);
1366 		pthread_mutex_unlock(&gdb_lock);
1367 	}
1368 }
1369 
1370 static void
1371 gdb_writable(int fd, enum ev_type event, void *arg)
1372 {
1373 
1374 	send_pending_data(fd);
1375 }
1376 
1377 static void
1378 new_connection(int fd, enum ev_type event, void *arg)
1379 {
1380 	int optval, s;
1381 
1382 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1383 	if (s == -1) {
1384 		if (arg != NULL)
1385 			err(1, "Failed accepting initial GDB connection");
1386 
1387 		/* Silently ignore errors post-startup. */
1388 		return;
1389 	}
1390 
1391 	optval = 1;
1392 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1393 	    -1) {
1394 		warn("Failed to disable SIGPIPE for GDB connection");
1395 		close(s);
1396 		return;
1397 	}
1398 
1399 	pthread_mutex_lock(&gdb_lock);
1400 	if (cur_fd != -1) {
1401 		close(s);
1402 		warnx("Ignoring additional GDB connection.");
1403 	}
1404 
1405 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1406 	if (read_event == NULL) {
1407 		if (arg != NULL)
1408 			err(1, "Failed to setup initial GDB connection");
1409 		pthread_mutex_unlock(&gdb_lock);
1410 		return;
1411 	}
1412 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1413 	if (write_event == NULL) {
1414 		if (arg != NULL)
1415 			err(1, "Failed to setup initial GDB connection");
1416 		mevent_delete_close(read_event);
1417 		read_event = NULL;
1418 	}
1419 
1420 	cur_fd = s;
1421 	cur_vcpu = 0;
1422 	stepping_vcpu = -1;
1423 	stopped_vcpu = -1;
1424 	stop_pending = false;
1425 
1426 	/* Break on attach. */
1427 	first_stop = true;
1428 	gdb_suspend_vcpus();
1429 	pthread_mutex_unlock(&gdb_lock);
1430 }
1431 
1432 #ifndef WITHOUT_CAPSICUM
1433 void
1434 limit_gdb_socket(int s)
1435 {
1436 	cap_rights_t rights;
1437 	unsigned long ioctls[] = { FIONREAD };
1438 
1439 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1440 	    CAP_SETSOCKOPT, CAP_IOCTL);
1441 	if (caph_rights_limit(s, &rights) == -1)
1442 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1443 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1444 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1445 }
1446 #endif
1447 
1448 void
1449 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1450 {
1451 	struct sockaddr_in sin;
1452 	int error, flags, s;
1453 
1454 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1455 
1456 	error = pthread_mutex_init(&gdb_lock, NULL);
1457 	if (error != 0)
1458 		errc(1, error, "gdb mutex init");
1459 	error = pthread_cond_init(&idle_vcpus, NULL);
1460 	if (error != 0)
1461 		errc(1, error, "gdb cv init");
1462 
1463 	ctx = _ctx;
1464 	s = socket(PF_INET, SOCK_STREAM, 0);
1465 	if (s < 0)
1466 		err(1, "gdb socket create");
1467 
1468 	sin.sin_len = sizeof(sin);
1469 	sin.sin_family = AF_INET;
1470 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1471 	sin.sin_port = htons(sport);
1472 
1473 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1474 		err(1, "gdb socket bind");
1475 
1476 	if (listen(s, 1) < 0)
1477 		err(1, "gdb socket listen");
1478 
1479 	if (wait) {
1480 		/*
1481 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1482 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1483 		 * it starts execution.  The vcpu will remain suspended
1484 		 * until a debugger connects.
1485 		 */
1486 		stepping_vcpu = -1;
1487 		stopped_vcpu = -1;
1488 		CPU_SET(0, &vcpus_suspended);
1489 	}
1490 
1491 	flags = fcntl(s, F_GETFL);
1492 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1493 		err(1, "Failed to mark gdb socket non-blocking");
1494 
1495 #ifndef WITHOUT_CAPSICUM
1496 	limit_gdb_socket(s);
1497 #endif
1498 	mevent_add(s, EVF_READ, new_connection, NULL);
1499 }
1500