xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 123af6ec70016f5556da5972d4d63c7d175c06d3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/ioctl.h>
36 #include <sys/mman.h>
37 #include <sys/socket.h>
38 #include <machine/atomic.h>
39 #include <machine/specialreg.h>
40 #include <machine/vmm.h>
41 #include <netinet/in.h>
42 #include <assert.h>
43 #ifndef WITHOUT_CAPSICUM
44 #include <capsicum_helpers.h>
45 #endif
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <stdbool.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <sysexits.h>
56 #include <unistd.h>
57 #include <vmmapi.h>
58 
59 #include "bhyverun.h"
60 #include "mem.h"
61 #include "mevent.h"
62 
63 /*
64  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
65  * use SIGTRAP.
66  */
67 #define	GDB_SIGNAL_TRAP		5
68 
/* Forward declarations for functions referenced before their definitions. */
static void gdb_resume_vcpus(void);
static void check_command(int fd);

/*
 * mevent registrations for the debugger socket.  Both are reset to NULL
 * by close_connection().
 */
static struct mevent *read_event, *write_event;

/*
 * vcpus_active:    vCPUs registered through gdb_cpu_add().
 * vcpus_suspended: vCPUs that have been asked to stop; cleared on resume.
 * vcpus_waiting:   vCPUs currently parked inside _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Held around updates to the debugger state in this file. */
static pthread_mutex_t gdb_lock;
/* Parked vCPUs sleep on this; broadcast on resume and on single-step. */
static pthread_cond_t idle_vcpus;
/*
 * stop_pending: a stop reply is owed but must wait for the previous
 *               response to be ACKed.
 * first_stop:   set on attach so that the initial stop is not reported.
 */
static bool stop_pending, first_stop;
/*
 * stepping_vcpu: vCPU being single-stepped, or -1.
 * stopped_vcpu:  vCPU to name in the next stop reply, or -1.
 */
static int stepping_vcpu, stopped_vcpu;
79 
80 /*
81  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
82  * read buffer, 'start' is unused and 'len' contains the number of
83  * valid bytes in the buffer.  For a write buffer, 'start' is set to
84  * the index of the next byte in 'data' to send, and 'len' contains
85  * the remaining number of valid bytes to send.
86  */
struct io_buffer {
	uint8_t *data;		/* backing storage, resized by io_buffer_grow() */
	size_t capacity;	/* allocated size of 'data' in bytes */
	size_t start;		/* offset in 'data' of the first valid byte */
	size_t len;		/* count of valid bytes beginning at 'start' */
};
93 
/* cur_comm holds bytes read from the debugger; cur_resp holds queued output. */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet payload currently being built. */
static uint8_t cur_csum;
/* vCPU that register and memory commands operate on (chosen by 'H'). */
static int cur_vcpu;
/* VM context saved by init_gdb(). */
static struct vmctx *ctx;
/* Socket of the attached debugger, or -1 when there is none. */
static int cur_fd = -1;
99 
/*
 * Registers fetched by gdb_read_regs(), in the order they are emitted
 * in the reply.  Entry i is emitted using gdb_regsize[i] bytes.
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
126 
/*
 * Number of bytes emitted for each register in gdb_regset[], indexed
 * identically: RAX through RIP use 8 bytes, RFLAGS and the segment
 * registers use 4.
 */
const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
153 
154 #ifdef GDB_LOG
155 #include <stdarg.h>
156 #include <stdio.h>
157 
158 static void __printflike(1, 2)
159 debug(const char *fmt, ...)
160 {
161 	static FILE *logfile;
162 	va_list ap;
163 
164 	if (logfile == NULL) {
165 		logfile = fopen("/tmp/bhyve_gdb.log", "w");
166 		if (logfile == NULL)
167 			return;
168 #ifndef WITHOUT_CAPSICUM
169 		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
170 			fclose(logfile);
171 			logfile = NULL;
172 			return;
173 		}
174 #endif
175 		setlinebuf(logfile);
176 	}
177 	va_start(ap, fmt);
178 	vfprintf(logfile, fmt, ap);
179 	va_end(ap);
180 }
181 #else
182 #define debug(...)
183 #endif
184 
185 static int
186 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
187 {
188 	uint64_t regs[4];
189 	const int regset[4] = {
190 		VM_REG_GUEST_CR0,
191 		VM_REG_GUEST_CR3,
192 		VM_REG_GUEST_CR4,
193 		VM_REG_GUEST_EFER
194 	};
195 
196 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
197 		return (-1);
198 
199 	/*
200 	 * For the debugger, always pretend to be the kernel (CPL 0),
201 	 * and if long-mode is enabled, always parse addresses as if
202 	 * in 64-bit mode.
203 	 */
204 	paging->cr3 = regs[1];
205 	paging->cpl = 0;
206 	if (regs[3] & EFER_LMA)
207 		paging->cpu_mode = CPU_MODE_64BIT;
208 	else if (regs[0] & CR0_PE)
209 		paging->cpu_mode = CPU_MODE_PROTECTED;
210 	else
211 		paging->cpu_mode = CPU_MODE_REAL;
212 	if (!(regs[0] & CR0_PG))
213 		paging->paging_mode = PAGING_MODE_FLAT;
214 	else if (!(regs[2] & CR4_PAE))
215 		paging->paging_mode = PAGING_MODE_32;
216 	else if (regs[3] & EFER_LME)
217 		paging->paging_mode = PAGING_MODE_64;
218 	else
219 		paging->paging_mode = PAGING_MODE_PAE;
220 	return (0);
221 }
222 
223 /*
224  * Map a guest virtual address to a physical address (for a given vcpu).
225  * If a guest virtual address is valid, return 1.  If the address is
226  * not valid, return 0.  If an error occurs obtaining the mapping,
227  * return -1.
228  */
229 static int
230 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
231 {
232 	struct vm_guest_paging paging;
233 	int fault;
234 
235 	if (guest_paging_info(vcpu, &paging) == -1)
236 		return (-1);
237 
238 	/*
239 	 * Always use PROT_READ.  We really care if the VA is
240 	 * accessible, not if the current vCPU can write.
241 	 */
242 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
243 	    &fault) == -1)
244 		return (-1);
245 	if (fault)
246 		return (0);
247 	return (1);
248 }
249 
250 static void
251 io_buffer_reset(struct io_buffer *io)
252 {
253 
254 	io->start = 0;
255 	io->len = 0;
256 }
257 
258 /* Available room for adding data. */
259 static size_t
260 io_buffer_avail(struct io_buffer *io)
261 {
262 
263 	return (io->capacity - (io->start + io->len));
264 }
265 
266 static uint8_t *
267 io_buffer_head(struct io_buffer *io)
268 {
269 
270 	return (io->data + io->start);
271 }
272 
273 static uint8_t *
274 io_buffer_tail(struct io_buffer *io)
275 {
276 
277 	return (io->data + io->start + io->len);
278 }
279 
280 static void
281 io_buffer_advance(struct io_buffer *io, size_t amount)
282 {
283 
284 	assert(amount <= io->len);
285 	io->start += amount;
286 	io->len -= amount;
287 }
288 
289 static void
290 io_buffer_consume(struct io_buffer *io, size_t amount)
291 {
292 
293 	io_buffer_advance(io, amount);
294 	if (io->len == 0) {
295 		io->start = 0;
296 		return;
297 	}
298 
299 	/*
300 	 * XXX: Consider making this move optional and compacting on a
301 	 * future read() before realloc().
302 	 */
303 	memmove(io->data, io_buffer_head(io), io->len);
304 	io->start = 0;
305 }
306 
307 static void
308 io_buffer_grow(struct io_buffer *io, size_t newsize)
309 {
310 	uint8_t *new_data;
311 	size_t avail, new_cap;
312 
313 	avail = io_buffer_avail(io);
314 	if (newsize <= avail)
315 		return;
316 
317 	new_cap = io->capacity + (newsize - avail);
318 	new_data = realloc(io->data, new_cap);
319 	if (new_data == NULL)
320 		err(1, "Failed to grow GDB I/O buffer");
321 	io->data = new_data;
322 	io->capacity = new_cap;
323 }
324 
325 static bool
326 response_pending(void)
327 {
328 
329 	if (cur_resp.start == 0 && cur_resp.len == 0)
330 		return (false);
331 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
332 		return (false);
333 	return (true);
334 }
335 
336 static void
337 close_connection(void)
338 {
339 
340 	/*
341 	 * XXX: This triggers a warning because mevent does the close
342 	 * before the EV_DELETE.
343 	 */
344 	pthread_mutex_lock(&gdb_lock);
345 	mevent_delete(write_event);
346 	mevent_delete_close(read_event);
347 	write_event = NULL;
348 	read_event = NULL;
349 	io_buffer_reset(&cur_comm);
350 	io_buffer_reset(&cur_resp);
351 	cur_fd = -1;
352 
353 	/* Resume any stopped vCPUs. */
354 	gdb_resume_vcpus();
355 	pthread_mutex_unlock(&gdb_lock);
356 }
357 
/* Convert a value in the range 0-15 to its lowercase ASCII hex digit. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble < 10 ? '0' + nibble : 'a' + (nibble - 10));
}
367 
/*
 * Convert one ASCII hex digit (either case) to its value.  Any other
 * character yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t off;

	/* Each range test relies on uint8_t wrap-around for out-of-range input. */
	off = v - '0';
	if (off < 10)
		return (off);
	off = v - 'a';
	if (off < 6)
		return (off + 10);
	off = v - 'A';
	if (off < 6)
		return (off + 10);
	return (0xF);
}
380 
/*
 * Parse 'len' hex digits at 'p', most significant digit first.  An
 * empty string yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t v;
	size_t i;

	v = 0;
	for (i = 0; i < len; i++)
		v = (v << 4) | parse_digit(p[i]);
	return (v);
}
396 
/* Parse the two hex digits at p[0] (high nibble) and p[1] (low nibble). */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
403 
404 static void
405 send_pending_data(int fd)
406 {
407 	ssize_t nwritten;
408 
409 	if (cur_resp.len == 0) {
410 		mevent_disable(write_event);
411 		return;
412 	}
413 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
414 	if (nwritten == -1) {
415 		warn("Write to GDB socket failed");
416 		close_connection();
417 	} else {
418 		io_buffer_advance(&cur_resp, nwritten);
419 		if (cur_resp.len == 0)
420 			mevent_disable(write_event);
421 		else
422 			mevent_enable(write_event);
423 	}
424 }
425 
426 /* Append a single character to the output buffer. */
427 static void
428 send_char(uint8_t data)
429 {
430 	io_buffer_grow(&cur_resp, 1);
431 	*io_buffer_tail(&cur_resp) = data;
432 	cur_resp.len++;
433 }
434 
435 /* Append an array of bytes to the output buffer. */
436 static void
437 send_data(const uint8_t *data, size_t len)
438 {
439 
440 	io_buffer_grow(&cur_resp, len);
441 	memcpy(io_buffer_tail(&cur_resp), data, len);
442 	cur_resp.len += len;
443 }
444 
/* Write the two hex digits of 'v' (high nibble first) to buf[0..1]. */
static void
format_byte(uint8_t v, uint8_t *buf)
{

	buf[0] = hex_digit(v / 16);
	buf[1] = hex_digit(v % 16);
}
452 
/*
 * Queue 'v' as two hex characters.  The packet checksum is not
 * updated.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
465 
466 static void
467 start_packet(void)
468 {
469 
470 	send_char('$');
471 	cur_csum = 0;
472 }
473 
474 static void
475 finish_packet(void)
476 {
477 
478 	send_char('#');
479 	send_byte(cur_csum);
480 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
481 }
482 
483 /*
484  * Append a single character (for the packet payload) and update the
485  * checksum.
486  */
487 static void
488 append_char(uint8_t v)
489 {
490 
491 	send_char(v);
492 	cur_csum += v;
493 }
494 
495 /*
496  * Append an array of bytes (for the packet payload) and update the
497  * checksum.
498  */
499 static void
500 append_packet_data(const uint8_t *data, size_t len)
501 {
502 
503 	send_data(data, len);
504 	while (len > 0) {
505 		cur_csum += *data;
506 		data++;
507 		len--;
508 	}
509 }
510 
/* Add a NUL-terminated string (without the NUL) to the packet payload. */
static void
append_string(const char *str)
{
	size_t n;

	n = strlen(str);
	append_packet_data((const uint8_t *)str, n);
}
517 
/* Add 'v' to the packet payload as two hex characters. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
526 
/*
 * Add the low 'len' bytes of 'value' to the payload, least significant
 * byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

	while (len > 0) {
		append_byte(value & 0xff);
		value >>= 8;
		len--;
	}
}
537 
/*
 * Add the low 'len' bytes of 'value' to the payload, most significant
 * byte first, each as two hex characters.  'len' must be non-zero
 * because it sizes a variable-length array.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	uint8_t buf[len * 2];
	size_t i;

	/* Fill from the end so the least significant byte lands last. */
	for (i = len; i > 0; i--) {
		format_byte(value, &buf[(i - 1) * 2]);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
550 
/*
 * Add 'value' to the payload as big-endian hex using the fewest whole
 * bytes that can hold it.  Zero is emitted as the single character
 * '0'.
 */
static void
append_integer(unsigned int value)
{

	if (value == 0)
		append_char('0');
	else {
		/*
		 * fls() gives the index of the highest set bit, so round
		 * that bit count up to whole bytes.  The parentheses
		 * matter: "fls(value) + 7 / 8" is just fls(value).
		 */
		append_unsigned_be(value, (fls(value) + 7) / 8);
	}
}
560 
/*
 * Add a NUL-terminated string to the payload with every character
 * encoded as two hex digits.
 */
static void
append_asciihex(const char *str)
{

	for (; *str != '\0'; str++)
		append_byte(*str);
}
570 
/* Queue a reply packet with an empty payload. */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
578 
/*
 * Queue an "Exx" error reply, where xx is the low byte of 'error' in
 * hex.
 */
static void
send_error(int error)
{

	start_packet();
	append_char('E');
	append_byte(error);
	finish_packet();
}
588 
/* Queue an "OK" reply packet. */
static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}
597 
/*
 * Decode a thread-id field.  Returns -2 for an empty field, 0 for "0",
 * -1 for "-1", and otherwise the field parsed as hexadecimal.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	if (len == 0)
		return (-2);
	if (len == 1 && data[0] == '0')
		return (0);
	if (len == 2 && memcmp(data, "-1", 2) == 0)
		return (-1);
	return (parse_integer(data, len));
}
610 
611 static void
612 report_stop(void)
613 {
614 
615 	start_packet();
616 	if (stopped_vcpu == -1)
617 		append_char('S');
618 	else
619 		append_char('T');
620 	append_byte(GDB_SIGNAL_TRAP);
621 	if (stopped_vcpu != -1) {
622 		append_string("thread:");
623 		append_integer(stopped_vcpu + 1);
624 		append_char(';');
625 	}
626 	stopped_vcpu = -1;
627 	finish_packet();
628 }
629 
630 static void
631 gdb_finish_suspend_vcpus(void)
632 {
633 
634 	if (first_stop) {
635 		first_stop = false;
636 		stopped_vcpu = -1;
637 	} else if (response_pending())
638 		stop_pending = true;
639 	else {
640 		report_stop();
641 		send_pending_data(cur_fd);
642 	}
643 }
644 
645 static void
646 _gdb_cpu_suspend(int vcpu, bool report_stop)
647 {
648 
649 	debug("$vCPU %d suspending\n", vcpu);
650 	CPU_SET(vcpu, &vcpus_waiting);
651 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
652 		gdb_finish_suspend_vcpus();
653 	while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
654 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
655 	CPU_CLR(vcpu, &vcpus_waiting);
656 	debug("$vCPU %d resuming\n", vcpu);
657 }
658 
659 void
660 gdb_cpu_add(int vcpu)
661 {
662 
663 	debug("$vCPU %d starting\n", vcpu);
664 	pthread_mutex_lock(&gdb_lock);
665 	CPU_SET(vcpu, &vcpus_active);
666 
667 	/*
668 	 * If a vcpu is added while vcpus are stopped, suspend the new
669 	 * vcpu so that it will pop back out with a debug exit before
670 	 * executing the first instruction.
671 	 */
672 	if (!CPU_EMPTY(&vcpus_suspended)) {
673 		CPU_SET(vcpu, &vcpus_suspended);
674 		_gdb_cpu_suspend(vcpu, false);
675 	}
676 	pthread_mutex_unlock(&gdb_lock);
677 }
678 
679 void
680 gdb_cpu_suspend(int vcpu)
681 {
682 
683 	pthread_mutex_lock(&gdb_lock);
684 	_gdb_cpu_suspend(vcpu, true);
685 	pthread_mutex_unlock(&gdb_lock);
686 }
687 
688 void
689 gdb_cpu_mtrap(int vcpu)
690 {
691 
692 	debug("$vCPU %d MTRAP\n", vcpu);
693 	pthread_mutex_lock(&gdb_lock);
694 	if (vcpu == stepping_vcpu) {
695 		stepping_vcpu = -1;
696 		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
697 		vm_suspend_cpu(ctx, vcpu);
698 		assert(stopped_vcpu == -1);
699 		stopped_vcpu = vcpu;
700 		_gdb_cpu_suspend(vcpu, true);
701 	}
702 	pthread_mutex_unlock(&gdb_lock);
703 }
704 
705 static void
706 gdb_suspend_vcpus(void)
707 {
708 
709 	assert(pthread_mutex_isowned_np(&gdb_lock));
710 	debug("suspending all CPUs\n");
711 	vcpus_suspended = vcpus_active;
712 	vm_suspend_cpu(ctx, -1);
713 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
714 		gdb_finish_suspend_vcpus();
715 }
716 
717 static bool
718 gdb_step_vcpu(int vcpu)
719 {
720 	int error, val;
721 
722 	debug("$vCPU %d step\n", vcpu);
723 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
724 	if (error < 0)
725 		return (false);
726 	error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
727 	vm_resume_cpu(ctx, vcpu);
728 	stepping_vcpu = vcpu;
729 	pthread_cond_broadcast(&idle_vcpus);
730 	return (true);
731 }
732 
733 static void
734 gdb_resume_vcpus(void)
735 {
736 
737 	assert(pthread_mutex_isowned_np(&gdb_lock));
738 	vm_resume_cpu(ctx, -1);
739 	debug("resuming all CPUs\n");
740 	CPU_ZERO(&vcpus_suspended);
741 	pthread_cond_broadcast(&idle_vcpus);
742 }
743 
744 static void
745 gdb_read_regs(void)
746 {
747 	uint64_t regvals[nitems(gdb_regset)];
748 	int i;
749 
750 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
751 	    gdb_regset, regvals) == -1) {
752 		send_error(errno);
753 		return;
754 	}
755 	start_packet();
756 	for (i = 0; i < nitems(regvals); i++)
757 		append_unsigned_native(regvals[i], gdb_regsize[i]);
758 	finish_packet();
759 }
760 
761 static void
762 gdb_read_mem(const uint8_t *data, size_t len)
763 {
764 	uint64_t gpa, gva, val;
765 	uint8_t *cp;
766 	size_t resid, todo, bytes;
767 	bool started;
768 	int error;
769 
770 	cp = memchr(data, ',', len);
771 	if (cp == NULL) {
772 		send_error(EINVAL);
773 		return;
774 	}
775 	gva = parse_integer(data + 1, cp - (data + 1));
776 	resid = parse_integer(cp + 1, len - (cp + 1 - data));
777 	started = false;
778 
779 	while (resid > 0) {
780 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
781 		if (error == -1) {
782 			if (started)
783 				finish_packet();
784 			else
785 				send_error(errno);
786 			return;
787 		}
788 		if (error == 0) {
789 			if (started)
790 				finish_packet();
791 			else
792 				send_error(EFAULT);
793 			return;
794 		}
795 
796 		/* Read bytes from current page. */
797 		todo = getpagesize() - gpa % getpagesize();
798 		if (todo > resid)
799 			todo = resid;
800 
801 		cp = paddr_guest2host(ctx, gpa, todo);
802 		if (cp != NULL) {
803 			/*
804 			 * If this page is guest RAM, read it a byte
805 			 * at a time.
806 			 */
807 			if (!started) {
808 				start_packet();
809 				started = true;
810 			}
811 			while (todo > 0) {
812 				append_byte(*cp);
813 				cp++;
814 				gpa++;
815 				gva++;
816 				resid--;
817 				todo--;
818 			}
819 		} else {
820 			/*
821 			 * If this page isn't guest RAM, try to handle
822 			 * it via MMIO.  For MMIO requests, use
823 			 * aligned reads of words when possible.
824 			 */
825 			while (todo > 0) {
826 				if (gpa & 1 || todo == 1)
827 					bytes = 1;
828 				else if (gpa & 2 || todo == 2)
829 					bytes = 2;
830 				else
831 					bytes = 4;
832 				error = read_mem(ctx, cur_vcpu, gpa, &val,
833 				    bytes);
834 				if (error == 0) {
835 					if (!started) {
836 						start_packet();
837 						started = true;
838 					}
839 					gpa += bytes;
840 					gva += bytes;
841 					resid -= bytes;
842 					todo -= bytes;
843 					while (bytes > 0) {
844 						append_byte(val);
845 						val >>= 8;
846 						bytes--;
847 					}
848 				} else {
849 					if (started)
850 						finish_packet();
851 					else
852 						send_error(EFAULT);
853 					return;
854 				}
855 			}
856 		}
857 		assert(resid == 0 || gpa % getpagesize() == 0);
858 	}
859 	if (!started)
860 		start_packet();
861 	finish_packet();
862 }
863 
/*
 * Return true if the 'len'-byte packet at 'data' begins with the
 * NUL-terminated string 'cmd'.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (cmdlen <= len && memcmp(data, cmd, cmdlen) == 0);
}
872 
873 static void
874 gdb_query(const uint8_t *data, size_t len)
875 {
876 
877 	/*
878 	 * TODO:
879 	 * - qSearch
880 	 * - qSupported
881 	 */
882 	if (command_equals(data, len, "qAttached")) {
883 		start_packet();
884 		append_char('1');
885 		finish_packet();
886 	} else if (command_equals(data, len, "qC")) {
887 		start_packet();
888 		append_string("QC");
889 		append_integer(cur_vcpu + 1);
890 		finish_packet();
891 	} else if (command_equals(data, len, "qfThreadInfo")) {
892 		cpuset_t mask;
893 		bool first;
894 		int vcpu;
895 
896 		if (CPU_EMPTY(&vcpus_active)) {
897 			send_error(EINVAL);
898 			return;
899 		}
900 		mask = vcpus_active;
901 		start_packet();
902 		append_char('m');
903 		first = true;
904 		while (!CPU_EMPTY(&mask)) {
905 			vcpu = CPU_FFS(&mask) - 1;
906 			CPU_CLR(vcpu, &mask);
907 			if (first)
908 				first = false;
909 			else
910 				append_char(',');
911 			append_integer(vcpu + 1);
912 		}
913 		finish_packet();
914 	} else if (command_equals(data, len, "qsThreadInfo")) {
915 		start_packet();
916 		append_char('l');
917 		finish_packet();
918 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
919 		char buf[16];
920 		int tid;
921 
922 		data += strlen("qThreadExtraInfo");
923 		len -= strlen("qThreadExtraInfo");
924 		if (*data != ',') {
925 			send_error(EINVAL);
926 			return;
927 		}
928 		tid = parse_threadid(data + 1, len - 1);
929 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
930 			send_error(EINVAL);
931 			return;
932 		}
933 
934 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
935 		start_packet();
936 		append_asciihex(buf);
937 		finish_packet();
938 	} else
939 		send_empty_response();
940 }
941 
942 static void
943 handle_command(const uint8_t *data, size_t len)
944 {
945 
946 	/* Reject packets with a sequence-id. */
947 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
948 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
949 		send_empty_response();
950 		return;
951 	}
952 
953 	switch (*data) {
954 	case 'c':
955 		if (len != 1) {
956 			send_error(EINVAL);
957 			break;
958 		}
959 
960 		/* Don't send a reply until a stop occurs. */
961 		gdb_resume_vcpus();
962 		break;
963 	case 'D':
964 		send_ok();
965 
966 		/* TODO: Resume any stopped CPUs. */
967 		break;
968 	case 'g': {
969 		gdb_read_regs();
970 		break;
971 	}
972 	case 'H': {
973 		int tid;
974 
975 		if (data[1] != 'g' && data[1] != 'c') {
976 			send_error(EINVAL);
977 			break;
978 		}
979 		tid = parse_threadid(data + 2, len - 2);
980 		if (tid == -2) {
981 			send_error(EINVAL);
982 			break;
983 		}
984 
985 		if (CPU_EMPTY(&vcpus_active)) {
986 			send_error(EINVAL);
987 			break;
988 		}
989 		if (tid == -1 || tid == 0)
990 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
991 		else if (CPU_ISSET(tid - 1, &vcpus_active))
992 			cur_vcpu = tid - 1;
993 		else {
994 			send_error(EINVAL);
995 			break;
996 		}
997 		send_ok();
998 		break;
999 	}
1000 	case 'm':
1001 		gdb_read_mem(data, len);
1002 		break;
1003 	case 'T': {
1004 		int tid;
1005 
1006 		tid = parse_threadid(data + 1, len - 1);
1007 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1008 			send_error(EINVAL);
1009 			return;
1010 		}
1011 		send_ok();
1012 		break;
1013 	}
1014 	case 'q':
1015 		gdb_query(data, len);
1016 		break;
1017 	case 's':
1018 		if (len != 1) {
1019 			send_error(EINVAL);
1020 			break;
1021 		}
1022 
1023 		/* Don't send a reply until a stop occurs. */
1024 		if (!gdb_step_vcpu(cur_vcpu)) {
1025 			send_error(EOPNOTSUPP);
1026 			break;
1027 		}
1028 		break;
1029 	case '?':
1030 		/* XXX: Only if stopped? */
1031 		/* For now, just report that we are always stopped. */
1032 		start_packet();
1033 		append_char('S');
1034 		append_byte(GDB_SIGNAL_TRAP);
1035 		finish_packet();
1036 		break;
1037 	case 'G': /* TODO */
1038 	case 'M': /* TODO */
1039 	case 'v':
1040 		/* Handle 'vCont' */
1041 		/* 'vCtrlC' */
1042 	case 'p': /* TODO */
1043 	case 'P': /* TODO */
1044 	case 'Q': /* TODO */
1045 	case 't': /* TODO */
1046 	case 'X': /* TODO */
1047 	case 'z': /* TODO */
1048 	case 'Z': /* TODO */
1049 	default:
1050 		send_empty_response();
1051 	}
1052 }
1053 
/*
 * Process as much of the command buffer as possible.
 *
 * Each pass of the loop looks at the first buffered byte and handles
 * one item: a Ctrl-C interrupt (0x03), an ACK ('+'), a NACK ('-'), or
 * a '$'-framed packet.  Any other byte is discarded.  The function
 * returns when the buffer is empty or holds only an incomplete packet.
 * 'fd' is the socket replies are written to.
 */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			/* Interrupt request from the debugger. */
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			/* A stop reply deferred behind that response can go now. */
			if (stop_pending) {
				stop_pending = false;
				report_stop();
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/*
				 * Rewind so the whole response is sent
				 * again, skipping a leading '+' ACK.
				 */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* '$' through '#' plus the two checksum digits. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				/* Drop the packet and ask for a resend. */
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			/* ACK the packet; any reply is queued after the '+'. */
			send_char('+');

			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1143 
1144 static void
1145 gdb_readable(int fd, enum ev_type event, void *arg)
1146 {
1147 	ssize_t nread;
1148 	int pending;
1149 
1150 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1151 		warn("FIONREAD on GDB socket");
1152 		return;
1153 	}
1154 
1155 	/*
1156 	 * 'pending' might be zero due to EOF.  We need to call read
1157 	 * with a non-zero length to detect EOF.
1158 	 */
1159 	if (pending == 0)
1160 		pending = 1;
1161 
1162 	/* Ensure there is room in the command buffer. */
1163 	io_buffer_grow(&cur_comm, pending);
1164 	assert(io_buffer_avail(&cur_comm) >= pending);
1165 
1166 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1167 	if (nread == 0) {
1168 		close_connection();
1169 	} else if (nread == -1) {
1170 		if (errno == EAGAIN)
1171 			return;
1172 
1173 		warn("Read from GDB socket");
1174 		close_connection();
1175 	} else {
1176 		cur_comm.len += nread;
1177 		pthread_mutex_lock(&gdb_lock);
1178 		check_command(fd);
1179 		pthread_mutex_unlock(&gdb_lock);
1180 	}
1181 }
1182 
1183 static void
1184 gdb_writable(int fd, enum ev_type event, void *arg)
1185 {
1186 
1187 	send_pending_data(fd);
1188 }
1189 
1190 static void
1191 new_connection(int fd, enum ev_type event, void *arg)
1192 {
1193 	int optval, s;
1194 
1195 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1196 	if (s == -1) {
1197 		if (arg != NULL)
1198 			err(1, "Failed accepting initial GDB connection");
1199 
1200 		/* Silently ignore errors post-startup. */
1201 		return;
1202 	}
1203 
1204 	optval = 1;
1205 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1206 	    -1) {
1207 		warn("Failed to disable SIGPIPE for GDB connection");
1208 		close(s);
1209 		return;
1210 	}
1211 
1212 	pthread_mutex_lock(&gdb_lock);
1213 	if (cur_fd != -1) {
1214 		close(s);
1215 		warnx("Ignoring additional GDB connection.");
1216 	}
1217 
1218 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1219 	if (read_event == NULL) {
1220 		if (arg != NULL)
1221 			err(1, "Failed to setup initial GDB connection");
1222 		pthread_mutex_unlock(&gdb_lock);
1223 		return;
1224 	}
1225 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1226 	if (write_event == NULL) {
1227 		if (arg != NULL)
1228 			err(1, "Failed to setup initial GDB connection");
1229 		mevent_delete_close(read_event);
1230 		read_event = NULL;
1231 	}
1232 
1233 	cur_fd = s;
1234 	cur_vcpu = 0;
1235 	stepping_vcpu = -1;
1236 	stopped_vcpu = -1;
1237 	stop_pending = false;
1238 
1239 	/* Break on attach. */
1240 	first_stop = true;
1241 	gdb_suspend_vcpus();
1242 	pthread_mutex_unlock(&gdb_lock);
1243 }
1244 
1245 #ifndef WITHOUT_CAPSICUM
1246 void
1247 limit_gdb_socket(int s)
1248 {
1249 	cap_rights_t rights;
1250 	unsigned long ioctls[] = { FIONREAD };
1251 
1252 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1253 	    CAP_SETSOCKOPT, CAP_IOCTL);
1254 	if (caph_rights_limit(s, &rights) == -1)
1255 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1256 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1257 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1258 }
1259 #endif
1260 
1261 void
1262 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1263 {
1264 	struct sockaddr_in sin;
1265 	int error, flags, s;
1266 
1267 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1268 
1269 	error = pthread_mutex_init(&gdb_lock, NULL);
1270 	if (error != 0)
1271 		errc(1, error, "gdb mutex init");
1272 	error = pthread_cond_init(&idle_vcpus, NULL);
1273 	if (error != 0)
1274 		errc(1, error, "gdb cv init");
1275 
1276 	ctx = _ctx;
1277 	s = socket(PF_INET, SOCK_STREAM, 0);
1278 	if (s < 0)
1279 		err(1, "gdb socket create");
1280 
1281 	sin.sin_len = sizeof(sin);
1282 	sin.sin_family = AF_INET;
1283 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1284 	sin.sin_port = htons(sport);
1285 
1286 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1287 		err(1, "gdb socket bind");
1288 
1289 	if (listen(s, 1) < 0)
1290 		err(1, "gdb socket listen");
1291 
1292 	if (wait) {
1293 		/*
1294 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1295 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1296 		 * it starts execution.  The vcpu will remain suspended
1297 		 * until a debugger connects.
1298 		 */
1299 		stepping_vcpu = -1;
1300 		stopped_vcpu = -1;
1301 		CPU_SET(0, &vcpus_suspended);
1302 	}
1303 
1304 	flags = fcntl(s, F_GETFL);
1305 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1306 		err(1, "Failed to mark gdb socket non-blocking");
1307 
1308 #ifndef WITHOUT_CAPSICUM
1309 	limit_gdb_socket(s);
1310 #endif
1311 	mevent_add(s, EVF_READ, new_connection, NULL);
1312 }
1313