xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 4f8f43b06ed07e96a250855488cc531799d5b78f)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include <sys/param.h>
30 #ifndef WITHOUT_CAPSICUM
31 #include <sys/capsicum.h>
32 #endif
33 #include <sys/endian.h>
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/queue.h>
37 #include <sys/socket.h>
38 #include <machine/atomic.h>
39 #include <machine/specialreg.h>
40 #include <machine/vmm.h>
41 #include <netinet/in.h>
42 #include <assert.h>
43 #ifndef WITHOUT_CAPSICUM
44 #include <capsicum_helpers.h>
45 #endif
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <netdb.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include <stdbool.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <sysexits.h>
57 #include <unistd.h>
58 #include <vmmapi.h>
59 
60 #include "bhyverun.h"
61 #include "config.h"
62 #include "debug.h"
63 #include "gdb.h"
64 #include "mem.h"
65 #include "mevent.h"
66 
67 /*
68  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
69  * use SIGTRAP.
70  */
71 #define	GDB_SIGNAL_TRAP		5
72 
static void gdb_resume_vcpus(void);
static void check_command(int fd);

/* mevent handles for the GDB client socket; NULL when no client is attached. */
static struct mevent *read_event, *write_event;

/*
 * vcpus_active: vCPUs that have registered via gdb_cpu_add().
 * vcpus_suspended: vCPUs the debug server wants stopped.
 * vcpus_waiting: vCPUs currently parked in _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Serializes all debug-server state; held across suspend/report/resume. */
static pthread_mutex_t gdb_lock;
/* Broadcast when vCPUs are released from vcpus_suspended. */
static pthread_cond_t idle_vcpus;
/* swbreak_enabled: whether "swbreak" stop reasons may be reported
 * (presumably negotiated via qSupported — set outside this chunk). */
static bool first_stop, report_next_stop, swbreak_enabled;
82 
/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
	uint8_t *data;		/* heap-allocated backing store */
	size_t capacity;	/* total bytes allocated at 'data' */
	size_t start;		/* write buffers: next byte to send */
	size_t len;		/* count of valid bytes */
};
96 
/*
 * A software breakpoint: a guest-physical address patched with an
 * INT3 (0xcc) byte, and the original instruction byte restored when
 * the breakpoint is removed.
 */
struct breakpoint {
	uint64_t gpa;		/* guest-physical address of the patch */
	uint8_t shadow_inst;	/* original byte overwritten by INT3 */
	TAILQ_ENTRY(breakpoint) link;
};
102 
103 /*
104  * When a vCPU stops to due to an event that should be reported to the
105  * debugger, information about the event is stored in this structure.
106  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
107  * and stops other vCPUs so the event can be reported.  The
108  * report_stop() function reports the event for the 'stopped_vcpu'
109  * vCPU.  When the debugger resumes execution via continue or step,
110  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
111  * event handlers until the associated event is reported or disabled.
112  *
113  * An idle vCPU will have all of the boolean fields set to false.
114  *
115  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
116  * released to execute the stepped instruction.  When the vCPU reports
117  * the stepping trap, 'stepped' is set.
118  *
119  * When a vCPU hits a breakpoint set by the debug server,
120  * 'hit_swbreak' is set to true.
121  */
struct vcpu_state {
	bool stepping;		/* step requested; MTRAP armed on resume */
	bool stepped;		/* step trap taken, awaiting report */
	bool hit_swbreak;	/* debug-server breakpoint hit, awaiting report */
};
127 
static struct io_buffer cur_comm, cur_resp;	/* inbound command / outbound reply */
static uint8_t cur_csum;	/* running checksum of the packet being built */
static struct vmctx *ctx;
static int cur_fd = -1;		/* connected GDB socket, or -1 */
static TAILQ_HEAD(, breakpoint) breakpoints;
static struct vcpu_state *vcpu_state;	/* per-vCPU event state, indexed by vcpuid */
static struct vcpu **vcpus;		/* vCPU handles, indexed by vcpuid */
/* cur_vcpu: GDB's current thread; stopped_vcpu: vCPU being reported, or -1. */
static int cur_vcpu, stopped_vcpu;
static bool gdb_active = false;
137 
/*
 * Registers exposed via the 'g' packet.  Order must stay in sync with
 * gdb_regsize[] below and with GDB's amd64 register layout
 * (NOTE(review): layout assumption — confirm against gdb/amd64-tdep).
 */
static const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
164 
/* Width in bytes of each register in gdb_regset[], in the same order. */
static const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
191 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace logging, compiled in only with GDB_LOG; expands
 * to nothing otherwise.  Lazily opens /tmp/bhyve_gdb.log on first use
 * and line-buffers it so traces survive a crash.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		logfile = fopen("/tmp/bhyve_gdb.log", "w");
		if (logfile == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		/* Under Capsicum, restrict the log stream to writes. */
		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
			fclose(logfile);
			logfile = NULL;
			return;
		}
#endif
		setlinebuf(logfile);
	}
	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
#define debug(...)
#endif
222 
static void	remove_all_sw_breakpoints(void);

/*
 * Fill in 'paging' with the address-translation context for 'vcpu',
 * derived from CR0/CR3/CR4/EFER.  Returns 0 on success, -1 if the
 * registers could not be read.
 */
static int
guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
{
	uint64_t regs[4];
	const int regset[4] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER
	};

	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
		return (-1);

	/*
	 * For the debugger, always pretend to be the kernel (CPL 0),
	 * and if long-mode is enabled, always parse addresses as if
	 * in 64-bit mode.
	 */
	paging->cr3 = regs[1];
	paging->cpl = 0;
	if (regs[3] & EFER_LMA)			/* long mode active */
		paging->cpu_mode = CPU_MODE_64BIT;
	else if (regs[0] & CR0_PE)		/* protected mode */
		paging->cpu_mode = CPU_MODE_PROTECTED;
	else
		paging->cpu_mode = CPU_MODE_REAL;
	if (!(regs[0] & CR0_PG))		/* paging disabled */
		paging->paging_mode = PAGING_MODE_FLAT;
	else if (!(regs[2] & CR4_PAE))
		paging->paging_mode = PAGING_MODE_32;
	else if (regs[3] & EFER_LME)
		paging->paging_mode = (regs[2] & CR4_LA57) ?
		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
	else
		paging->paging_mode = PAGING_MODE_PAE;
	return (0);
}
263 
264 /*
265  * Map a guest virtual address to a physical address (for a given vcpu).
266  * If a guest virtual address is valid, return 1.  If the address is
267  * not valid, return 0.  If an error occurs obtaining the mapping,
268  * return -1.
269  */
static int
guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
{
	struct vm_guest_paging paging;
	int fault;

	if (guest_paging_info(vcpu, &paging) == -1)
		return (-1);

	/*
	 * Always use PROT_READ.  We really care if the VA is
	 * accessible, not if the current vCPU can write.
	 */
	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
	    &fault) == -1)
		return (-1);
	/* 'fault' set means the virtual address has no valid mapping. */
	if (fault)
		return (0);
	return (1);
}
290 
291 static void
292 io_buffer_reset(struct io_buffer *io)
293 {
294 
295 	io->start = 0;
296 	io->len = 0;
297 }
298 
299 /* Available room for adding data. */
300 static size_t
301 io_buffer_avail(struct io_buffer *io)
302 {
303 
304 	return (io->capacity - (io->start + io->len));
305 }
306 
307 static uint8_t *
308 io_buffer_head(struct io_buffer *io)
309 {
310 
311 	return (io->data + io->start);
312 }
313 
314 static uint8_t *
315 io_buffer_tail(struct io_buffer *io)
316 {
317 
318 	return (io->data + io->start + io->len);
319 }
320 
321 static void
322 io_buffer_advance(struct io_buffer *io, size_t amount)
323 {
324 
325 	assert(amount <= io->len);
326 	io->start += amount;
327 	io->len -= amount;
328 }
329 
330 static void
331 io_buffer_consume(struct io_buffer *io, size_t amount)
332 {
333 
334 	io_buffer_advance(io, amount);
335 	if (io->len == 0) {
336 		io->start = 0;
337 		return;
338 	}
339 
340 	/*
341 	 * XXX: Consider making this move optional and compacting on a
342 	 * future read() before realloc().
343 	 */
344 	memmove(io->data, io_buffer_head(io), io->len);
345 	io->start = 0;
346 }
347 
348 static void
349 io_buffer_grow(struct io_buffer *io, size_t newsize)
350 {
351 	uint8_t *new_data;
352 	size_t avail, new_cap;
353 
354 	avail = io_buffer_avail(io);
355 	if (newsize <= avail)
356 		return;
357 
358 	new_cap = io->capacity + (newsize - avail);
359 	new_data = realloc(io->data, new_cap);
360 	if (new_data == NULL)
361 		err(1, "Failed to grow GDB I/O buffer");
362 	io->data = new_data;
363 	io->capacity = new_cap;
364 }
365 
366 static bool
367 response_pending(void)
368 {
369 
370 	if (cur_resp.start == 0 && cur_resp.len == 0)
371 		return (false);
372 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
373 		return (false);
374 	return (true);
375 }
376 
/*
 * Tear down the current GDB connection: drop the socket events,
 * discard buffered I/O, remove all software breakpoints, clear
 * pending per-vCPU events, and let the guest run again.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
403 
/* Convert a nibble (0-15) to its lowercase ASCII hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble < 10 ? '0' + nibble : 'a' + (nibble - 10));
}
413 
/*
 * Convert an ASCII hex character to its value; returns 0xF for any
 * non-hex character.
 */
static uint8_t
parse_digit(uint8_t v)
{

	if ('0' <= v && v <= '9')
		return (v - '0');
	if ('a' <= v && v <= 'f')
		return (10 + (v - 'a'));
	if ('A' <= v && v <= 'F')
		return (10 + (v - 'A'));
	return (0xF);
}
426 
/* Parses big-endian hexadecimal: most-significant digit first. */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t v;
	size_t i;

	v = 0;
	for (i = 0; i < len; i++)
		v = (v << 4) | parse_digit(p[i]);
	return (v);
}
442 
/* Parse two hex characters into one byte, high nibble first. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
449 
/*
 * Write as much of the pending response as the socket accepts,
 * toggling the mevent write watch depending on whether data remains.
 * A failed write() drops the connection entirely.
 */
static void
send_pending_data(int fd)
{
	ssize_t nwritten;

	if (cur_resp.len == 0) {
		mevent_disable(write_event);
		return;
	}
	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
	if (nwritten == -1) {
		warn("Write to GDB socket failed");
		close_connection();
	} else {
		io_buffer_advance(&cur_resp, nwritten);
		/* Watch for writability only while data is left to send. */
		if (cur_resp.len == 0)
			mevent_disable(write_event);
		else
			mevent_enable(write_event);
	}
}
471 
472 /* Append a single character to the output buffer. */
473 static void
474 send_char(uint8_t data)
475 {
476 	io_buffer_grow(&cur_resp, 1);
477 	*io_buffer_tail(&cur_resp) = data;
478 	cur_resp.len++;
479 }
480 
481 /* Append an array of bytes to the output buffer. */
482 static void
483 send_data(const uint8_t *data, size_t len)
484 {
485 
486 	io_buffer_grow(&cur_resp, len);
487 	memcpy(io_buffer_tail(&cur_resp), data, len);
488 	cur_resp.len += len;
489 }
490 
/* Format 'v' as two lowercase hex characters at 'buf'. */
static void
format_byte(uint8_t v, uint8_t *buf)
{

	buf[0] = hex_digit((v >> 4) & 0xf);
	buf[1] = hex_digit(v & 0xf);
}
498 
/*
 * Append a single byte (formatted as two hex characters) to the
 * output buffer.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
511 
512 static void
513 start_packet(void)
514 {
515 
516 	send_char('$');
517 	cur_csum = 0;
518 }
519 
520 static void
521 finish_packet(void)
522 {
523 
524 	send_char('#');
525 	send_byte(cur_csum);
526 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
527 }
528 
529 /*
530  * Append a single character (for the packet payload) and update the
531  * checksum.
532  */
533 static void
534 append_char(uint8_t v)
535 {
536 
537 	send_char(v);
538 	cur_csum += v;
539 }
540 
541 /*
542  * Append an array of bytes (for the packet payload) and update the
543  * checksum.
544  */
545 static void
546 append_packet_data(const uint8_t *data, size_t len)
547 {
548 
549 	send_data(data, len);
550 	while (len > 0) {
551 		cur_csum += *data;
552 		data++;
553 		len--;
554 	}
555 }
556 
/*
 * Append a NUL-terminated string to the packet payload (checksummed).
 * The explicit cast avoids passing 'const char *' where
 * append_packet_data() expects 'const uint8_t *'.
 */
static void
append_string(const char *str)
{

	append_packet_data((const uint8_t *)str, strlen(str));
}
563 
/* Append one byte as two hex payload characters (checksummed). */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
572 
/*
 * Append 'len' bytes of 'value' as hex pairs, least-significant byte
 * first (the wire order GDB expects for register contents).
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++, value >>= 8)
		append_byte((uint8_t)value);
}
583 
/*
 * Append 'len' bytes of 'value' as hex pairs, most-significant byte
 * first.  Use a uint8_t buffer to match the types expected by
 * format_byte() and append_packet_data() (the original 'char' buffer
 * triggered incompatible-pointer diagnostics).
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	uint8_t buf[len * 2];
	size_t i;

	for (i = 0; i < len; i++) {
		/* Fill from the end so the high byte lands first. */
		format_byte(value, buf + (len - i - 1) * 2);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
596 
/*
 * Append a variable-width big-endian hex integer, using only as many
 * bytes as the value needs; zero is a single '0' character.
 */
static void
append_integer(unsigned int value)
{

	if (value != 0)
		append_unsigned_be(value, (fls(value) + 7) / 8);
	else
		append_char('0');
}
606 
/* Append a NUL-terminated string encoded as pairs of hex digits. */
static void
append_asciihex(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		append_byte(*p);
}
616 
/* Send an empty packet: the standard "unsupported request" reply. */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
624 
/* Send an "Exx" reply carrying the given error number as hex. */
static void
send_error(int error)
{

	start_packet();
	append_string("E");
	append_byte(error);
	finish_packet();
}
634 
/* Send the "OK" success reply. */
static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}
643 
/*
 * Parse a GDB thread-id: "0" means any thread (0), "-1" means all
 * threads (-1), an empty field is invalid (-2), and anything else is
 * a hex thread number.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	if (len == 0)
		return (-2);
	if (len == 1 && data[0] == '0')
		return (0);
	if (len == 2 && data[0] == '-' && data[1] == '1')
		return (-1);
	return (parse_integer(data, len));
}
656 
657 /*
658  * Report the current stop event to the debugger.  If the stop is due
659  * to an event triggered on a specific vCPU such as a breakpoint or
660  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
661  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
662  * the reporting vCPU for vCPU events.
663  */
static void
report_stop(bool set_cur_vcpu)
{
	struct vcpu_state *vs;

	start_packet();
	if (stopped_vcpu == -1) {
		/* Guest-wide stop with no triggering vCPU: plain 'S' reply. */
		append_char('S');
		append_byte(GDB_SIGNAL_TRAP);
	} else {
		/* Per-vCPU stop: 'T' reply with a "thread:<id>;" pair. */
		vs = &vcpu_state[stopped_vcpu];
		if (set_cur_vcpu)
			cur_vcpu = stopped_vcpu;
		append_char('T');
		append_byte(GDB_SIGNAL_TRAP);
		append_string("thread:");
		/* GDB thread-ids are 1-based; vcpuids are 0-based. */
		append_integer(stopped_vcpu + 1);
		append_char(';');
		if (vs->hit_swbreak) {
			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
			/* Only advertise swbreak if the client supports it. */
			if (swbreak_enabled)
				append_string("swbreak:;");
		} else if (vs->stepped)
			debug("$vCPU %d reporting step\n", stopped_vcpu);
		else
			debug("$vCPU %d reporting ???\n", stopped_vcpu);
	}
	finish_packet();
	report_next_stop = false;
}
694 
695 /*
696  * If this stop is due to a vCPU event, clear that event to mark it as
697  * acknowledged.
698  */
699 static void
700 discard_stop(void)
701 {
702 	struct vcpu_state *vs;
703 
704 	if (stopped_vcpu != -1) {
705 		vs = &vcpu_state[stopped_vcpu];
706 		vs->hit_swbreak = false;
707 		vs->stepped = false;
708 		stopped_vcpu = -1;
709 	}
710 	report_next_stop = true;
711 }
712 
/*
 * Invoked when the last suspended vCPU has parked in
 * _gdb_cpu_suspend().  The first stop (on attach) is swallowed;
 * later stops are reported to the debugger if one is still wanted.
 */
static void
gdb_finish_suspend_vcpus(void)
{

	if (first_stop) {
		first_stop = false;
		stopped_vcpu = -1;
	} else if (report_next_stop) {
		assert(!response_pending());
		report_stop(true);
		send_pending_data(cur_fd);
	}
}
726 
/*
 * vCPU threads invoke this function whenever the vCPU enters the
 * debug server to pause or report an event.  vCPU threads wait here
 * as long as the debug server keeps them suspended.  Called with
 * gdb_lock held (dropped while waiting on idle_vcpus).  If
 * 'report_stop' is set and this is the last vCPU to park, the stop is
 * reported via gdb_finish_suspend_vcpus().
 */
static void
_gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
{
	int vcpuid = vcpu_id(vcpu);

	debug("$vCPU %d suspending\n", vcpuid);
	CPU_SET(vcpuid, &vcpus_waiting);
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	while (CPU_ISSET(vcpuid, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpuid, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpuid);
}
746 
747 /*
748  * Invoked at the start of a vCPU thread's execution to inform the
749  * debug server about the new thread.
750  */
751 void
752 gdb_cpu_add(struct vcpu *vcpu)
753 {
754 	int vcpuid;
755 
756 	if (!gdb_active)
757 		return;
758 	vcpuid = vcpu_id(vcpu);
759 	debug("$vCPU %d starting\n", vcpuid);
760 	pthread_mutex_lock(&gdb_lock);
761 	assert(vcpuid < guest_ncpus);
762 	assert(vcpus[vcpuid] == NULL);
763 	vcpus[vcpuid] = vcpu;
764 	CPU_SET(vcpuid, &vcpus_active);
765 	if (!TAILQ_EMPTY(&breakpoints)) {
766 		vm_set_capability(vcpu, VM_CAP_BPT_EXIT, 1);
767 		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
768 	}
769 
770 	/*
771 	 * If a vcpu is added while vcpus are stopped, suspend the new
772 	 * vcpu so that it will pop back out with a debug exit before
773 	 * executing the first instruction.
774 	 */
775 	if (!CPU_EMPTY(&vcpus_suspended)) {
776 		CPU_SET(vcpuid, &vcpus_suspended);
777 		_gdb_cpu_suspend(vcpu, false);
778 	}
779 	pthread_mutex_unlock(&gdb_lock);
780 }
781 
/*
 * Invoked by a vCPU (with gdb_lock held) before resuming execution.
 * If the debugger requested a single-step, arm the MTRAP exit and
 * mask hardware interrupts (presumably so the trap reflects the
 * stepped guest instruction rather than an interrupt handler —
 * NOTE(review): intent inferred, confirm).
 */
static void
gdb_cpu_resume(struct vcpu *vcpu)
{
	struct vcpu_state *vs;
	int error;

	vs = &vcpu_state[vcpu_id(vcpu)];

	/*
	 * Any pending event should already be reported before
	 * resuming.
	 */
	assert(vs->hit_swbreak == false);
	assert(vs->stepped == false);
	if (vs->stepping) {
		error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, 1);
		assert(error == 0);

		error = vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, 1);
		assert(error == 0);
	}
}
808 
/*
 * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
 * has been suspended due to an event on different vCPU or in response
 * to a guest-wide suspend such as Ctrl-C or the stop on attach.
 * Parks the vCPU, then re-arms stepping state before it resumes.
 */
void
gdb_cpu_suspend(struct vcpu *vcpu)
{

	if (!gdb_active)
		return;
	pthread_mutex_lock(&gdb_lock);
	_gdb_cpu_suspend(vcpu, true);
	gdb_cpu_resume(vcpu);
	pthread_mutex_unlock(&gdb_lock);
}
825 
/*
 * Stop all active vCPUs.  Called with gdb_lock held.  If every vCPU
 * is already parked the suspend (and stop report) finishes now;
 * otherwise gdb_finish_suspend_vcpus() runs when the last one parks.
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	vm_suspend_all_cpus(ctx);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
837 
/*
 * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
 * the VT-x-specific MTRAP exit.  If this vCPU was stepping, convert
 * 'stepping' into 'stepped', disarm the step traps, and suspend all
 * vCPUs until the debugger acknowledges the step.
 */
void
gdb_cpu_mtrap(struct vcpu *vcpu)
{
	struct vcpu_state *vs;
	int vcpuid;

	if (!gdb_active)
		return;
	vcpuid = vcpu_id(vcpu);
	debug("$vCPU %d MTRAP\n", vcpuid);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpuid];
	if (vs->stepping) {
		vs->stepping = false;
		vs->stepped = true;
		/* Disarm single-step traps before reporting. */
		vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, 0);
		vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, 0);

		/* Park until the debugger clears 'stepped' (or detaches). */
		while (vs->stepped) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpuid);
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}
872 
873 static struct breakpoint *
874 find_breakpoint(uint64_t gpa)
875 {
876 	struct breakpoint *bp;
877 
878 	TAILQ_FOREACH(bp, &breakpoints, link) {
879 		if (bp->gpa == gpa)
880 			return (bp);
881 	}
882 	return (NULL);
883 }
884 
/*
 * Handler for a guest breakpoint exit.  If the trap address matches
 * one of the debug server's software breakpoints, record the hit and
 * suspend all vCPUs until the debugger acknowledges it; otherwise the
 * INT3 belongs to the guest, so re-inject the #BP exception.
 */
void
gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error, vcpuid;

	if (!gdb_active) {
		EPRINTLN("vm_loop: unexpected VMEXIT_DEBUG");
		exit(4);
	}
	vcpuid = vcpu_id(vcpu);
	pthread_mutex_lock(&gdb_lock);
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpuid];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/* Point %rip back at the breakpoint instruction. */
		vm_set_register(vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		for (;;) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
				    vcpuid, vmexit->rip);
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		/* Not ours: forward the #BP to the guest. */
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
		    vmexit->rip);
		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
		    vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
940 
/*
 * Single-step 'vcpu': mark it stepping and release only that vCPU.
 * Returns false if the hypervisor does not support MTRAP exits, so
 * the caller can report the step request as unsupported.
 */
static bool
gdb_step_vcpu(struct vcpu *vcpu)
{
	int error, val, vcpuid;

	vcpuid = vcpu_id(vcpu);
	debug("$vCPU %d step\n", vcpuid);
	/* Probe for MTRAP support; stepping is impossible without it. */
	error = vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val);
	if (error < 0)
		return (false);

	discard_stop();
	vcpu_state[vcpuid].stepping = true;
	vm_resume_cpu(vcpu);
	CPU_CLR(vcpuid, &vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
	return (true);
}
959 
/* Release every suspended vCPU.  Called with gdb_lock held. */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_all_cpus(ctx);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
970 
/*
 * Handle a 'g' packet: reply with the current vCPU's registers in
 * gdb_regset[] order, each formatted as gdb_regsize[] bytes of hex.
 */
static void
gdb_read_regs(void)
{
	uint64_t regvals[nitems(gdb_regset)];

	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
	    gdb_regset, regvals) == -1) {
		send_error(errno);
		return;
	}
	start_packet();
	for (size_t i = 0; i < nitems(regvals); i++)
		append_unsigned_native(regvals[i], gdb_regsize[i]);
	finish_packet();
}
986 
/*
 * Handle an 'm' packet ("m<addr>,<length>"): read guest memory at a
 * guest virtual address and reply with the data as hex pairs.  A
 * translation or read failure before any data is emitted produces an
 * error reply; a failure mid-way truncates the (already started)
 * reply instead.
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
		if (error == -1) {
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				/* Largest naturally-aligned width that fits. */
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(vcpus[cur_vcpu], gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit low byte first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	if (!started)
		start_packet();
	finish_packet();
}
1098 
/*
 * Handle an 'M' packet ("M<addr>,<length>:<hex bytes>"): write guest
 * memory at a guest virtual address.  Replies OK on success or an
 * error; a partial write is possible if a later page faults.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length. */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 */
			while (todo > 0) {
				/* Largest naturally-aligned width that fits. */
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(vcpus[cur_vcpu], gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
1207 
/*
 * Toggle VM_CAP_BPT_EXIT (trap-on-INT3) on every active vCPU.
 * Returns false if any vCPU rejects the capability change.
 */
static bool
set_breakpoint_caps(bool enable)
{
	cpuset_t mask;
	int vcpu;

	mask = vcpus_active;
	while (!CPU_EMPTY(&mask)) {
		vcpu = CPU_FFS(&mask) - 1;
		CPU_CLR(vcpu, &mask);
		if (vm_set_capability(vcpus[vcpu], VM_CAP_BPT_EXIT,
		    enable ? 1 : 0) < 0)
			return (false);
		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
		    enable ? "en" : "dis");
	}
	return (true);
}
1226 
/*
 * Restore the original instruction byte for every software
 * breakpoint, free the tracking structures, and disable breakpoint
 * exits once none remain.
 */
static void
remove_all_sw_breakpoints(void)
{
	struct breakpoint *bp, *nbp;
	uint8_t *cp;

	if (TAILQ_EMPTY(&breakpoints))
		return;

	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
		debug("remove breakpoint at %#lx\n", bp->gpa);
		cp = paddr_guest2host(ctx, bp->gpa, 1);
		*cp = bp->shadow_inst;
		TAILQ_REMOVE(&breakpoints, bp, link);
		free(bp);
	}
	TAILQ_INIT(&breakpoints);
	set_breakpoint_caps(false);
}
1246 
1247 static void
1248 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1249 {
1250 	struct breakpoint *bp;
1251 	uint64_t gpa;
1252 	uint8_t *cp;
1253 	int error;
1254 
1255 	if (kind != 1) {
1256 		send_error(EINVAL);
1257 		return;
1258 	}
1259 
1260 	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1261 	if (error == -1) {
1262 		send_error(errno);
1263 		return;
1264 	}
1265 	if (error == 0) {
1266 		send_error(EFAULT);
1267 		return;
1268 	}
1269 
1270 	cp = paddr_guest2host(ctx, gpa, 1);
1271 
1272 	/* Only permit breakpoints in guest RAM. */
1273 	if (cp == NULL) {
1274 		send_error(EFAULT);
1275 		return;
1276 	}
1277 
1278 	/* Find any existing breakpoint. */
1279 	bp = find_breakpoint(gpa);
1280 
1281 	/*
1282 	 * Silently ignore duplicate commands since the protocol
1283 	 * requires these packets to be idempotent.
1284 	 */
1285 	if (insert) {
1286 		if (bp == NULL) {
1287 			if (TAILQ_EMPTY(&breakpoints) &&
1288 			    !set_breakpoint_caps(true)) {
1289 				send_empty_response();
1290 				return;
1291 			}
1292 			bp = malloc(sizeof(*bp));
1293 			bp->gpa = gpa;
1294 			bp->shadow_inst = *cp;
1295 			*cp = 0xcc;	/* INT 3 */
1296 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1297 			debug("new breakpoint at %#lx\n", gpa);
1298 		}
1299 	} else {
1300 		if (bp != NULL) {
1301 			debug("remove breakpoint at %#lx\n", gpa);
1302 			*cp = bp->shadow_inst;
1303 			TAILQ_REMOVE(&breakpoints, bp, link);
1304 			free(bp);
1305 			if (TAILQ_EMPTY(&breakpoints))
1306 				set_breakpoint_caps(false);
1307 		}
1308 	}
1309 	send_ok();
1310 }
1311 
/*
 * Parse a GDB 'Z' (insert) or 'z' (remove) breakpoint packet of the
 * form "Z<type>,<addr>,<kind>[;cond...]" and dispatch it.  Only
 * software breakpoints (type 0) are handled; everything else gets an
 * empty response.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	uint64_t gva;
	uint8_t *sep;
	bool insert;
	int kind, type;

	/* 'Z' inserts, 'z' removes. */
	insert = (*data == 'Z');

	/* Skip 'Z/z' */
	data++;
	len--;

	/* Type field, terminated by ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	type = parse_integer(data, sep - data);
	len -= (sep - data) + 1;
	data = sep + 1;

	/* Address field, terminated by ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, sep - data);
	len -= (sep - data) + 1;
	data = sep + 1;

	/* Kind field; a ';' would introduce conditions or commands. */
	sep = memchr(data, ';', len);
	if (sep == data) {
		send_error(EINVAL);
		return;
	}
	if (sep != NULL) {
		/*
		 * We do not advertise support for either the
		 * ConditionalBreakpoints or BreakpointCommands
		 * features, so we should not be getting conditions or
		 * commands from the remote end.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);

	if (type == 0)
		update_sw_breakpoint(gva, kind, insert);
	else
		send_empty_response();
}
1375 
/*
 * Return true when the packet payload 'data' (of 'len' bytes) begins
 * with the full command string 'cmd'.  Used for prefix dispatch of
 * query packets.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	if (cmdlen > len)
		return (false);
	return (memcmp(data, cmd, cmdlen) == 0);
}
1384 
1385 static void
1386 check_features(const uint8_t *data, size_t len)
1387 {
1388 	char *feature, *next_feature, *str, *value;
1389 	bool supported;
1390 
1391 	str = malloc(len + 1);
1392 	memcpy(str, data, len);
1393 	str[len] = '\0';
1394 	next_feature = str;
1395 
1396 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1397 		/*
1398 		 * Null features shouldn't exist, but skip if they
1399 		 * do.
1400 		 */
1401 		if (strcmp(feature, "") == 0)
1402 			continue;
1403 
1404 		/*
1405 		 * Look for the value or supported / not supported
1406 		 * flag.
1407 		 */
1408 		value = strchr(feature, '=');
1409 		if (value != NULL) {
1410 			*value = '\0';
1411 			value++;
1412 			supported = true;
1413 		} else {
1414 			value = feature + strlen(feature) - 1;
1415 			switch (*value) {
1416 			case '+':
1417 				supported = true;
1418 				break;
1419 			case '-':
1420 				supported = false;
1421 				break;
1422 			default:
1423 				/*
1424 				 * This is really a protocol error,
1425 				 * but we just ignore malformed
1426 				 * features for ease of
1427 				 * implementation.
1428 				 */
1429 				continue;
1430 			}
1431 			value = NULL;
1432 		}
1433 
1434 		if (strcmp(feature, "swbreak") == 0)
1435 			swbreak_enabled = supported;
1436 	}
1437 	free(str);
1438 
1439 	start_packet();
1440 
1441 	/* This is an arbitrary limit. */
1442 	append_string("PacketSize=4096");
1443 	append_string(";swbreak+");
1444 	finish_packet();
1445 }
1446 
/*
 * Dispatch a GDB 'q' (general query) packet.  Matching is by prefix
 * via command_equals(), so the order of the checks matters where one
 * command name is a prefix of another.  Unrecognized queries get an
 * empty response, which tells GDB the feature is unsupported.
 *
 * GDB thread ids are 1-based; the stub's vCPU numbers are 0-based,
 * hence the +1/-1 conversions below.
 */
static void
gdb_query(const uint8_t *data, size_t len)
{

	/*
	 * TODO:
	 * - qSearch
	 */
	if (command_equals(data, len, "qAttached")) {
		/* '1': we attached to an existing process (the VM). */
		start_packet();
		append_char('1');
		finish_packet();
	} else if (command_equals(data, len, "qC")) {
		/* Report the current thread (vCPU) id. */
		start_packet();
		append_string("QC");
		append_integer(cur_vcpu + 1);
		finish_packet();
	} else if (command_equals(data, len, "qfThreadInfo")) {
		cpuset_t mask;
		bool first;
		int vcpu;

		if (CPU_EMPTY(&vcpus_active)) {
			send_error(EINVAL);
			return;
		}
		/* 'm' + comma-separated list of active thread ids. */
		mask = vcpus_active;
		start_packet();
		append_char('m');
		first = true;
		while (!CPU_EMPTY(&mask)) {
			vcpu = CPU_FFS(&mask) - 1;
			CPU_CLR(vcpu, &mask);
			if (first)
				first = false;
			else
				append_char(',');
			append_integer(vcpu + 1);
		}
		finish_packet();
	} else if (command_equals(data, len, "qsThreadInfo")) {
		/* 'l': the qfThreadInfo reply above was the whole list. */
		start_packet();
		append_char('l');
		finish_packet();
	} else if (command_equals(data, len, "qSupported")) {
		/* Hand the feature list after the command name over. */
		data += strlen("qSupported");
		len -= strlen("qSupported");
		check_features(data, len);
	} else if (command_equals(data, len, "qThreadExtraInfo")) {
		char buf[16];
		int tid;

		data += strlen("qThreadExtraInfo");
		len -= strlen("qThreadExtraInfo");
		if (*data != ',') {
			send_error(EINVAL);
			return;
		}
		tid = parse_threadid(data + 1, len - 1);
		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
			send_error(EINVAL);
			return;
		}

		/* Human-readable thread description, hex-encoded. */
		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
		start_packet();
		append_asciihex(buf);
		finish_packet();
	} else
		send_empty_response();
}
1518 
1519 static void
1520 handle_command(const uint8_t *data, size_t len)
1521 {
1522 
1523 	/* Reject packets with a sequence-id. */
1524 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1525 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1526 		send_empty_response();
1527 		return;
1528 	}
1529 
1530 	switch (*data) {
1531 	case 'c':
1532 		if (len != 1) {
1533 			send_error(EINVAL);
1534 			break;
1535 		}
1536 
1537 		discard_stop();
1538 		gdb_resume_vcpus();
1539 		break;
1540 	case 'D':
1541 		send_ok();
1542 
1543 		/* TODO: Resume any stopped CPUs. */
1544 		break;
1545 	case 'g': {
1546 		gdb_read_regs();
1547 		break;
1548 	}
1549 	case 'H': {
1550 		int tid;
1551 
1552 		if (data[1] != 'g' && data[1] != 'c') {
1553 			send_error(EINVAL);
1554 			break;
1555 		}
1556 		tid = parse_threadid(data + 2, len - 2);
1557 		if (tid == -2) {
1558 			send_error(EINVAL);
1559 			break;
1560 		}
1561 
1562 		if (CPU_EMPTY(&vcpus_active)) {
1563 			send_error(EINVAL);
1564 			break;
1565 		}
1566 		if (tid == -1 || tid == 0)
1567 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1568 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1569 			cur_vcpu = tid - 1;
1570 		else {
1571 			send_error(EINVAL);
1572 			break;
1573 		}
1574 		send_ok();
1575 		break;
1576 	}
1577 	case 'm':
1578 		gdb_read_mem(data, len);
1579 		break;
1580 	case 'M':
1581 		gdb_write_mem(data, len);
1582 		break;
1583 	case 'T': {
1584 		int tid;
1585 
1586 		tid = parse_threadid(data + 1, len - 1);
1587 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1588 			send_error(EINVAL);
1589 			return;
1590 		}
1591 		send_ok();
1592 		break;
1593 	}
1594 	case 'q':
1595 		gdb_query(data, len);
1596 		break;
1597 	case 's':
1598 		if (len != 1) {
1599 			send_error(EINVAL);
1600 			break;
1601 		}
1602 
1603 		/* Don't send a reply until a stop occurs. */
1604 		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
1605 			send_error(EOPNOTSUPP);
1606 			break;
1607 		}
1608 		break;
1609 	case 'z':
1610 	case 'Z':
1611 		parse_breakpoint(data, len);
1612 		break;
1613 	case '?':
1614 		report_stop(false);
1615 		break;
1616 	case 'G': /* TODO */
1617 	case 'v':
1618 		/* Handle 'vCont' */
1619 		/* 'vCtrlC' */
1620 	case 'p': /* TODO */
1621 	case 'P': /* TODO */
1622 	case 'Q': /* TODO */
1623 	case 't': /* TODO */
1624 	case 'X': /* TODO */
1625 	default:
1626 		send_empty_response();
1627 	}
1628 }
1629 
/*
 * Check for a valid packet in the command buffer and process as much
 * complete input as possible.  The buffer may hold any mix of Ctrl-C
 * interrupts (0x03), '+'/'-' acks for our previous response, and
 * '$'-framed packets ("$<body>#<2-hex-digit checksum>"); incomplete
 * packets are left in the buffer for the next read.
 */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			/* Ctrl-C from GDB: stop the guest. */
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			/* A deferred stop reply can now be delivered. */
			if (stopped_vcpu != -1 && report_next_stop) {
				report_stop(true);
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/* Rewind the response buffer and resend. */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				/* Don't re-send a leading ack byte. */
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* Body through '#', plus two checksum digits. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				/* Bad checksum: NACK and drop the packet. */
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			send_char('+');

			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1718 
1719 static void
1720 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1721 {
1722 	size_t pending;
1723 	ssize_t nread;
1724 	int n;
1725 
1726 	if (ioctl(fd, FIONREAD, &n) == -1) {
1727 		warn("FIONREAD on GDB socket");
1728 		return;
1729 	}
1730 	assert(n >= 0);
1731 	pending = n;
1732 
1733 	/*
1734 	 * 'pending' might be zero due to EOF.  We need to call read
1735 	 * with a non-zero length to detect EOF.
1736 	 */
1737 	if (pending == 0)
1738 		pending = 1;
1739 
1740 	/* Ensure there is room in the command buffer. */
1741 	io_buffer_grow(&cur_comm, pending);
1742 	assert(io_buffer_avail(&cur_comm) >= pending);
1743 
1744 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1745 	if (nread == 0) {
1746 		close_connection();
1747 	} else if (nread == -1) {
1748 		if (errno == EAGAIN)
1749 			return;
1750 
1751 		warn("Read from GDB socket");
1752 		close_connection();
1753 	} else {
1754 		cur_comm.len += nread;
1755 		pthread_mutex_lock(&gdb_lock);
1756 		check_command(fd);
1757 		pthread_mutex_unlock(&gdb_lock);
1758 	}
1759 }
1760 
/*
 * mevent callback: the GDB socket became writable again; flush any
 * response bytes still queued in cur_resp.
 */
static void
gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
{

	send_pending_data(fd);
}
1767 
1768 static void
1769 new_connection(int fd, enum ev_type event __unused, void *arg)
1770 {
1771 	int optval, s;
1772 
1773 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1774 	if (s == -1) {
1775 		if (arg != NULL)
1776 			err(1, "Failed accepting initial GDB connection");
1777 
1778 		/* Silently ignore errors post-startup. */
1779 		return;
1780 	}
1781 
1782 	optval = 1;
1783 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1784 	    -1) {
1785 		warn("Failed to disable SIGPIPE for GDB connection");
1786 		close(s);
1787 		return;
1788 	}
1789 
1790 	pthread_mutex_lock(&gdb_lock);
1791 	if (cur_fd != -1) {
1792 		close(s);
1793 		warnx("Ignoring additional GDB connection.");
1794 	}
1795 
1796 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1797 	if (read_event == NULL) {
1798 		if (arg != NULL)
1799 			err(1, "Failed to setup initial GDB connection");
1800 		pthread_mutex_unlock(&gdb_lock);
1801 		return;
1802 	}
1803 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1804 	if (write_event == NULL) {
1805 		if (arg != NULL)
1806 			err(1, "Failed to setup initial GDB connection");
1807 		mevent_delete_close(read_event);
1808 		read_event = NULL;
1809 	}
1810 
1811 	cur_fd = s;
1812 	cur_vcpu = 0;
1813 	stopped_vcpu = -1;
1814 
1815 	/* Break on attach. */
1816 	first_stop = true;
1817 	report_next_stop = false;
1818 	gdb_suspend_vcpus();
1819 	pthread_mutex_unlock(&gdb_lock);
1820 }
1821 
1822 #ifndef WITHOUT_CAPSICUM
/*
 * Restrict the listen socket to the minimal Capsicum rights the GDB
 * stub needs (accept, event registration, I/O, setsockopt) plus the
 * single FIONREAD ioctl used by gdb_readable().
 */
static void
limit_gdb_socket(int s)
{
	cap_rights_t rights;
	unsigned long ioctls[] = { FIONREAD };

	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
	    CAP_SETSOCKOPT, CAP_IOCTL);
	if (caph_rights_limit(s, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
}
1836 #endif
1837 
/*
 * Initialize the GDB stub: read the gdb.* configuration, create and
 * register the listening socket, and optionally hold vcpu 0 until a
 * debugger attaches.  A missing "gdb.port" disables the stub.  All
 * failures here are fatal since this runs during bhyve startup.
 */
void
init_gdb(struct vmctx *_ctx)
{
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	/* "gdb.wait" holds the first vcpu until a debugger connects. */
	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	/* BUG FIX: these allocations were previously used unchecked. */
	if (vcpus == NULL || vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	/* BUG FIX: F_GETFL failure previously went undetected. */
	flags = fcntl(s, F_GETFL);
	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	/* BUG FIX: mevent_add failure previously went undetected. */
	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
		errx(EX_OSERR, "Failed to register GDB listen socket");
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);
}
1922