xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 258a0d760aa8b42899a000e30f610f900a402556)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <netdb.h>
52 #include <pthread.h>
53 #include <pthread_np.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <sysexits.h>
59 #include <unistd.h>
60 #include <vmmapi.h>
61 
62 #include "bhyverun.h"
63 #include "config.h"
64 #include "gdb.h"
65 #include "mem.h"
66 #include "mevent.h"
67 
68 /*
69  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
70  * use SIGTRAP.
71  */
72 #define	GDB_SIGNAL_TRAP		5
73 
74 static void gdb_resume_vcpus(void);
75 static void check_command(int fd);
76 
/*
 * mevent handles registered for the debugger socket.  Both are deleted
 * and reset to NULL by close_connection().
 */
static struct mevent *read_event, *write_event;

/*
 * vCPU bookkeeping sets.  'vcpus_active' gains a bit in gdb_cpu_add(),
 * 'vcpus_suspended' is the set the debug server wants stopped, and
 * 'vcpus_waiting' is the set currently parked in _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Mutex held around the debug server state manipulated in this file. */
static pthread_mutex_t gdb_lock;
/* Broadcast after 'vcpus_suspended' shrinks so parked vCPUs re-check it. */
static pthread_cond_t idle_vcpus;
/*
 * 'report_next_stop' is cleared by report_stop() and set again by
 * discard_stop(); 'swbreak_enabled' gates the "swbreak:;" stop-reply
 * annotation; 'first_stop' suppresses the report for the initial stop.
 */
static bool first_stop, report_next_stop, swbreak_enabled;
83 
/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
	uint8_t *data;		/* heap storage, resized by io_buffer_grow() */
	size_t capacity;	/* total bytes allocated at 'data' */
	size_t start;		/* index of the first valid byte */
	size_t len;		/* number of valid bytes from 'start' */
};
97 
/*
 * A software breakpoint planted by the debug server.  The byte at
 * guest physical address 'gpa' is overwritten with INT 3 and the
 * byte it replaced is kept in 'shadow_inst' so that it can be
 * restored when the breakpoint is removed.
 */
struct breakpoint {
	uint64_t gpa;			/* guest physical address patched */
	uint8_t shadow_inst;		/* original byte saved from 'gpa' */
	TAILQ_ENTRY(breakpoint) link;	/* linkage on 'breakpoints' */
};
103 
/*
 * When a vCPU stops due to an event that should be reported to the
 * debugger, information about the event is stored in this structure.
 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
 * and stops other vCPUs so the event can be reported.  The
 * report_stop() function reports the event for the 'stopped_vcpu'
 * vCPU.  When the debugger resumes execution via continue or step,
 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
 * event handlers until the associated event is reported or disabled.
 *
 * An idle vCPU will have all of the boolean fields set to false.
 *
 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
 * released to execute the stepped instruction.  When the vCPU reports
 * the stepping trap, 'stepped' is set.
 *
 * When a vCPU hits a breakpoint set by the debug server,
 * 'hit_swbreak' is set to true.
 */
struct vcpu_state {
	bool stepping;		/* released to execute one instruction */
	bool stepped;		/* step trap taken, not yet acknowledged */
	bool hit_swbreak;	/* debug-server breakpoint hit, not yet acked */
};
128 
/*
 * 'cur_resp' accumulates bytes queued for the debugger (see
 * send_char()/send_data()).  'cur_comm' is the other connection
 * buffer -- presumably the incoming command buffer; confirm against
 * the reader that is outside this part of the file.
 */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet payload being built. */
static uint8_t cur_csum;
static struct vmctx *ctx;
/* Debugger socket, or -1 when no connection is open. */
static int cur_fd = -1;
/* All software breakpoints currently planted in guest memory. */
static TAILQ_HEAD(, breakpoint) breakpoints;
/* Per-vCPU event state, indexed by vCPU id. */
static struct vcpu_state *vcpu_state;
/* vCPU handles registered by gdb_cpu_add(), indexed by vCPU id. */
static struct vcpu **vcpus;
/*
 * 'cur_vcpu' is the vCPU that register/memory commands operate on;
 * 'stopped_vcpu' is the vCPU whose event is being reported, or -1.
 */
static int cur_vcpu, stopped_vcpu;
/* The gdb_cpu_*() entry points do nothing useful while this is false. */
static bool gdb_active = false;
138 
/*
 * Registers fetched by gdb_read_regs(), in the order they are emitted
 * in the reply.  Each entry is paired with the width at the same index
 * in gdb_regsize[].
 */
static const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
165 
/*
 * Number of bytes emitted for each register in gdb_regset[], index for
 * index.  gdb_read_regs() walks both tables together, so they must be
 * kept the same length.
 */
static const int gdb_regsize[] = {
	8,	/* rax */
	8,	/* rbx */
	8,	/* rcx */
	8,	/* rdx */
	8,	/* rsi */
	8,	/* rdi */
	8,	/* rbp */
	8,	/* rsp */
	8,	/* r8 */
	8,	/* r9 */
	8,	/* r10 */
	8,	/* r11 */
	8,	/* r12 */
	8,	/* r13 */
	8,	/* r14 */
	8,	/* r15 */
	8,	/* rip */
	4,	/* rflags */
	4,	/* cs */
	4,	/* ss */
	4,	/* ds */
	4,	/* es */
	4,	/* fs */
	4	/* gs */
};
192 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace output, compiled in only when GDB_LOG is defined.
 * The log file is opened on first use; if it cannot be opened (or, with
 * Capsicum, cannot be limited to writing) the message is dropped and
 * the open is retried on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		FILE *fp;

		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		/* Publish the stream only once it is fully set up. */
		logfile = fp;
	}

	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
/* Logging disabled: debug() calls compile away to nothing. */
#define debug(...)
#endif
223 
224 static void	remove_all_sw_breakpoints(void);
225 
226 static int
227 guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
228 {
229 	uint64_t regs[4];
230 	const int regset[4] = {
231 		VM_REG_GUEST_CR0,
232 		VM_REG_GUEST_CR3,
233 		VM_REG_GUEST_CR4,
234 		VM_REG_GUEST_EFER
235 	};
236 
237 	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
238 		return (-1);
239 
240 	/*
241 	 * For the debugger, always pretend to be the kernel (CPL 0),
242 	 * and if long-mode is enabled, always parse addresses as if
243 	 * in 64-bit mode.
244 	 */
245 	paging->cr3 = regs[1];
246 	paging->cpl = 0;
247 	if (regs[3] & EFER_LMA)
248 		paging->cpu_mode = CPU_MODE_64BIT;
249 	else if (regs[0] & CR0_PE)
250 		paging->cpu_mode = CPU_MODE_PROTECTED;
251 	else
252 		paging->cpu_mode = CPU_MODE_REAL;
253 	if (!(regs[0] & CR0_PG))
254 		paging->paging_mode = PAGING_MODE_FLAT;
255 	else if (!(regs[2] & CR4_PAE))
256 		paging->paging_mode = PAGING_MODE_32;
257 	else if (regs[3] & EFER_LME)
258 		paging->paging_mode = (regs[2] & CR4_LA57) ?
259 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
260 	else
261 		paging->paging_mode = PAGING_MODE_PAE;
262 	return (0);
263 }
264 
265 /*
266  * Map a guest virtual address to a physical address (for a given vcpu).
267  * If a guest virtual address is valid, return 1.  If the address is
268  * not valid, return 0.  If an error occurs obtaining the mapping,
269  * return -1.
270  */
271 static int
272 guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
273 {
274 	struct vm_guest_paging paging;
275 	int fault;
276 
277 	if (guest_paging_info(vcpu, &paging) == -1)
278 		return (-1);
279 
280 	/*
281 	 * Always use PROT_READ.  We really care if the VA is
282 	 * accessible, not if the current vCPU can write.
283 	 */
284 	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
285 	    &fault) == -1)
286 		return (-1);
287 	if (fault)
288 		return (0);
289 	return (1);
290 }
291 
292 static void
293 io_buffer_reset(struct io_buffer *io)
294 {
295 
296 	io->start = 0;
297 	io->len = 0;
298 }
299 
300 /* Available room for adding data. */
301 static size_t
302 io_buffer_avail(struct io_buffer *io)
303 {
304 
305 	return (io->capacity - (io->start + io->len));
306 }
307 
308 static uint8_t *
309 io_buffer_head(struct io_buffer *io)
310 {
311 
312 	return (io->data + io->start);
313 }
314 
315 static uint8_t *
316 io_buffer_tail(struct io_buffer *io)
317 {
318 
319 	return (io->data + io->start + io->len);
320 }
321 
322 static void
323 io_buffer_advance(struct io_buffer *io, size_t amount)
324 {
325 
326 	assert(amount <= io->len);
327 	io->start += amount;
328 	io->len -= amount;
329 }
330 
331 static void
332 io_buffer_consume(struct io_buffer *io, size_t amount)
333 {
334 
335 	io_buffer_advance(io, amount);
336 	if (io->len == 0) {
337 		io->start = 0;
338 		return;
339 	}
340 
341 	/*
342 	 * XXX: Consider making this move optional and compacting on a
343 	 * future read() before realloc().
344 	 */
345 	memmove(io->data, io_buffer_head(io), io->len);
346 	io->start = 0;
347 }
348 
349 static void
350 io_buffer_grow(struct io_buffer *io, size_t newsize)
351 {
352 	uint8_t *new_data;
353 	size_t avail, new_cap;
354 
355 	avail = io_buffer_avail(io);
356 	if (newsize <= avail)
357 		return;
358 
359 	new_cap = io->capacity + (newsize - avail);
360 	new_data = realloc(io->data, new_cap);
361 	if (new_data == NULL)
362 		err(1, "Failed to grow GDB I/O buffer");
363 	io->data = new_data;
364 	io->capacity = new_cap;
365 }
366 
367 static bool
368 response_pending(void)
369 {
370 
371 	if (cur_resp.start == 0 && cur_resp.len == 0)
372 		return (false);
373 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
374 		return (false);
375 	return (true);
376 }
377 
378 static void
379 close_connection(void)
380 {
381 
382 	/*
383 	 * XXX: This triggers a warning because mevent does the close
384 	 * before the EV_DELETE.
385 	 */
386 	pthread_mutex_lock(&gdb_lock);
387 	mevent_delete(write_event);
388 	mevent_delete_close(read_event);
389 	write_event = NULL;
390 	read_event = NULL;
391 	io_buffer_reset(&cur_comm);
392 	io_buffer_reset(&cur_resp);
393 	cur_fd = -1;
394 
395 	remove_all_sw_breakpoints();
396 
397 	/* Clear any pending events. */
398 	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
399 
400 	/* Resume any stopped vCPUs. */
401 	gdb_resume_vcpus();
402 	pthread_mutex_unlock(&gdb_lock);
403 }
404 
/*
 * Convert a value in the range 0-15 to its lowercase ASCII hex digit.
 */
static uint8_t
hex_digit(uint8_t nibble)
{
	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
414 
/*
 * Convert one ASCII hex digit (either case) to its value.  Any other
 * character yields 0xF rather than an error.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t lower;

	if ('0' <= v && v <= '9')
		return (v - '0');

	/* Setting bit 0x20 folds 'A'-'F' onto 'a'-'f'. */
	lower = v | 0x20;
	if ('a' <= lower && lower <= 'f')
		return (lower - 'a' + 10);
	return (0xF);
}
427 
/*
 * Parse 'len' hex digits starting at 'p', most significant digit
 * first.  No validation is done; see parse_digit() for how non-hex
 * characters are treated.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t v = 0;
	size_t i;

	for (i = 0; i < len; i++)
		v = (v << 4) | parse_digit(p[i]);
	return (v);
}
443 
/* Parse the two hex digits at 'p' into one byte, high nibble first. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return ((hi << 4) | lo);
}
450 
451 static void
452 send_pending_data(int fd)
453 {
454 	ssize_t nwritten;
455 
456 	if (cur_resp.len == 0) {
457 		mevent_disable(write_event);
458 		return;
459 	}
460 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
461 	if (nwritten == -1) {
462 		warn("Write to GDB socket failed");
463 		close_connection();
464 	} else {
465 		io_buffer_advance(&cur_resp, nwritten);
466 		if (cur_resp.len == 0)
467 			mevent_disable(write_event);
468 		else
469 			mevent_enable(write_event);
470 	}
471 }
472 
473 /* Append a single character to the output buffer. */
474 static void
475 send_char(uint8_t data)
476 {
477 	io_buffer_grow(&cur_resp, 1);
478 	*io_buffer_tail(&cur_resp) = data;
479 	cur_resp.len++;
480 }
481 
482 /* Append an array of bytes to the output buffer. */
483 static void
484 send_data(const uint8_t *data, size_t len)
485 {
486 
487 	io_buffer_grow(&cur_resp, len);
488 	memcpy(io_buffer_tail(&cur_resp), data, len);
489 	cur_resp.len += len;
490 }
491 
/*
 * Write the two lowercase hex digits for 'v' to buf[0] (high nibble)
 * and buf[1] (low nibble).  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t hi = v >> 4;
	uint8_t lo = v & 0xf;

	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
499 
/*
 * Queue one byte as two hex characters.  Like the other send_*()
 * helpers this leaves the running checksum alone.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
512 
513 static void
514 start_packet(void)
515 {
516 
517 	send_char('$');
518 	cur_csum = 0;
519 }
520 
521 static void
522 finish_packet(void)
523 {
524 
525 	send_char('#');
526 	send_byte(cur_csum);
527 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
528 }
529 
530 /*
531  * Append a single character (for the packet payload) and update the
532  * checksum.
533  */
534 static void
535 append_char(uint8_t v)
536 {
537 
538 	send_char(v);
539 	cur_csum += v;
540 }
541 
542 /*
543  * Append an array of bytes (for the packet payload) and update the
544  * checksum.
545  */
546 static void
547 append_packet_data(const uint8_t *data, size_t len)
548 {
549 
550 	send_data(data, len);
551 	while (len > 0) {
552 		cur_csum += *data;
553 		data++;
554 		len--;
555 	}
556 }
557 
/* Add a NUL-terminated string (without the NUL) to the packet payload. */
static void
append_string(const char *str)
{
	size_t n;

	n = strlen(str);
	append_packet_data((const uint8_t *)str, n);
}
564 
/* Add one byte to the packet payload as two hex characters. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
573 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	while (len-- > 0) {
		append_byte(value & 0xff);
		value >>= 8;
	}
}
584 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.
 *
 * The bytes are emitted one at a time instead of being staged in a
 * variable-length array.  This avoids a zero-length VLA when 'len' is
 * 0 and avoids passing a 'char' buffer to helpers that take
 * 'uint8_t *'.  Bytes beyond the width of 'value' are emitted as zero.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	const size_t width = sizeof(value) * 8;
	size_t i, shift;

	for (i = len; i > 0; i--) {
		shift = (i - 1) * 8;
		/* Shifting by >= the type width is undefined; emit 0. */
		if (shift < width)
			append_byte((uint8_t)(value >> shift));
		else
			append_byte(0);
	}
}
597 
/*
 * Add 'value' to the packet payload as big-endian hex, using only as
 * many whole bytes as are needed to hold it.  Zero is sent as "0".
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value != 0) {
		/* fls() gives the highest set bit; round up to bytes. */
		nbytes = (fls(value) + 7) / 8;
		append_unsigned_be(value, nbytes);
	} else
		append_char('0');
}
607 
/*
 * Add a NUL-terminated string to the packet payload with every
 * character encoded as two hex digits.
 */
static void
append_asciihex(const char *str)
{
	const char *cp;

	for (cp = str; *cp != '\0'; cp++)
		append_byte(*cp);
}
617 
/* Queue a packet with an empty payload. */
static void
send_empty_response(void)
{
	start_packet();
	finish_packet();
}
625 
/*
 * Queue an "Exx" error packet.  Only the low eight bits of 'error'
 * are sent, as two hex digits.
 */
static void
send_error(int error)
{
	uint8_t code = error;

	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
635 
/* Queue the "OK" success packet. */
static void
send_ok(void)
{
	start_packet();
	append_char('O');
	append_char('K');
	finish_packet();
}
644 
/*
 * Decode a thread-id field of 'len' characters.
 *
 * Returns -2 for an empty field, 0 for "0", -1 for "-1", and
 * otherwise the field parsed as hex.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{
	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (data[0] == '-' && data[1] == '1')
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
657 
658 /*
659  * Report the current stop event to the debugger.  If the stop is due
660  * to an event triggered on a specific vCPU such as a breakpoint or
661  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
662  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
663  * the reporting vCPU for vCPU events.
664  */
665 static void
666 report_stop(bool set_cur_vcpu)
667 {
668 	struct vcpu_state *vs;
669 
670 	start_packet();
671 	if (stopped_vcpu == -1) {
672 		append_char('S');
673 		append_byte(GDB_SIGNAL_TRAP);
674 	} else {
675 		vs = &vcpu_state[stopped_vcpu];
676 		if (set_cur_vcpu)
677 			cur_vcpu = stopped_vcpu;
678 		append_char('T');
679 		append_byte(GDB_SIGNAL_TRAP);
680 		append_string("thread:");
681 		append_integer(stopped_vcpu + 1);
682 		append_char(';');
683 		if (vs->hit_swbreak) {
684 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
685 			if (swbreak_enabled)
686 				append_string("swbreak:;");
687 		} else if (vs->stepped)
688 			debug("$vCPU %d reporting step\n", stopped_vcpu);
689 		else
690 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
691 	}
692 	finish_packet();
693 	report_next_stop = false;
694 }
695 
696 /*
697  * If this stop is due to a vCPU event, clear that event to mark it as
698  * acknowledged.
699  */
700 static void
701 discard_stop(void)
702 {
703 	struct vcpu_state *vs;
704 
705 	if (stopped_vcpu != -1) {
706 		vs = &vcpu_state[stopped_vcpu];
707 		vs->hit_swbreak = false;
708 		vs->stepped = false;
709 		stopped_vcpu = -1;
710 	}
711 	report_next_stop = true;
712 }
713 
714 static void
715 gdb_finish_suspend_vcpus(void)
716 {
717 
718 	if (first_stop) {
719 		first_stop = false;
720 		stopped_vcpu = -1;
721 	} else if (report_next_stop) {
722 		assert(!response_pending());
723 		report_stop(true);
724 		send_pending_data(cur_fd);
725 	}
726 }
727 
728 /*
729  * vCPU threads invoke this function whenever the vCPU enters the
730  * debug server to pause or report an event.  vCPU threads wait here
731  * as long as the debug server keeps them suspended.
732  */
733 static void
734 _gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
735 {
736 	int vcpuid = vcpu_id(vcpu);
737 
738 	debug("$vCPU %d suspending\n", vcpuid);
739 	CPU_SET(vcpuid, &vcpus_waiting);
740 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
741 		gdb_finish_suspend_vcpus();
742 	while (CPU_ISSET(vcpuid, &vcpus_suspended))
743 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
744 	CPU_CLR(vcpuid, &vcpus_waiting);
745 	debug("$vCPU %d resuming\n", vcpuid);
746 }
747 
748 /*
749  * Invoked at the start of a vCPU thread's execution to inform the
750  * debug server about the new thread.
751  */
752 void
753 gdb_cpu_add(struct vcpu *vcpu)
754 {
755 	int vcpuid;
756 
757 	if (!gdb_active)
758 		return;
759 	vcpuid = vcpu_id(vcpu);
760 	debug("$vCPU %d starting\n", vcpuid);
761 	pthread_mutex_lock(&gdb_lock);
762 	assert(vcpuid < guest_ncpus);
763 	assert(vcpus[vcpuid] == NULL);
764 	vcpus[vcpuid] = vcpu;
765 	CPU_SET(vcpuid, &vcpus_active);
766 	if (!TAILQ_EMPTY(&breakpoints)) {
767 		vm_set_capability(vcpu, VM_CAP_BPT_EXIT, 1);
768 		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
769 	}
770 
771 	/*
772 	 * If a vcpu is added while vcpus are stopped, suspend the new
773 	 * vcpu so that it will pop back out with a debug exit before
774 	 * executing the first instruction.
775 	 */
776 	if (!CPU_EMPTY(&vcpus_suspended)) {
777 		CPU_SET(vcpuid, &vcpus_suspended);
778 		_gdb_cpu_suspend(vcpu, false);
779 	}
780 	pthread_mutex_unlock(&gdb_lock);
781 }
782 
783 /*
784  * Invoked by vCPU before resuming execution.  This enables stepping
785  * if the vCPU is marked as stepping.
786  */
787 static void
788 gdb_cpu_resume(struct vcpu *vcpu)
789 {
790 	struct vcpu_state *vs;
791 	int error;
792 
793 	vs = &vcpu_state[vcpu_id(vcpu)];
794 
795 	/*
796 	 * Any pending event should already be reported before
797 	 * resuming.
798 	 */
799 	assert(vs->hit_swbreak == false);
800 	assert(vs->stepped == false);
801 	if (vs->stepping) {
802 		error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, 1);
803 		assert(error == 0);
804 
805 		error = vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, 1);
806 		assert(error == 0);
807 	}
808 }
809 
810 /*
811  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
812  * has been suspended due to an event on different vCPU or in response
813  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
814  */
815 void
816 gdb_cpu_suspend(struct vcpu *vcpu)
817 {
818 
819 	if (!gdb_active)
820 		return;
821 	pthread_mutex_lock(&gdb_lock);
822 	_gdb_cpu_suspend(vcpu, true);
823 	gdb_cpu_resume(vcpu);
824 	pthread_mutex_unlock(&gdb_lock);
825 }
826 
827 static void
828 gdb_suspend_vcpus(void)
829 {
830 
831 	assert(pthread_mutex_isowned_np(&gdb_lock));
832 	debug("suspending all CPUs\n");
833 	vcpus_suspended = vcpus_active;
834 	vm_suspend_all_cpus(ctx);
835 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
836 		gdb_finish_suspend_vcpus();
837 }
838 
839 /*
840  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
841  * the VT-x-specific MTRAP exit.
842  */
843 void
844 gdb_cpu_mtrap(struct vcpu *vcpu)
845 {
846 	struct vcpu_state *vs;
847 	int vcpuid;
848 
849 	if (!gdb_active)
850 		return;
851 	vcpuid = vcpu_id(vcpu);
852 	debug("$vCPU %d MTRAP\n", vcpuid);
853 	pthread_mutex_lock(&gdb_lock);
854 	vs = &vcpu_state[vcpuid];
855 	if (vs->stepping) {
856 		vs->stepping = false;
857 		vs->stepped = true;
858 		vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, 0);
859 		vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, 0);
860 
861 		while (vs->stepped) {
862 			if (stopped_vcpu == -1) {
863 				debug("$vCPU %d reporting step\n", vcpuid);
864 				stopped_vcpu = vcpuid;
865 				gdb_suspend_vcpus();
866 			}
867 			_gdb_cpu_suspend(vcpu, true);
868 		}
869 		gdb_cpu_resume(vcpu);
870 	}
871 	pthread_mutex_unlock(&gdb_lock);
872 }
873 
874 static struct breakpoint *
875 find_breakpoint(uint64_t gpa)
876 {
877 	struct breakpoint *bp;
878 
879 	TAILQ_FOREACH(bp, &breakpoints, link) {
880 		if (bp->gpa == gpa)
881 			return (bp);
882 	}
883 	return (NULL);
884 }
885 
886 void
887 gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
888 {
889 	struct breakpoint *bp;
890 	struct vcpu_state *vs;
891 	uint64_t gpa;
892 	int error, vcpuid;
893 
894 	if (!gdb_active) {
895 		fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
896 		exit(4);
897 	}
898 	vcpuid = vcpu_id(vcpu);
899 	pthread_mutex_lock(&gdb_lock);
900 	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
901 	assert(error == 1);
902 	bp = find_breakpoint(gpa);
903 	if (bp != NULL) {
904 		vs = &vcpu_state[vcpuid];
905 		assert(vs->stepping == false);
906 		assert(vs->stepped == false);
907 		assert(vs->hit_swbreak == false);
908 		vs->hit_swbreak = true;
909 		vm_set_register(vcpu, VM_REG_GUEST_RIP, vmexit->rip);
910 		for (;;) {
911 			if (stopped_vcpu == -1) {
912 				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
913 				    vcpuid, vmexit->rip);
914 				stopped_vcpu = vcpuid;
915 				gdb_suspend_vcpus();
916 			}
917 			_gdb_cpu_suspend(vcpu, true);
918 			if (!vs->hit_swbreak) {
919 				/* Breakpoint reported. */
920 				break;
921 			}
922 			bp = find_breakpoint(gpa);
923 			if (bp == NULL) {
924 				/* Breakpoint was removed. */
925 				vs->hit_swbreak = false;
926 				break;
927 			}
928 		}
929 		gdb_cpu_resume(vcpu);
930 	} else {
931 		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
932 		    vmexit->rip);
933 		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
934 		    vmexit->u.bpt.inst_length);
935 		assert(error == 0);
936 		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
937 		assert(error == 0);
938 	}
939 	pthread_mutex_unlock(&gdb_lock);
940 }
941 
942 static bool
943 gdb_step_vcpu(struct vcpu *vcpu)
944 {
945 	int error, val, vcpuid;
946 
947 	vcpuid = vcpu_id(vcpu);
948 	debug("$vCPU %d step\n", vcpuid);
949 	error = vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val);
950 	if (error < 0)
951 		return (false);
952 
953 	discard_stop();
954 	vcpu_state[vcpuid].stepping = true;
955 	vm_resume_cpu(vcpu);
956 	CPU_CLR(vcpuid, &vcpus_suspended);
957 	pthread_cond_broadcast(&idle_vcpus);
958 	return (true);
959 }
960 
961 static void
962 gdb_resume_vcpus(void)
963 {
964 
965 	assert(pthread_mutex_isowned_np(&gdb_lock));
966 	vm_resume_all_cpus(ctx);
967 	debug("resuming all CPUs\n");
968 	CPU_ZERO(&vcpus_suspended);
969 	pthread_cond_broadcast(&idle_vcpus);
970 }
971 
972 static void
973 gdb_read_regs(void)
974 {
975 	uint64_t regvals[nitems(gdb_regset)];
976 
977 	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
978 	    gdb_regset, regvals) == -1) {
979 		send_error(errno);
980 		return;
981 	}
982 	start_packet();
983 	for (size_t i = 0; i < nitems(regvals); i++)
984 		append_unsigned_native(regvals[i], gdb_regsize[i]);
985 	finish_packet();
986 }
987 
988 static void
989 gdb_read_mem(const uint8_t *data, size_t len)
990 {
991 	uint64_t gpa, gva, val;
992 	uint8_t *cp;
993 	size_t resid, todo, bytes;
994 	bool started;
995 	int error;
996 
997 	/* Skip 'm' */
998 	data += 1;
999 	len -= 1;
1000 
1001 	/* Parse and consume address. */
1002 	cp = memchr(data, ',', len);
1003 	if (cp == NULL || cp == data) {
1004 		send_error(EINVAL);
1005 		return;
1006 	}
1007 	gva = parse_integer(data, cp - data);
1008 	len -= (cp - data) + 1;
1009 	data += (cp - data) + 1;
1010 
1011 	/* Parse length. */
1012 	resid = parse_integer(data, len);
1013 
1014 	started = false;
1015 	while (resid > 0) {
1016 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1017 		if (error == -1) {
1018 			if (started)
1019 				finish_packet();
1020 			else
1021 				send_error(errno);
1022 			return;
1023 		}
1024 		if (error == 0) {
1025 			if (started)
1026 				finish_packet();
1027 			else
1028 				send_error(EFAULT);
1029 			return;
1030 		}
1031 
1032 		/* Read bytes from current page. */
1033 		todo = getpagesize() - gpa % getpagesize();
1034 		if (todo > resid)
1035 			todo = resid;
1036 
1037 		cp = paddr_guest2host(ctx, gpa, todo);
1038 		if (cp != NULL) {
1039 			/*
1040 			 * If this page is guest RAM, read it a byte
1041 			 * at a time.
1042 			 */
1043 			if (!started) {
1044 				start_packet();
1045 				started = true;
1046 			}
1047 			while (todo > 0) {
1048 				append_byte(*cp);
1049 				cp++;
1050 				gpa++;
1051 				gva++;
1052 				resid--;
1053 				todo--;
1054 			}
1055 		} else {
1056 			/*
1057 			 * If this page isn't guest RAM, try to handle
1058 			 * it via MMIO.  For MMIO requests, use
1059 			 * aligned reads of words when possible.
1060 			 */
1061 			while (todo > 0) {
1062 				if (gpa & 1 || todo == 1)
1063 					bytes = 1;
1064 				else if (gpa & 2 || todo == 2)
1065 					bytes = 2;
1066 				else
1067 					bytes = 4;
1068 				error = read_mem(vcpus[cur_vcpu], gpa, &val,
1069 				    bytes);
1070 				if (error == 0) {
1071 					if (!started) {
1072 						start_packet();
1073 						started = true;
1074 					}
1075 					gpa += bytes;
1076 					gva += bytes;
1077 					resid -= bytes;
1078 					todo -= bytes;
1079 					while (bytes > 0) {
1080 						append_byte(val);
1081 						val >>= 8;
1082 						bytes--;
1083 					}
1084 				} else {
1085 					if (started)
1086 						finish_packet();
1087 					else
1088 						send_error(EFAULT);
1089 					return;
1090 				}
1091 			}
1092 		}
1093 		assert(resid == 0 || gpa % getpagesize() == 0);
1094 	}
1095 	if (!started)
1096 		start_packet();
1097 	finish_packet();
1098 }
1099 
1100 static void
1101 gdb_write_mem(const uint8_t *data, size_t len)
1102 {
1103 	uint64_t gpa, gva, val;
1104 	uint8_t *cp;
1105 	size_t resid, todo, bytes;
1106 	int error;
1107 
1108 	/* Skip 'M' */
1109 	data += 1;
1110 	len -= 1;
1111 
1112 	/* Parse and consume address. */
1113 	cp = memchr(data, ',', len);
1114 	if (cp == NULL || cp == data) {
1115 		send_error(EINVAL);
1116 		return;
1117 	}
1118 	gva = parse_integer(data, cp - data);
1119 	len -= (cp - data) + 1;
1120 	data += (cp - data) + 1;
1121 
1122 	/* Parse and consume length. */
1123 	cp = memchr(data, ':', len);
1124 	if (cp == NULL || cp == data) {
1125 		send_error(EINVAL);
1126 		return;
1127 	}
1128 	resid = parse_integer(data, cp - data);
1129 	len -= (cp - data) + 1;
1130 	data += (cp - data) + 1;
1131 
1132 	/* Verify the available bytes match the length. */
1133 	if (len != resid * 2) {
1134 		send_error(EINVAL);
1135 		return;
1136 	}
1137 
1138 	while (resid > 0) {
1139 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1140 		if (error == -1) {
1141 			send_error(errno);
1142 			return;
1143 		}
1144 		if (error == 0) {
1145 			send_error(EFAULT);
1146 			return;
1147 		}
1148 
1149 		/* Write bytes to current page. */
1150 		todo = getpagesize() - gpa % getpagesize();
1151 		if (todo > resid)
1152 			todo = resid;
1153 
1154 		cp = paddr_guest2host(ctx, gpa, todo);
1155 		if (cp != NULL) {
1156 			/*
1157 			 * If this page is guest RAM, write it a byte
1158 			 * at a time.
1159 			 */
1160 			while (todo > 0) {
1161 				assert(len >= 2);
1162 				*cp = parse_byte(data);
1163 				data += 2;
1164 				len -= 2;
1165 				cp++;
1166 				gpa++;
1167 				gva++;
1168 				resid--;
1169 				todo--;
1170 			}
1171 		} else {
1172 			/*
1173 			 * If this page isn't guest RAM, try to handle
1174 			 * it via MMIO.  For MMIO requests, use
1175 			 * aligned writes of words when possible.
1176 			 */
1177 			while (todo > 0) {
1178 				if (gpa & 1 || todo == 1) {
1179 					bytes = 1;
1180 					val = parse_byte(data);
1181 				} else if (gpa & 2 || todo == 2) {
1182 					bytes = 2;
1183 					val = be16toh(parse_integer(data, 4));
1184 				} else {
1185 					bytes = 4;
1186 					val = be32toh(parse_integer(data, 8));
1187 				}
1188 				error = write_mem(vcpus[cur_vcpu], gpa, val,
1189 				    bytes);
1190 				if (error == 0) {
1191 					gpa += bytes;
1192 					gva += bytes;
1193 					resid -= bytes;
1194 					todo -= bytes;
1195 					data += 2 * bytes;
1196 					len -= 2 * bytes;
1197 				} else {
1198 					send_error(EFAULT);
1199 					return;
1200 				}
1201 			}
1202 		}
1203 		assert(resid == 0 || gpa % getpagesize() == 0);
1204 	}
1205 	assert(len == 0);
1206 	send_ok();
1207 }
1208 
1209 static bool
1210 set_breakpoint_caps(bool enable)
1211 {
1212 	cpuset_t mask;
1213 	int vcpu;
1214 
1215 	mask = vcpus_active;
1216 	while (!CPU_EMPTY(&mask)) {
1217 		vcpu = CPU_FFS(&mask) - 1;
1218 		CPU_CLR(vcpu, &mask);
1219 		if (vm_set_capability(vcpus[vcpu], VM_CAP_BPT_EXIT,
1220 		    enable ? 1 : 0) < 0)
1221 			return (false);
1222 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1223 		    enable ? "en" : "dis");
1224 	}
1225 	return (true);
1226 }
1227 
1228 static void
1229 remove_all_sw_breakpoints(void)
1230 {
1231 	struct breakpoint *bp, *nbp;
1232 	uint8_t *cp;
1233 
1234 	if (TAILQ_EMPTY(&breakpoints))
1235 		return;
1236 
1237 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1238 		debug("remove breakpoint at %#lx\n", bp->gpa);
1239 		cp = paddr_guest2host(ctx, bp->gpa, 1);
1240 		*cp = bp->shadow_inst;
1241 		TAILQ_REMOVE(&breakpoints, bp, link);
1242 		free(bp);
1243 	}
1244 	TAILQ_INIT(&breakpoints);
1245 	set_breakpoint_caps(false);
1246 }
1247 
1248 static void
1249 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1250 {
1251 	struct breakpoint *bp;
1252 	uint64_t gpa;
1253 	uint8_t *cp;
1254 	int error;
1255 
1256 	if (kind != 1) {
1257 		send_error(EINVAL);
1258 		return;
1259 	}
1260 
1261 	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1262 	if (error == -1) {
1263 		send_error(errno);
1264 		return;
1265 	}
1266 	if (error == 0) {
1267 		send_error(EFAULT);
1268 		return;
1269 	}
1270 
1271 	cp = paddr_guest2host(ctx, gpa, 1);
1272 
1273 	/* Only permit breakpoints in guest RAM. */
1274 	if (cp == NULL) {
1275 		send_error(EFAULT);
1276 		return;
1277 	}
1278 
1279 	/* Find any existing breakpoint. */
1280 	bp = find_breakpoint(gpa);
1281 
1282 	/*
1283 	 * Silently ignore duplicate commands since the protocol
1284 	 * requires these packets to be idempotent.
1285 	 */
1286 	if (insert) {
1287 		if (bp == NULL) {
1288 			if (TAILQ_EMPTY(&breakpoints) &&
1289 			    !set_breakpoint_caps(true)) {
1290 				send_empty_response();
1291 				return;
1292 			}
1293 			bp = malloc(sizeof(*bp));
1294 			bp->gpa = gpa;
1295 			bp->shadow_inst = *cp;
1296 			*cp = 0xcc;	/* INT 3 */
1297 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1298 			debug("new breakpoint at %#lx\n", gpa);
1299 		}
1300 	} else {
1301 		if (bp != NULL) {
1302 			debug("remove breakpoint at %#lx\n", gpa);
1303 			*cp = bp->shadow_inst;
1304 			TAILQ_REMOVE(&breakpoints, bp, link);
1305 			free(bp);
1306 			if (TAILQ_EMPTY(&breakpoints))
1307 				set_breakpoint_caps(false);
1308 		}
1309 	}
1310 	send_ok();
1311 }
1312 
/*
 * Parse a breakpoint packet of the form "<Z|z><type>,<addr>,<kind>" and
 * act on it.
 *
 * 'Z' inserts and 'z' removes.  A missing or empty type or address field
 * is answered with EINVAL, as is a kind field that starts with ';'.  A
 * ';' later in the kind field (conditions or commands) and any type
 * other than 0 (software breakpoint) are answered with an empty
 * response.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	const uint8_t *sep;
	uint64_t addr;
	size_t step;
	int bp_kind, bp_type;
	bool adding;

	adding = (*data == 'Z');

	/* Step past the leading 'Z' or 'z'. */
	data++;
	len--;

	/* Type field: everything up to the first ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	bp_type = parse_integer(data, sep - data);
	step = (sep - data) + 1;
	data += step;
	len -= step;

	/* Address field: everything up to the next ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	addr = parse_integer(data, sep - data);
	step = (sep - data) + 1;
	data += step;
	len -= step;

	/* Kind field: the remainder of the packet. */
	sep = memchr(data, ';', len);
	if (sep == data) {
		send_error(EINVAL);
		return;
	}
	if (sep != NULL) {
		/*
		 * Neither ConditionalBreakpoints nor BreakpointCommands
		 * is advertised, so the remote end should never append
		 * conditions or commands after the kind.
		 */
		send_empty_response();
		return;
	}
	bp_kind = parse_integer(data, len);

	/* Type 0 is the only breakpoint type handled here. */
	if (bp_type == 0)
		update_sw_breakpoint(addr, bp_kind, adding);
	else
		send_empty_response();
}
1376 
/*
 * Return true when the 'len'-byte buffer 'data' begins with the
 * NUL-terminated string 'cmd'.
 *
 * This is a prefix match: bytes in 'data' past the end of 'cmd' are not
 * examined, so callers testing several commands must order them with
 * that in mind.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (cmdlen <= len && memcmp(data, cmd, cmdlen) == 0);
}
1385 
1386 static void
1387 check_features(const uint8_t *data, size_t len)
1388 {
1389 	char *feature, *next_feature, *str, *value;
1390 	bool supported;
1391 
1392 	str = malloc(len + 1);
1393 	memcpy(str, data, len);
1394 	str[len] = '\0';
1395 	next_feature = str;
1396 
1397 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1398 		/*
1399 		 * Null features shouldn't exist, but skip if they
1400 		 * do.
1401 		 */
1402 		if (strcmp(feature, "") == 0)
1403 			continue;
1404 
1405 		/*
1406 		 * Look for the value or supported / not supported
1407 		 * flag.
1408 		 */
1409 		value = strchr(feature, '=');
1410 		if (value != NULL) {
1411 			*value = '\0';
1412 			value++;
1413 			supported = true;
1414 		} else {
1415 			value = feature + strlen(feature) - 1;
1416 			switch (*value) {
1417 			case '+':
1418 				supported = true;
1419 				break;
1420 			case '-':
1421 				supported = false;
1422 				break;
1423 			default:
1424 				/*
1425 				 * This is really a protocol error,
1426 				 * but we just ignore malformed
1427 				 * features for ease of
1428 				 * implementation.
1429 				 */
1430 				continue;
1431 			}
1432 			value = NULL;
1433 		}
1434 
1435 		if (strcmp(feature, "swbreak") == 0)
1436 			swbreak_enabled = supported;
1437 	}
1438 	free(str);
1439 
1440 	start_packet();
1441 
1442 	/* This is an arbitrary limit. */
1443 	append_string("PacketSize=4096");
1444 	append_string(";swbreak+");
1445 	finish_packet();
1446 }
1447 
1448 static void
1449 gdb_query(const uint8_t *data, size_t len)
1450 {
1451 
1452 	/*
1453 	 * TODO:
1454 	 * - qSearch
1455 	 */
1456 	if (command_equals(data, len, "qAttached")) {
1457 		start_packet();
1458 		append_char('1');
1459 		finish_packet();
1460 	} else if (command_equals(data, len, "qC")) {
1461 		start_packet();
1462 		append_string("QC");
1463 		append_integer(cur_vcpu + 1);
1464 		finish_packet();
1465 	} else if (command_equals(data, len, "qfThreadInfo")) {
1466 		cpuset_t mask;
1467 		bool first;
1468 		int vcpu;
1469 
1470 		if (CPU_EMPTY(&vcpus_active)) {
1471 			send_error(EINVAL);
1472 			return;
1473 		}
1474 		mask = vcpus_active;
1475 		start_packet();
1476 		append_char('m');
1477 		first = true;
1478 		while (!CPU_EMPTY(&mask)) {
1479 			vcpu = CPU_FFS(&mask) - 1;
1480 			CPU_CLR(vcpu, &mask);
1481 			if (first)
1482 				first = false;
1483 			else
1484 				append_char(',');
1485 			append_integer(vcpu + 1);
1486 		}
1487 		finish_packet();
1488 	} else if (command_equals(data, len, "qsThreadInfo")) {
1489 		start_packet();
1490 		append_char('l');
1491 		finish_packet();
1492 	} else if (command_equals(data, len, "qSupported")) {
1493 		data += strlen("qSupported");
1494 		len -= strlen("qSupported");
1495 		check_features(data, len);
1496 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1497 		char buf[16];
1498 		int tid;
1499 
1500 		data += strlen("qThreadExtraInfo");
1501 		len -= strlen("qThreadExtraInfo");
1502 		if (*data != ',') {
1503 			send_error(EINVAL);
1504 			return;
1505 		}
1506 		tid = parse_threadid(data + 1, len - 1);
1507 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1508 			send_error(EINVAL);
1509 			return;
1510 		}
1511 
1512 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1513 		start_packet();
1514 		append_asciihex(buf);
1515 		finish_packet();
1516 	} else
1517 		send_empty_response();
1518 }
1519 
1520 static void
1521 handle_command(const uint8_t *data, size_t len)
1522 {
1523 
1524 	/* Reject packets with a sequence-id. */
1525 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1526 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1527 		send_empty_response();
1528 		return;
1529 	}
1530 
1531 	switch (*data) {
1532 	case 'c':
1533 		if (len != 1) {
1534 			send_error(EINVAL);
1535 			break;
1536 		}
1537 
1538 		discard_stop();
1539 		gdb_resume_vcpus();
1540 		break;
1541 	case 'D':
1542 		send_ok();
1543 
1544 		/* TODO: Resume any stopped CPUs. */
1545 		break;
1546 	case 'g': {
1547 		gdb_read_regs();
1548 		break;
1549 	}
1550 	case 'H': {
1551 		int tid;
1552 
1553 		if (data[1] != 'g' && data[1] != 'c') {
1554 			send_error(EINVAL);
1555 			break;
1556 		}
1557 		tid = parse_threadid(data + 2, len - 2);
1558 		if (tid == -2) {
1559 			send_error(EINVAL);
1560 			break;
1561 		}
1562 
1563 		if (CPU_EMPTY(&vcpus_active)) {
1564 			send_error(EINVAL);
1565 			break;
1566 		}
1567 		if (tid == -1 || tid == 0)
1568 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1569 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1570 			cur_vcpu = tid - 1;
1571 		else {
1572 			send_error(EINVAL);
1573 			break;
1574 		}
1575 		send_ok();
1576 		break;
1577 	}
1578 	case 'm':
1579 		gdb_read_mem(data, len);
1580 		break;
1581 	case 'M':
1582 		gdb_write_mem(data, len);
1583 		break;
1584 	case 'T': {
1585 		int tid;
1586 
1587 		tid = parse_threadid(data + 1, len - 1);
1588 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1589 			send_error(EINVAL);
1590 			return;
1591 		}
1592 		send_ok();
1593 		break;
1594 	}
1595 	case 'q':
1596 		gdb_query(data, len);
1597 		break;
1598 	case 's':
1599 		if (len != 1) {
1600 			send_error(EINVAL);
1601 			break;
1602 		}
1603 
1604 		/* Don't send a reply until a stop occurs. */
1605 		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
1606 			send_error(EOPNOTSUPP);
1607 			break;
1608 		}
1609 		break;
1610 	case 'z':
1611 	case 'Z':
1612 		parse_breakpoint(data, len);
1613 		break;
1614 	case '?':
1615 		report_stop(false);
1616 		break;
1617 	case 'G': /* TODO */
1618 	case 'v':
1619 		/* Handle 'vCont' */
1620 		/* 'vCtrlC' */
1621 	case 'p': /* TODO */
1622 	case 'P': /* TODO */
1623 	case 'Q': /* TODO */
1624 	case 't': /* TODO */
1625 	case 'X': /* TODO */
1626 	default:
1627 		send_empty_response();
1628 	}
1629 }
1630 
1631 /* Check for a valid packet in the command buffer. */
1632 static void
1633 check_command(int fd)
1634 {
1635 	uint8_t *head, *hash, *p, sum;
1636 	size_t avail, plen;
1637 
1638 	for (;;) {
1639 		avail = cur_comm.len;
1640 		if (avail == 0)
1641 			return;
1642 		head = io_buffer_head(&cur_comm);
1643 		switch (*head) {
1644 		case 0x03:
1645 			debug("<- Ctrl-C\n");
1646 			io_buffer_consume(&cur_comm, 1);
1647 
1648 			gdb_suspend_vcpus();
1649 			break;
1650 		case '+':
1651 			/* ACK of previous response. */
1652 			debug("<- +\n");
1653 			if (response_pending())
1654 				io_buffer_reset(&cur_resp);
1655 			io_buffer_consume(&cur_comm, 1);
1656 			if (stopped_vcpu != -1 && report_next_stop) {
1657 				report_stop(true);
1658 				send_pending_data(fd);
1659 			}
1660 			break;
1661 		case '-':
1662 			/* NACK of previous response. */
1663 			debug("<- -\n");
1664 			if (response_pending()) {
1665 				cur_resp.len += cur_resp.start;
1666 				cur_resp.start = 0;
1667 				if (cur_resp.data[0] == '+')
1668 					io_buffer_advance(&cur_resp, 1);
1669 				debug("-> %.*s\n", (int)cur_resp.len,
1670 				    io_buffer_head(&cur_resp));
1671 			}
1672 			io_buffer_consume(&cur_comm, 1);
1673 			send_pending_data(fd);
1674 			break;
1675 		case '$':
1676 			/* Packet. */
1677 
1678 			if (response_pending()) {
1679 				warnx("New GDB command while response in "
1680 				    "progress");
1681 				io_buffer_reset(&cur_resp);
1682 			}
1683 
1684 			/* Is packet complete? */
1685 			hash = memchr(head, '#', avail);
1686 			if (hash == NULL)
1687 				return;
1688 			plen = (hash - head + 1) + 2;
1689 			if (avail < plen)
1690 				return;
1691 			debug("<- %.*s\n", (int)plen, head);
1692 
1693 			/* Verify checksum. */
1694 			for (sum = 0, p = head + 1; p < hash; p++)
1695 				sum += *p;
1696 			if (sum != parse_byte(hash + 1)) {
1697 				io_buffer_consume(&cur_comm, plen);
1698 				debug("-> -\n");
1699 				send_char('-');
1700 				send_pending_data(fd);
1701 				break;
1702 			}
1703 			send_char('+');
1704 
1705 			handle_command(head + 1, hash - (head + 1));
1706 			io_buffer_consume(&cur_comm, plen);
1707 			if (!response_pending())
1708 				debug("-> +\n");
1709 			send_pending_data(fd);
1710 			break;
1711 		default:
1712 			/* XXX: Possibly drop connection instead. */
1713 			debug("-> %02x\n", *head);
1714 			io_buffer_consume(&cur_comm, 1);
1715 			break;
1716 		}
1717 	}
1718 }
1719 
1720 static void
1721 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1722 {
1723 	size_t pending;
1724 	ssize_t nread;
1725 	int n;
1726 
1727 	if (ioctl(fd, FIONREAD, &n) == -1) {
1728 		warn("FIONREAD on GDB socket");
1729 		return;
1730 	}
1731 	assert(n >= 0);
1732 	pending = n;
1733 
1734 	/*
1735 	 * 'pending' might be zero due to EOF.  We need to call read
1736 	 * with a non-zero length to detect EOF.
1737 	 */
1738 	if (pending == 0)
1739 		pending = 1;
1740 
1741 	/* Ensure there is room in the command buffer. */
1742 	io_buffer_grow(&cur_comm, pending);
1743 	assert(io_buffer_avail(&cur_comm) >= pending);
1744 
1745 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1746 	if (nread == 0) {
1747 		close_connection();
1748 	} else if (nread == -1) {
1749 		if (errno == EAGAIN)
1750 			return;
1751 
1752 		warn("Read from GDB socket");
1753 		close_connection();
1754 	} else {
1755 		cur_comm.len += nread;
1756 		pthread_mutex_lock(&gdb_lock);
1757 		check_command(fd);
1758 		pthread_mutex_unlock(&gdb_lock);
1759 	}
1760 }
1761 
/*
 * Write-readiness callback for the GDB connection: hands 'fd' to
 * send_pending_data().  'event' and 'arg' are unused.
 */
static void
gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
{

	send_pending_data(fd);
}
1768 
1769 static void
1770 new_connection(int fd, enum ev_type event __unused, void *arg)
1771 {
1772 	int optval, s;
1773 
1774 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1775 	if (s == -1) {
1776 		if (arg != NULL)
1777 			err(1, "Failed accepting initial GDB connection");
1778 
1779 		/* Silently ignore errors post-startup. */
1780 		return;
1781 	}
1782 
1783 	optval = 1;
1784 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1785 	    -1) {
1786 		warn("Failed to disable SIGPIPE for GDB connection");
1787 		close(s);
1788 		return;
1789 	}
1790 
1791 	pthread_mutex_lock(&gdb_lock);
1792 	if (cur_fd != -1) {
1793 		close(s);
1794 		warnx("Ignoring additional GDB connection.");
1795 	}
1796 
1797 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1798 	if (read_event == NULL) {
1799 		if (arg != NULL)
1800 			err(1, "Failed to setup initial GDB connection");
1801 		pthread_mutex_unlock(&gdb_lock);
1802 		return;
1803 	}
1804 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1805 	if (write_event == NULL) {
1806 		if (arg != NULL)
1807 			err(1, "Failed to setup initial GDB connection");
1808 		mevent_delete_close(read_event);
1809 		read_event = NULL;
1810 	}
1811 
1812 	cur_fd = s;
1813 	cur_vcpu = 0;
1814 	stopped_vcpu = -1;
1815 
1816 	/* Break on attach. */
1817 	first_stop = true;
1818 	report_next_stop = false;
1819 	gdb_suspend_vcpus();
1820 	pthread_mutex_unlock(&gdb_lock);
1821 }
1822 
1823 #ifndef WITHOUT_CAPSICUM
1824 static void
1825 limit_gdb_socket(int s)
1826 {
1827 	cap_rights_t rights;
1828 	unsigned long ioctls[] = { FIONREAD };
1829 
1830 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1831 	    CAP_SETSOCKOPT, CAP_IOCTL);
1832 	if (caph_rights_limit(s, &rights) == -1)
1833 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1834 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1835 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1836 }
1837 #endif
1838 
/*
 * Set up the GDB stub for the guest described by '_ctx'.
 *
 * Nothing is done unless the "gdb.port" configuration value is set.
 * The listening address comes from "gdb.address" (default "localhost").
 * When "gdb.wait" is true, vCPU 0 is marked suspended up front so that
 * it waits for a debugger.  On success a non-blocking listening socket
 * is registered for new connections and gdb_active is set.
 *
 * Every setup failure is fatal and exits the process.
 */
void
init_gdb(struct vmctx *_ctx)
{
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	/* Only the first address returned is used. */
	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	/* Best effort: failing to set SO_REUSEADDR is not fatal. */
	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	/* Both arrays are indexed by vCPU later, so neither may be NULL. */
	if (vcpus == NULL || vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	/* Don't feed a failed F_GETFL result back into F_SETFL. */
	flags = fcntl(s, F_GETFL);
	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	/* Without this event no debugger could ever connect. */
	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
		errx(1, "Failed to register gdb listen socket");
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);
}
1922 }
1923