xref: /freebsd/usr.sbin/bhyve/gdb.c (revision c07d6445eb89d9dd3950361b065b7bd110e3a043)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <netdb.h>
52 #include <pthread.h>
53 #include <pthread_np.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <sysexits.h>
59 #include <unistd.h>
60 #include <vmmapi.h>
61 
62 #include "bhyverun.h"
63 #include "config.h"
64 #include "gdb.h"
65 #include "mem.h"
66 #include "mevent.h"
67 
68 /*
69  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
70  * use SIGTRAP.
71  */
72 #define	GDB_SIGNAL_TRAP		5
73 
74 static void gdb_resume_vcpus(void);
75 static void check_command(int fd);
76 
/*
 * mevent registrations for the debugger connection.  Both are deleted
 * in close_connection(); write_event is enabled by send_pending_data()
 * only while response bytes remain unsent.
 */
77 static struct mevent *read_event, *write_event;
78 
/*
 * vcpus_active: vCPUs registered through gdb_cpu_add().
 * vcpus_suspended: vCPUs the debug server currently wants stopped.
 * vcpus_waiting: vCPUs parked inside _gdb_cpu_suspend().
 */
79 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
80 static pthread_mutex_t gdb_lock;	/* held around the state in this file */
81 static pthread_cond_t idle_vcpus;	/* suspended vCPUs wait on this */
/*
 * first_stop: the next completed suspend is swallowed instead of reported.
 * report_next_stop: send a stop reply once every suspended vCPU is waiting.
 * swbreak_enabled: add "swbreak:;" to breakpoint stop replies.
 */
82 static bool first_stop, report_next_stop, swbreak_enabled;
83 
84 /*
85  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
86  * read buffer, 'start' is unused and 'len' contains the number of
87  * valid bytes in the buffer.  For a write buffer, 'start' is set to
88  * the index of the next byte in 'data' to send, and 'len' contains
89  * the remaining number of valid bytes to send.
90  */
91 struct io_buffer {
92 	uint8_t *data;		/* backing storage, resized by io_buffer_grow() */
93 	size_t capacity;	/* allocated size of 'data' in bytes */
94 	size_t start;		/* offset of the first valid byte */
95 	size_t len;		/* number of valid bytes from 'start' */
96 };
97 
/* A software breakpoint planted in guest memory by the debug server. */
98 struct breakpoint {
99 	uint64_t gpa;		/* guest physical address of the patched byte */
100 	uint8_t shadow_inst;	/* original byte replaced by INT 3 (0xcc) */
101 	TAILQ_ENTRY(breakpoint) link;	/* linkage on the 'breakpoints' list */
102 };
103 
104 /*
105  * When a vCPU stops due to an event that should be reported to the
106  * debugger, information about the event is stored in this structure.
107  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
108  * and stops other vCPUs so the event can be reported.  The
109  * report_stop() function reports the event for the 'stopped_vcpu'
110  * vCPU.  When the debugger resumes execution via continue or step,
111  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
112  * event handlers until the associated event is reported or disabled.
113  *
114  * An idle vCPU will have all of the boolean fields set to false.
115  *
116  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
117  * released to execute the stepped instruction.  When the vCPU reports
118  * the stepping trap, 'stepped' is set.
119  *
120  * When a vCPU hits a breakpoint set by the debug server,
121  * 'hit_swbreak' is set to true.
122  */
123 struct vcpu_state {
124 	bool stepping;		/* released to execute a single instruction */
125 	bool stepped;		/* step trap taken, not yet acknowledged */
126 	bool hit_swbreak;	/* stopped at a debug-server breakpoint */
127 };
128 
/*
 * cur_resp holds response bytes queued for the debugger (see send_char()
 * and send_pending_data()).  cur_comm is presumably the buffer for
 * incoming command bytes -- NOTE(review): its use is not shown here.
 */
129 static struct io_buffer cur_comm, cur_resp;
130 static uint8_t cur_csum;	/* running checksum of the packet being built */
131 static struct vmctx *ctx;	/* VM handle passed to the vm_*() calls */
132 static int cur_fd = -1;		/* debugger socket; -1 when not connected */
133 static TAILQ_HEAD(, breakpoint) breakpoints;	/* active sw breakpoints */
134 static struct vcpu_state *vcpu_state;	/* indexed by vCPU id */
/*
 * cur_vcpu: vCPU that register and memory requests operate on.
 * stopped_vcpu: vCPU whose event is being reported, or -1 if none.
 */
135 static int cur_vcpu, stopped_vcpu;
136 static bool gdb_active = false;	/* the vCPU hooks do nothing while false */
137 
/*
 * Guest registers fetched by gdb_read_regs(), in the order they are
 * emitted in the reply.  gdb_regsize[] supplies the number of bytes
 * sent for the register at the same index.
 */
138 static const int gdb_regset[] = {
139 	VM_REG_GUEST_RAX,
140 	VM_REG_GUEST_RBX,
141 	VM_REG_GUEST_RCX,
142 	VM_REG_GUEST_RDX,
143 	VM_REG_GUEST_RSI,
144 	VM_REG_GUEST_RDI,
145 	VM_REG_GUEST_RBP,
146 	VM_REG_GUEST_RSP,
147 	VM_REG_GUEST_R8,
148 	VM_REG_GUEST_R9,
149 	VM_REG_GUEST_R10,
150 	VM_REG_GUEST_R11,
151 	VM_REG_GUEST_R12,
152 	VM_REG_GUEST_R13,
153 	VM_REG_GUEST_R14,
154 	VM_REG_GUEST_R15,
155 	VM_REG_GUEST_RIP,
156 	VM_REG_GUEST_RFLAGS,
157 	VM_REG_GUEST_CS,
158 	VM_REG_GUEST_SS,
159 	VM_REG_GUEST_DS,
160 	VM_REG_GUEST_ES,
161 	VM_REG_GUEST_FS,
162 	VM_REG_GUEST_GS
163 };
164 
165 static const int gdb_regsize[] = {
166 	8,
167 	8,
168 	8,
169 	8,
170 	8,
171 	8,
172 	8,
173 	8,
174 	8,
175 	8,
176 	8,
177 	8,
178 	8,
179 	8,
180 	8,
181 	8,
182 	8,
183 	4,
184 	4,
185 	4,
186 	4,
187 	4,
188 	4,
189 	4
190 };
191 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace output, compiled in only when GDB_LOG is defined.
 * The log file is opened on the first call.  If it cannot be opened,
 * or (with Capsicum) cannot be limited with CAPH_WRITE, the message is
 * dropped and the open is attempted again on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		FILE *fp;

		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		logfile = fp;
	}

	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
#define debug(...)
#endif
222 
223 static void	remove_all_sw_breakpoints(void);
224 
225 static int
226 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
227 {
228 	uint64_t regs[4];
229 	const int regset[4] = {
230 		VM_REG_GUEST_CR0,
231 		VM_REG_GUEST_CR3,
232 		VM_REG_GUEST_CR4,
233 		VM_REG_GUEST_EFER
234 	};
235 
236 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
237 		return (-1);
238 
239 	/*
240 	 * For the debugger, always pretend to be the kernel (CPL 0),
241 	 * and if long-mode is enabled, always parse addresses as if
242 	 * in 64-bit mode.
243 	 */
244 	paging->cr3 = regs[1];
245 	paging->cpl = 0;
246 	if (regs[3] & EFER_LMA)
247 		paging->cpu_mode = CPU_MODE_64BIT;
248 	else if (regs[0] & CR0_PE)
249 		paging->cpu_mode = CPU_MODE_PROTECTED;
250 	else
251 		paging->cpu_mode = CPU_MODE_REAL;
252 	if (!(regs[0] & CR0_PG))
253 		paging->paging_mode = PAGING_MODE_FLAT;
254 	else if (!(regs[2] & CR4_PAE))
255 		paging->paging_mode = PAGING_MODE_32;
256 	else if (regs[3] & EFER_LME)
257 		paging->paging_mode = (regs[2] & CR4_LA57) ?
258 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
259 	else
260 		paging->paging_mode = PAGING_MODE_PAE;
261 	return (0);
262 }
263 
264 /*
265  * Map a guest virtual address to a physical address (for a given vcpu).
266  * If a guest virtual address is valid, return 1.  If the address is
267  * not valid, return 0.  If an error occurs obtaining the mapping,
268  * return -1.
269  */
270 static int
271 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
272 {
273 	struct vm_guest_paging paging;
274 	int fault;
275 
276 	if (guest_paging_info(vcpu, &paging) == -1)
277 		return (-1);
278 
279 	/*
280 	 * Always use PROT_READ.  We really care if the VA is
281 	 * accessible, not if the current vCPU can write.
282 	 */
283 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
284 	    &fault) == -1)
285 		return (-1);
286 	if (fault)
287 		return (0);
288 	return (1);
289 }
290 
291 static void
292 io_buffer_reset(struct io_buffer *io)
293 {
294 
295 	io->start = 0;
296 	io->len = 0;
297 }
298 
299 /* Available room for adding data. */
300 static size_t
301 io_buffer_avail(struct io_buffer *io)
302 {
303 
304 	return (io->capacity - (io->start + io->len));
305 }
306 
307 static uint8_t *
308 io_buffer_head(struct io_buffer *io)
309 {
310 
311 	return (io->data + io->start);
312 }
313 
314 static uint8_t *
315 io_buffer_tail(struct io_buffer *io)
316 {
317 
318 	return (io->data + io->start + io->len);
319 }
320 
321 static void
322 io_buffer_advance(struct io_buffer *io, size_t amount)
323 {
324 
325 	assert(amount <= io->len);
326 	io->start += amount;
327 	io->len -= amount;
328 }
329 
330 static void
331 io_buffer_consume(struct io_buffer *io, size_t amount)
332 {
333 
334 	io_buffer_advance(io, amount);
335 	if (io->len == 0) {
336 		io->start = 0;
337 		return;
338 	}
339 
340 	/*
341 	 * XXX: Consider making this move optional and compacting on a
342 	 * future read() before realloc().
343 	 */
344 	memmove(io->data, io_buffer_head(io), io->len);
345 	io->start = 0;
346 }
347 
348 static void
349 io_buffer_grow(struct io_buffer *io, size_t newsize)
350 {
351 	uint8_t *new_data;
352 	size_t avail, new_cap;
353 
354 	avail = io_buffer_avail(io);
355 	if (newsize <= avail)
356 		return;
357 
358 	new_cap = io->capacity + (newsize - avail);
359 	new_data = realloc(io->data, new_cap);
360 	if (new_data == NULL)
361 		err(1, "Failed to grow GDB I/O buffer");
362 	io->data = new_data;
363 	io->capacity = new_cap;
364 }
365 
366 static bool
367 response_pending(void)
368 {
369 
370 	if (cur_resp.start == 0 && cur_resp.len == 0)
371 		return (false);
372 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
373 		return (false);
374 	return (true);
375 }
376 
377 static void
378 close_connection(void)
379 {
380 
381 	/*
382 	 * XXX: This triggers a warning because mevent does the close
383 	 * before the EV_DELETE.
384 	 */
385 	pthread_mutex_lock(&gdb_lock);
386 	mevent_delete(write_event);
387 	mevent_delete_close(read_event);
388 	write_event = NULL;
389 	read_event = NULL;
390 	io_buffer_reset(&cur_comm);
391 	io_buffer_reset(&cur_resp);
392 	cur_fd = -1;
393 
394 	remove_all_sw_breakpoints();
395 
396 	/* Clear any pending events. */
397 	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
398 
399 	/* Resume any stopped vCPUs. */
400 	gdb_resume_vcpus();
401 	pthread_mutex_unlock(&gdb_lock);
402 }
403 
/* Convert a value in the range 0-15 to its lowercase hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
413 
/*
 * Convert one hex character (either case) to its value.  Characters
 * that are not hex digits yield 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t lower;

	if (v >= '0' && v <= '9')
		return (v - '0');

	/* Setting bit 5 folds 'A'-'F' onto 'a'-'f'. */
	lower = v | 0x20;
	if (lower >= 'a' && lower <= 'f')
		return (lower - 'a' + 10);
	return (0xF);
}
426 
/*
 * Parse 'len' hex characters at 'p' as a big-endian (most significant
 * digit first) unsigned integer.  An empty string yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t result;
	size_t i;

	result = 0;
	for (i = 0; i < len; i++)
		result = (result << 4) | parse_digit(p[i]);
	return (result);
}
442 
/* Parse the two hex characters at 'p' into one byte, high nibble first. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
449 
450 static void
451 send_pending_data(int fd)
452 {
453 	ssize_t nwritten;
454 
455 	if (cur_resp.len == 0) {
456 		mevent_disable(write_event);
457 		return;
458 	}
459 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
460 	if (nwritten == -1) {
461 		warn("Write to GDB socket failed");
462 		close_connection();
463 	} else {
464 		io_buffer_advance(&cur_resp, nwritten);
465 		if (cur_resp.len == 0)
466 			mevent_disable(write_event);
467 		else
468 			mevent_enable(write_event);
469 	}
470 }
471 
472 /* Append a single character to the output buffer. */
473 static void
474 send_char(uint8_t data)
475 {
476 	io_buffer_grow(&cur_resp, 1);
477 	*io_buffer_tail(&cur_resp) = data;
478 	cur_resp.len++;
479 }
480 
481 /* Append an array of bytes to the output buffer. */
482 static void
483 send_data(const uint8_t *data, size_t len)
484 {
485 
486 	io_buffer_grow(&cur_resp, len);
487 	memcpy(io_buffer_tail(&cur_resp), data, len);
488 	cur_resp.len += len;
489 }
490 
/*
 * Write the two lowercase hex characters for 'v' into buf[0] (high
 * nibble) and buf[1] (low nibble).  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t high, low;

	high = v >> 4;
	low = v & 0xf;
	buf[0] = hex_digit(high);
	buf[1] = hex_digit(low);
}
498 
/*
 * Queue one byte as two hex characters.  Unlike append_byte() this
 * does not update the packet checksum.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
511 
512 static void
513 start_packet(void)
514 {
515 
516 	send_char('$');
517 	cur_csum = 0;
518 }
519 
520 static void
521 finish_packet(void)
522 {
523 
524 	send_char('#');
525 	send_byte(cur_csum);
526 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
527 }
528 
529 /*
530  * Append a single character (for the packet payload) and update the
531  * checksum.
532  */
533 static void
534 append_char(uint8_t v)
535 {
536 
537 	send_char(v);
538 	cur_csum += v;
539 }
540 
541 /*
542  * Append an array of bytes (for the packet payload) and update the
543  * checksum.
544  */
545 static void
546 append_packet_data(const uint8_t *data, size_t len)
547 {
548 
549 	send_data(data, len);
550 	while (len > 0) {
551 		cur_csum += *data;
552 		data++;
553 		len--;
554 	}
555 }
556 
/*
 * Add a NUL-terminated string (without the terminator) to the payload
 * of the current packet.
 */
static void
append_string(const char *str)
{

	/*
	 * append_packet_data() takes unsigned bytes; convert explicitly
	 * rather than relying on an implicit char/uint8_t pointer
	 * conversion.
	 */
	append_packet_data((const uint8_t *)str, strlen(str));
}
563 
/*
 * Add one byte to the packet payload as two hex characters, updating
 * the checksum.
 */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
572 
/*
 * Add the low 'len' bytes of 'value' to the packet payload as hex,
 * least significant byte first.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

	while (len > 0) {
		append_byte(value);
		value >>= 8;
		len--;
	}
}
583 
/*
 * Add the low 'len' bytes of 'value' to the packet payload as hex,
 * most significant byte first.  Byte positions beyond the width of
 * 'value' are emitted as zero, and a 'len' of zero appends nothing.
 *
 * The bytes are emitted directly rather than staged in a
 * variable-length array, so the stack use does not depend on 'len'.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	size_t i;

	for (i = len; i > 0; i--) {
		/* Never shift by the full width of the type or more. */
		if (i > sizeof(value))
			append_byte(0);
		else
			append_byte(value >> ((i - 1) * 8));
	}
}
596 
/*
 * Add 'value' to the packet payload as big-endian hex using the
 * fewest whole bytes that hold it.  Zero is sent as a single '0'.
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value == 0) {
		append_char('0');
		return;
	}

	/* Round the position of the highest set bit up to whole bytes. */
	nbytes = (fls(value) + 7) / 8;
	append_unsigned_be(value, nbytes);
}
606 
/*
 * Add a NUL-terminated string to the packet payload with every
 * character encoded as two hex digits.
 */
static void
append_asciihex(const char *str)
{
	const char *cp;

	for (cp = str; *cp != '\0'; cp++)
		append_byte(*cp);
}
616 
/* Queue a packet with an empty payload. */
static void
send_empty_response(void)
{
	start_packet();
	finish_packet();
}
624 
/*
 * Queue an error reply: 'E' followed by the low byte of 'error' as
 * two hex digits.
 */
static void
send_error(int error)
{
	uint8_t code;

	code = error;
	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
634 
/* Queue an "OK" reply packet. */
static void
send_ok(void)
{
	start_packet();
	append_char('O');
	append_char('K');
	finish_packet();
}
643 
/*
 * Parse a thread-id field.  Returns -2 for an empty field, 0 for "0",
 * -1 for "-1", and otherwise the field parsed as hex.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	if (len == 0)
		return (-2);
	if (len == 1 && data[0] == '0')
		return (0);
	if (len == 2 && data[0] == '-' && data[1] == '1')
		return (-1);
	return (parse_integer(data, len));
}
656 
657 /*
658  * Report the current stop event to the debugger.  If the stop is due
659  * to an event triggered on a specific vCPU such as a breakpoint or
660  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
661  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
662  * the reporting vCPU for vCPU events.
663  */
664 static void
665 report_stop(bool set_cur_vcpu)
666 {
667 	struct vcpu_state *vs;
668 
669 	start_packet();
670 	if (stopped_vcpu == -1) {
671 		append_char('S');
672 		append_byte(GDB_SIGNAL_TRAP);
673 	} else {
674 		vs = &vcpu_state[stopped_vcpu];
675 		if (set_cur_vcpu)
676 			cur_vcpu = stopped_vcpu;
677 		append_char('T');
678 		append_byte(GDB_SIGNAL_TRAP);
679 		append_string("thread:");
680 		append_integer(stopped_vcpu + 1);
681 		append_char(';');
682 		if (vs->hit_swbreak) {
683 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
684 			if (swbreak_enabled)
685 				append_string("swbreak:;");
686 		} else if (vs->stepped)
687 			debug("$vCPU %d reporting step\n", stopped_vcpu);
688 		else
689 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
690 	}
691 	finish_packet();
692 	report_next_stop = false;
693 }
694 
695 /*
696  * If this stop is due to a vCPU event, clear that event to mark it as
697  * acknowledged.
698  */
699 static void
700 discard_stop(void)
701 {
702 	struct vcpu_state *vs;
703 
704 	if (stopped_vcpu != -1) {
705 		vs = &vcpu_state[stopped_vcpu];
706 		vs->hit_swbreak = false;
707 		vs->stepped = false;
708 		stopped_vcpu = -1;
709 	}
710 	report_next_stop = true;
711 }
712 
713 static void
714 gdb_finish_suspend_vcpus(void)
715 {
716 
717 	if (first_stop) {
718 		first_stop = false;
719 		stopped_vcpu = -1;
720 	} else if (report_next_stop) {
721 		assert(!response_pending());
722 		report_stop(true);
723 		send_pending_data(cur_fd);
724 	}
725 }
726 
727 /*
728  * vCPU threads invoke this function whenever the vCPU enters the
729  * debug server to pause or report an event.  vCPU threads wait here
730  * as long as the debug server keeps them suspended.
731  */
732 static void
733 _gdb_cpu_suspend(int vcpu, bool report_stop)
734 {
735 
736 	debug("$vCPU %d suspending\n", vcpu);
737 	CPU_SET(vcpu, &vcpus_waiting);
738 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
739 		gdb_finish_suspend_vcpus();
740 	while (CPU_ISSET(vcpu, &vcpus_suspended))
741 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
742 	CPU_CLR(vcpu, &vcpus_waiting);
743 	debug("$vCPU %d resuming\n", vcpu);
744 }
745 
746 /*
747  * Invoked at the start of a vCPU thread's execution to inform the
748  * debug server about the new thread.
749  */
750 void
751 gdb_cpu_add(int vcpu)
752 {
753 
754 	if (!gdb_active)
755 		return;
756 	debug("$vCPU %d starting\n", vcpu);
757 	pthread_mutex_lock(&gdb_lock);
758 	assert(vcpu < guest_ncpus);
759 	CPU_SET(vcpu, &vcpus_active);
760 	if (!TAILQ_EMPTY(&breakpoints)) {
761 		vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
762 		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
763 	}
764 
765 	/*
766 	 * If a vcpu is added while vcpus are stopped, suspend the new
767 	 * vcpu so that it will pop back out with a debug exit before
768 	 * executing the first instruction.
769 	 */
770 	if (!CPU_EMPTY(&vcpus_suspended)) {
771 		CPU_SET(vcpu, &vcpus_suspended);
772 		_gdb_cpu_suspend(vcpu, false);
773 	}
774 	pthread_mutex_unlock(&gdb_lock);
775 }
776 
777 /*
778  * Invoked by vCPU before resuming execution.  This enables stepping
779  * if the vCPU is marked as stepping.
780  */
781 static void
782 gdb_cpu_resume(int vcpu)
783 {
784 	struct vcpu_state *vs;
785 	int error;
786 
787 	vs = &vcpu_state[vcpu];
788 
789 	/*
790 	 * Any pending event should already be reported before
791 	 * resuming.
792 	 */
793 	assert(vs->hit_swbreak == false);
794 	assert(vs->stepped == false);
795 	if (vs->stepping) {
796 		error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
797 		assert(error == 0);
798 	}
799 }
800 
801 /*
802  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
803  * has been suspended due to an event on different vCPU or in response
804  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
805  */
806 void
807 gdb_cpu_suspend(int vcpu)
808 {
809 
810 	if (!gdb_active)
811 		return;
812 	pthread_mutex_lock(&gdb_lock);
813 	_gdb_cpu_suspend(vcpu, true);
814 	gdb_cpu_resume(vcpu);
815 	pthread_mutex_unlock(&gdb_lock);
816 }
817 
818 static void
819 gdb_suspend_vcpus(void)
820 {
821 
822 	assert(pthread_mutex_isowned_np(&gdb_lock));
823 	debug("suspending all CPUs\n");
824 	vcpus_suspended = vcpus_active;
825 	vm_suspend_cpu(ctx, -1);
826 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
827 		gdb_finish_suspend_vcpus();
828 }
829 
830 /*
831  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
832  * the VT-x-specific MTRAP exit.
833  */
834 void
835 gdb_cpu_mtrap(int vcpu)
836 {
837 	struct vcpu_state *vs;
838 
839 	if (!gdb_active)
840 		return;
841 	debug("$vCPU %d MTRAP\n", vcpu);
842 	pthread_mutex_lock(&gdb_lock);
843 	vs = &vcpu_state[vcpu];
844 	if (vs->stepping) {
845 		vs->stepping = false;
846 		vs->stepped = true;
847 		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
848 		while (vs->stepped) {
849 			if (stopped_vcpu == -1) {
850 				debug("$vCPU %d reporting step\n", vcpu);
851 				stopped_vcpu = vcpu;
852 				gdb_suspend_vcpus();
853 			}
854 			_gdb_cpu_suspend(vcpu, true);
855 		}
856 		gdb_cpu_resume(vcpu);
857 	}
858 	pthread_mutex_unlock(&gdb_lock);
859 }
860 
861 static struct breakpoint *
862 find_breakpoint(uint64_t gpa)
863 {
864 	struct breakpoint *bp;
865 
866 	TAILQ_FOREACH(bp, &breakpoints, link) {
867 		if (bp->gpa == gpa)
868 			return (bp);
869 	}
870 	return (NULL);
871 }
872 
873 void
874 gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
875 {
876 	struct breakpoint *bp;
877 	struct vcpu_state *vs;
878 	uint64_t gpa;
879 	int error;
880 
881 	if (!gdb_active) {
882 		fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
883 		exit(4);
884 	}
885 	pthread_mutex_lock(&gdb_lock);
886 	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
887 	assert(error == 1);
888 	bp = find_breakpoint(gpa);
889 	if (bp != NULL) {
890 		vs = &vcpu_state[vcpu];
891 		assert(vs->stepping == false);
892 		assert(vs->stepped == false);
893 		assert(vs->hit_swbreak == false);
894 		vs->hit_swbreak = true;
895 		vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
896 		for (;;) {
897 			if (stopped_vcpu == -1) {
898 				debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
899 				    vmexit->rip);
900 				stopped_vcpu = vcpu;
901 				gdb_suspend_vcpus();
902 			}
903 			_gdb_cpu_suspend(vcpu, true);
904 			if (!vs->hit_swbreak) {
905 				/* Breakpoint reported. */
906 				break;
907 			}
908 			bp = find_breakpoint(gpa);
909 			if (bp == NULL) {
910 				/* Breakpoint was removed. */
911 				vs->hit_swbreak = false;
912 				break;
913 			}
914 		}
915 		gdb_cpu_resume(vcpu);
916 	} else {
917 		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
918 		    vmexit->rip);
919 		error = vm_set_register(ctx, vcpu,
920 		    VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
921 		assert(error == 0);
922 		error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
923 		assert(error == 0);
924 	}
925 	pthread_mutex_unlock(&gdb_lock);
926 }
927 
928 static bool
929 gdb_step_vcpu(int vcpu)
930 {
931 	int error, val;
932 
933 	debug("$vCPU %d step\n", vcpu);
934 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
935 	if (error < 0)
936 		return (false);
937 
938 	discard_stop();
939 	vcpu_state[vcpu].stepping = true;
940 	vm_resume_cpu(ctx, vcpu);
941 	CPU_CLR(vcpu, &vcpus_suspended);
942 	pthread_cond_broadcast(&idle_vcpus);
943 	return (true);
944 }
945 
946 static void
947 gdb_resume_vcpus(void)
948 {
949 
950 	assert(pthread_mutex_isowned_np(&gdb_lock));
951 	vm_resume_cpu(ctx, -1);
952 	debug("resuming all CPUs\n");
953 	CPU_ZERO(&vcpus_suspended);
954 	pthread_cond_broadcast(&idle_vcpus);
955 }
956 
957 static void
958 gdb_read_regs(void)
959 {
960 	uint64_t regvals[nitems(gdb_regset)];
961 
962 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
963 	    gdb_regset, regvals) == -1) {
964 		send_error(errno);
965 		return;
966 	}
967 	start_packet();
968 	for (size_t i = 0; i < nitems(regvals); i++)
969 		append_unsigned_native(regvals[i], gdb_regsize[i]);
970 	finish_packet();
971 }
972 
973 static void
974 gdb_read_mem(const uint8_t *data, size_t len)
975 {
976 	uint64_t gpa, gva, val;
977 	uint8_t *cp;
978 	size_t resid, todo, bytes;
979 	bool started;
980 	int error;
981 
982 	/* Skip 'm' */
983 	data += 1;
984 	len -= 1;
985 
986 	/* Parse and consume address. */
987 	cp = memchr(data, ',', len);
988 	if (cp == NULL || cp == data) {
989 		send_error(EINVAL);
990 		return;
991 	}
992 	gva = parse_integer(data, cp - data);
993 	len -= (cp - data) + 1;
994 	data += (cp - data) + 1;
995 
996 	/* Parse length. */
997 	resid = parse_integer(data, len);
998 
999 	started = false;
1000 	while (resid > 0) {
1001 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1002 		if (error == -1) {
1003 			if (started)
1004 				finish_packet();
1005 			else
1006 				send_error(errno);
1007 			return;
1008 		}
1009 		if (error == 0) {
1010 			if (started)
1011 				finish_packet();
1012 			else
1013 				send_error(EFAULT);
1014 			return;
1015 		}
1016 
1017 		/* Read bytes from current page. */
1018 		todo = getpagesize() - gpa % getpagesize();
1019 		if (todo > resid)
1020 			todo = resid;
1021 
1022 		cp = paddr_guest2host(ctx, gpa, todo);
1023 		if (cp != NULL) {
1024 			/*
1025 			 * If this page is guest RAM, read it a byte
1026 			 * at a time.
1027 			 */
1028 			if (!started) {
1029 				start_packet();
1030 				started = true;
1031 			}
1032 			while (todo > 0) {
1033 				append_byte(*cp);
1034 				cp++;
1035 				gpa++;
1036 				gva++;
1037 				resid--;
1038 				todo--;
1039 			}
1040 		} else {
1041 			/*
1042 			 * If this page isn't guest RAM, try to handle
1043 			 * it via MMIO.  For MMIO requests, use
1044 			 * aligned reads of words when possible.
1045 			 */
1046 			while (todo > 0) {
1047 				if (gpa & 1 || todo == 1)
1048 					bytes = 1;
1049 				else if (gpa & 2 || todo == 2)
1050 					bytes = 2;
1051 				else
1052 					bytes = 4;
1053 				error = read_mem(ctx, cur_vcpu, gpa, &val,
1054 				    bytes);
1055 				if (error == 0) {
1056 					if (!started) {
1057 						start_packet();
1058 						started = true;
1059 					}
1060 					gpa += bytes;
1061 					gva += bytes;
1062 					resid -= bytes;
1063 					todo -= bytes;
1064 					while (bytes > 0) {
1065 						append_byte(val);
1066 						val >>= 8;
1067 						bytes--;
1068 					}
1069 				} else {
1070 					if (started)
1071 						finish_packet();
1072 					else
1073 						send_error(EFAULT);
1074 					return;
1075 				}
1076 			}
1077 		}
1078 		assert(resid == 0 || gpa % getpagesize() == 0);
1079 	}
1080 	if (!started)
1081 		start_packet();
1082 	finish_packet();
1083 }
1084 
/*
 * Handle a memory write request of the form
 * "M<addr>,<length>:<hex bytes>".  'data' points at the 'M' and 'len'
 * counts the whole command.
 *
 * The payload must contain exactly two hex digits per byte to write.
 * The guest virtual range is translated and written one page at a
 * time using cur_vcpu's paging state.  Pages backed by guest RAM are
 * written directly; anything else goes through write_mem() using
 * naturally aligned 1, 2 or 4 byte accesses that never extend past
 * the request.
 *
 * Replies "OK" on success.  On failure an error reply is sent; bytes
 * written before the failure are not undone.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length. */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 *
			 * The access size is limited by both the
			 * alignment of gpa and the bytes left: with
			 * three bytes left at a 4-byte aligned
			 * address a 2-byte access is used, since a
			 * 4-byte one would consume hex digits beyond
			 * the payload and wrap the unsigned counters.
			 */
			while (todo > 0) {
				if ((gpa & 1) != 0 || todo < 2) {
					bytes = 1;
					val = parse_byte(data);
				} else if ((gpa & 2) != 0 || todo < 4) {
					bytes = 2;
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(ctx, cur_vcpu, gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
1193 
1194 static bool
1195 set_breakpoint_caps(bool enable)
1196 {
1197 	cpuset_t mask;
1198 	int vcpu;
1199 
1200 	mask = vcpus_active;
1201 	while (!CPU_EMPTY(&mask)) {
1202 		vcpu = CPU_FFS(&mask) - 1;
1203 		CPU_CLR(vcpu, &mask);
1204 		if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
1205 		    enable ? 1 : 0) < 0)
1206 			return (false);
1207 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1208 		    enable ? "en" : "dis");
1209 	}
1210 	return (true);
1211 }
1212 
1213 static void
1214 remove_all_sw_breakpoints(void)
1215 {
1216 	struct breakpoint *bp, *nbp;
1217 	uint8_t *cp;
1218 
1219 	if (TAILQ_EMPTY(&breakpoints))
1220 		return;
1221 
1222 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1223 		debug("remove breakpoint at %#lx\n", bp->gpa);
1224 		cp = paddr_guest2host(ctx, bp->gpa, 1);
1225 		*cp = bp->shadow_inst;
1226 		TAILQ_REMOVE(&breakpoints, bp, link);
1227 		free(bp);
1228 	}
1229 	TAILQ_INIT(&breakpoints);
1230 	set_breakpoint_caps(false);
1231 }
1232 
1233 static void
1234 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1235 {
1236 	struct breakpoint *bp;
1237 	uint64_t gpa;
1238 	uint8_t *cp;
1239 	int error;
1240 
1241 	if (kind != 1) {
1242 		send_error(EINVAL);
1243 		return;
1244 	}
1245 
1246 	error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1247 	if (error == -1) {
1248 		send_error(errno);
1249 		return;
1250 	}
1251 	if (error == 0) {
1252 		send_error(EFAULT);
1253 		return;
1254 	}
1255 
1256 	cp = paddr_guest2host(ctx, gpa, 1);
1257 
1258 	/* Only permit breakpoints in guest RAM. */
1259 	if (cp == NULL) {
1260 		send_error(EFAULT);
1261 		return;
1262 	}
1263 
1264 	/* Find any existing breakpoint. */
1265 	bp = find_breakpoint(gpa);
1266 
1267 	/*
1268 	 * Silently ignore duplicate commands since the protocol
1269 	 * requires these packets to be idempotent.
1270 	 */
1271 	if (insert) {
1272 		if (bp == NULL) {
1273 			if (TAILQ_EMPTY(&breakpoints) &&
1274 			    !set_breakpoint_caps(true)) {
1275 				send_empty_response();
1276 				return;
1277 			}
1278 			bp = malloc(sizeof(*bp));
1279 			bp->gpa = gpa;
1280 			bp->shadow_inst = *cp;
1281 			*cp = 0xcc;	/* INT 3 */
1282 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1283 			debug("new breakpoint at %#lx\n", gpa);
1284 		}
1285 	} else {
1286 		if (bp != NULL) {
1287 			debug("remove breakpoint at %#lx\n", gpa);
1288 			*cp = bp->shadow_inst;
1289 			TAILQ_REMOVE(&breakpoints, bp, link);
1290 			free(bp);
1291 			if (TAILQ_EMPTY(&breakpoints))
1292 				set_breakpoint_caps(false);
1293 		}
1294 	}
1295 	send_ok();
1296 }
1297 
/*
 * Parse a breakpoint request, "Z<type>,<addr>,<kind>" to insert or
 * "z<type>,<addr>,<kind>" to remove (all fields in hex), and act on
 * it.  Only type 0 (software breakpoint) is handled; other types, and
 * requests carrying a ';'-separated suffix, get an empty response.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	const uint8_t *sep;
	uint64_t gva;
	size_t fieldlen;
	int kind, type;
	bool insert;

	insert = (data[0] == 'Z');

	/* Step over the 'Z' or 'z'. */
	data++;
	len--;

	/* Breakpoint type, terminated by ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	fieldlen = sep - data;
	type = parse_integer(data, fieldlen);
	data += fieldlen + 1;
	len -= fieldlen + 1;

	/* Address, terminated by ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	fieldlen = sep - data;
	gva = parse_integer(data, fieldlen);
	data += fieldlen + 1;
	len -= fieldlen + 1;

	/* Kind is whatever remains, unless a ';' follows it. */
	sep = memchr(data, ';', len);
	if (sep == data) {
		send_error(EINVAL);
		return;
	}
	if (sep != NULL) {
		/*
		 * We do not advertise support for either the
		 * ConditionalBreakpoints or BreakpointCommands
		 * features, so we should not be getting conditions or
		 * commands from the remote end.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);

	if (type == 0)
		update_sw_breakpoint(gva, kind, insert);
	else
		send_empty_response();
}
1361 
/*
 * Return true if the 'len' bytes at 'data' begin with the NUL-terminated
 * string 'cmd'.  This is a prefix match: bytes following the command
 * name are not examined.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (len >= cmdlen && memcmp(data, cmd, cmdlen) == 0);
}
1370 
1371 static void
1372 check_features(const uint8_t *data, size_t len)
1373 {
1374 	char *feature, *next_feature, *str, *value;
1375 	bool supported;
1376 
1377 	str = malloc(len + 1);
1378 	memcpy(str, data, len);
1379 	str[len] = '\0';
1380 	next_feature = str;
1381 
1382 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1383 		/*
1384 		 * Null features shouldn't exist, but skip if they
1385 		 * do.
1386 		 */
1387 		if (strcmp(feature, "") == 0)
1388 			continue;
1389 
1390 		/*
1391 		 * Look for the value or supported / not supported
1392 		 * flag.
1393 		 */
1394 		value = strchr(feature, '=');
1395 		if (value != NULL) {
1396 			*value = '\0';
1397 			value++;
1398 			supported = true;
1399 		} else {
1400 			value = feature + strlen(feature) - 1;
1401 			switch (*value) {
1402 			case '+':
1403 				supported = true;
1404 				break;
1405 			case '-':
1406 				supported = false;
1407 				break;
1408 			default:
1409 				/*
1410 				 * This is really a protocol error,
1411 				 * but we just ignore malformed
1412 				 * features for ease of
1413 				 * implementation.
1414 				 */
1415 				continue;
1416 			}
1417 			value = NULL;
1418 		}
1419 
1420 		if (strcmp(feature, "swbreak") == 0)
1421 			swbreak_enabled = supported;
1422 	}
1423 	free(str);
1424 
1425 	start_packet();
1426 
1427 	/* This is an arbitrary limit. */
1428 	append_string("PacketSize=4096");
1429 	append_string(";swbreak+");
1430 	finish_packet();
1431 }
1432 
1433 static void
1434 gdb_query(const uint8_t *data, size_t len)
1435 {
1436 
1437 	/*
1438 	 * TODO:
1439 	 * - qSearch
1440 	 */
1441 	if (command_equals(data, len, "qAttached")) {
1442 		start_packet();
1443 		append_char('1');
1444 		finish_packet();
1445 	} else if (command_equals(data, len, "qC")) {
1446 		start_packet();
1447 		append_string("QC");
1448 		append_integer(cur_vcpu + 1);
1449 		finish_packet();
1450 	} else if (command_equals(data, len, "qfThreadInfo")) {
1451 		cpuset_t mask;
1452 		bool first;
1453 		int vcpu;
1454 
1455 		if (CPU_EMPTY(&vcpus_active)) {
1456 			send_error(EINVAL);
1457 			return;
1458 		}
1459 		mask = vcpus_active;
1460 		start_packet();
1461 		append_char('m');
1462 		first = true;
1463 		while (!CPU_EMPTY(&mask)) {
1464 			vcpu = CPU_FFS(&mask) - 1;
1465 			CPU_CLR(vcpu, &mask);
1466 			if (first)
1467 				first = false;
1468 			else
1469 				append_char(',');
1470 			append_integer(vcpu + 1);
1471 		}
1472 		finish_packet();
1473 	} else if (command_equals(data, len, "qsThreadInfo")) {
1474 		start_packet();
1475 		append_char('l');
1476 		finish_packet();
1477 	} else if (command_equals(data, len, "qSupported")) {
1478 		data += strlen("qSupported");
1479 		len -= strlen("qSupported");
1480 		check_features(data, len);
1481 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1482 		char buf[16];
1483 		int tid;
1484 
1485 		data += strlen("qThreadExtraInfo");
1486 		len -= strlen("qThreadExtraInfo");
1487 		if (*data != ',') {
1488 			send_error(EINVAL);
1489 			return;
1490 		}
1491 		tid = parse_threadid(data + 1, len - 1);
1492 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1493 			send_error(EINVAL);
1494 			return;
1495 		}
1496 
1497 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1498 		start_packet();
1499 		append_asciihex(buf);
1500 		finish_packet();
1501 	} else
1502 		send_empty_response();
1503 }
1504 
1505 static void
1506 handle_command(const uint8_t *data, size_t len)
1507 {
1508 
1509 	/* Reject packets with a sequence-id. */
1510 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1511 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1512 		send_empty_response();
1513 		return;
1514 	}
1515 
1516 	switch (*data) {
1517 	case 'c':
1518 		if (len != 1) {
1519 			send_error(EINVAL);
1520 			break;
1521 		}
1522 
1523 		discard_stop();
1524 		gdb_resume_vcpus();
1525 		break;
1526 	case 'D':
1527 		send_ok();
1528 
1529 		/* TODO: Resume any stopped CPUs. */
1530 		break;
1531 	case 'g': {
1532 		gdb_read_regs();
1533 		break;
1534 	}
1535 	case 'H': {
1536 		int tid;
1537 
1538 		if (data[1] != 'g' && data[1] != 'c') {
1539 			send_error(EINVAL);
1540 			break;
1541 		}
1542 		tid = parse_threadid(data + 2, len - 2);
1543 		if (tid == -2) {
1544 			send_error(EINVAL);
1545 			break;
1546 		}
1547 
1548 		if (CPU_EMPTY(&vcpus_active)) {
1549 			send_error(EINVAL);
1550 			break;
1551 		}
1552 		if (tid == -1 || tid == 0)
1553 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1554 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1555 			cur_vcpu = tid - 1;
1556 		else {
1557 			send_error(EINVAL);
1558 			break;
1559 		}
1560 		send_ok();
1561 		break;
1562 	}
1563 	case 'm':
1564 		gdb_read_mem(data, len);
1565 		break;
1566 	case 'M':
1567 		gdb_write_mem(data, len);
1568 		break;
1569 	case 'T': {
1570 		int tid;
1571 
1572 		tid = parse_threadid(data + 1, len - 1);
1573 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1574 			send_error(EINVAL);
1575 			return;
1576 		}
1577 		send_ok();
1578 		break;
1579 	}
1580 	case 'q':
1581 		gdb_query(data, len);
1582 		break;
1583 	case 's':
1584 		if (len != 1) {
1585 			send_error(EINVAL);
1586 			break;
1587 		}
1588 
1589 		/* Don't send a reply until a stop occurs. */
1590 		if (!gdb_step_vcpu(cur_vcpu)) {
1591 			send_error(EOPNOTSUPP);
1592 			break;
1593 		}
1594 		break;
1595 	case 'z':
1596 	case 'Z':
1597 		parse_breakpoint(data, len);
1598 		break;
1599 	case '?':
1600 		report_stop(false);
1601 		break;
1602 	case 'G': /* TODO */
1603 	case 'v':
1604 		/* Handle 'vCont' */
1605 		/* 'vCtrlC' */
1606 	case 'p': /* TODO */
1607 	case 'P': /* TODO */
1608 	case 'Q': /* TODO */
1609 	case 't': /* TODO */
1610 	case 'X': /* TODO */
1611 	default:
1612 		send_empty_response();
1613 	}
1614 }
1615 
1616 /* Check for a valid packet in the command buffer. */
1617 static void
1618 check_command(int fd)
1619 {
1620 	uint8_t *head, *hash, *p, sum;
1621 	size_t avail, plen;
1622 
1623 	for (;;) {
1624 		avail = cur_comm.len;
1625 		if (avail == 0)
1626 			return;
1627 		head = io_buffer_head(&cur_comm);
1628 		switch (*head) {
1629 		case 0x03:
1630 			debug("<- Ctrl-C\n");
1631 			io_buffer_consume(&cur_comm, 1);
1632 
1633 			gdb_suspend_vcpus();
1634 			break;
1635 		case '+':
1636 			/* ACK of previous response. */
1637 			debug("<- +\n");
1638 			if (response_pending())
1639 				io_buffer_reset(&cur_resp);
1640 			io_buffer_consume(&cur_comm, 1);
1641 			if (stopped_vcpu != -1 && report_next_stop) {
1642 				report_stop(true);
1643 				send_pending_data(fd);
1644 			}
1645 			break;
1646 		case '-':
1647 			/* NACK of previous response. */
1648 			debug("<- -\n");
1649 			if (response_pending()) {
1650 				cur_resp.len += cur_resp.start;
1651 				cur_resp.start = 0;
1652 				if (cur_resp.data[0] == '+')
1653 					io_buffer_advance(&cur_resp, 1);
1654 				debug("-> %.*s\n", (int)cur_resp.len,
1655 				    io_buffer_head(&cur_resp));
1656 			}
1657 			io_buffer_consume(&cur_comm, 1);
1658 			send_pending_data(fd);
1659 			break;
1660 		case '$':
1661 			/* Packet. */
1662 
1663 			if (response_pending()) {
1664 				warnx("New GDB command while response in "
1665 				    "progress");
1666 				io_buffer_reset(&cur_resp);
1667 			}
1668 
1669 			/* Is packet complete? */
1670 			hash = memchr(head, '#', avail);
1671 			if (hash == NULL)
1672 				return;
1673 			plen = (hash - head + 1) + 2;
1674 			if (avail < plen)
1675 				return;
1676 			debug("<- %.*s\n", (int)plen, head);
1677 
1678 			/* Verify checksum. */
1679 			for (sum = 0, p = head + 1; p < hash; p++)
1680 				sum += *p;
1681 			if (sum != parse_byte(hash + 1)) {
1682 				io_buffer_consume(&cur_comm, plen);
1683 				debug("-> -\n");
1684 				send_char('-');
1685 				send_pending_data(fd);
1686 				break;
1687 			}
1688 			send_char('+');
1689 
1690 			handle_command(head + 1, hash - (head + 1));
1691 			io_buffer_consume(&cur_comm, plen);
1692 			if (!response_pending())
1693 				debug("-> +\n");
1694 			send_pending_data(fd);
1695 			break;
1696 		default:
1697 			/* XXX: Possibly drop connection instead. */
1698 			debug("-> %02x\n", *head);
1699 			io_buffer_consume(&cur_comm, 1);
1700 			break;
1701 		}
1702 	}
1703 }
1704 
1705 static void
1706 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1707 {
1708 	size_t pending;
1709 	ssize_t nread;
1710 	int n;
1711 
1712 	if (ioctl(fd, FIONREAD, &n) == -1) {
1713 		warn("FIONREAD on GDB socket");
1714 		return;
1715 	}
1716 	assert(n >= 0);
1717 	pending = n;
1718 
1719 	/*
1720 	 * 'pending' might be zero due to EOF.  We need to call read
1721 	 * with a non-zero length to detect EOF.
1722 	 */
1723 	if (pending == 0)
1724 		pending = 1;
1725 
1726 	/* Ensure there is room in the command buffer. */
1727 	io_buffer_grow(&cur_comm, pending);
1728 	assert(io_buffer_avail(&cur_comm) >= pending);
1729 
1730 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1731 	if (nread == 0) {
1732 		close_connection();
1733 	} else if (nread == -1) {
1734 		if (errno == EAGAIN)
1735 			return;
1736 
1737 		warn("Read from GDB socket");
1738 		close_connection();
1739 	} else {
1740 		cur_comm.len += nread;
1741 		pthread_mutex_lock(&gdb_lock);
1742 		check_command(fd);
1743 		pthread_mutex_unlock(&gdb_lock);
1744 	}
1745 }
1746 
1747 static void
1748 gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
1749 {
1750 
1751 	send_pending_data(fd);
1752 }
1753 
1754 static void
1755 new_connection(int fd, enum ev_type event __unused, void *arg)
1756 {
1757 	int optval, s;
1758 
1759 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1760 	if (s == -1) {
1761 		if (arg != NULL)
1762 			err(1, "Failed accepting initial GDB connection");
1763 
1764 		/* Silently ignore errors post-startup. */
1765 		return;
1766 	}
1767 
1768 	optval = 1;
1769 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1770 	    -1) {
1771 		warn("Failed to disable SIGPIPE for GDB connection");
1772 		close(s);
1773 		return;
1774 	}
1775 
1776 	pthread_mutex_lock(&gdb_lock);
1777 	if (cur_fd != -1) {
1778 		close(s);
1779 		warnx("Ignoring additional GDB connection.");
1780 	}
1781 
1782 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1783 	if (read_event == NULL) {
1784 		if (arg != NULL)
1785 			err(1, "Failed to setup initial GDB connection");
1786 		pthread_mutex_unlock(&gdb_lock);
1787 		return;
1788 	}
1789 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1790 	if (write_event == NULL) {
1791 		if (arg != NULL)
1792 			err(1, "Failed to setup initial GDB connection");
1793 		mevent_delete_close(read_event);
1794 		read_event = NULL;
1795 	}
1796 
1797 	cur_fd = s;
1798 	cur_vcpu = 0;
1799 	stopped_vcpu = -1;
1800 
1801 	/* Break on attach. */
1802 	first_stop = true;
1803 	report_next_stop = false;
1804 	gdb_suspend_vcpus();
1805 	pthread_mutex_unlock(&gdb_lock);
1806 }
1807 
1808 #ifndef WITHOUT_CAPSICUM
1809 static void
1810 limit_gdb_socket(int s)
1811 {
1812 	cap_rights_t rights;
1813 	unsigned long ioctls[] = { FIONREAD };
1814 
1815 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1816 	    CAP_SETSOCKOPT, CAP_IOCTL);
1817 	if (caph_rights_limit(s, &rights) == -1)
1818 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1819 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1820 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1821 }
1822 #endif
1823 
/*
 * Initialize the GDB stub.
 *
 * Does nothing unless the "gdb.port" configuration value is set.
 * Otherwise creates a non-blocking listening socket on "gdb.address"
 * (default "localhost") and the configured port, and registers
 * new_connection() to accept debuggers.  If "gdb.wait" is true, vCPU 0
 * is marked suspended so that it does not start executing until a
 * debugger connects.  Any failure during setup is fatal.
 */
void
init_gdb(struct vmctx *_ctx)
{
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	/* Best effort: failing to set SO_REUSEADDR is not fatal. */
	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	if (vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	flags = fcntl(s, F_GETFL);
	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	/*
	 * Without the listen event no debugger could ever attach, and a
	 * vcpu suspended by "gdb.wait" would never be resumed.
	 */
	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
		errx(1, "Failed to register gdb listen socket");
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);
}
1907