xref: /freebsd/usr.sbin/bhyve/gdb.c (revision a50d73d5782a351ad83e8d1f84d11720a12e70d3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <netdb.h>
52 #include <pthread.h>
53 #include <pthread_np.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <sysexits.h>
59 #include <unistd.h>
60 #include <vmmapi.h>
61 
62 #include "bhyverun.h"
63 #include "config.h"
64 #include "gdb.h"
65 #include "mem.h"
66 #include "mevent.h"
67 
/*
 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
 * use SIGTRAP.
 */
#define	GDB_SIGNAL_TRAP		5

/* Forward declarations needed before the first use of these helpers. */
static void gdb_resume_vcpus(void);
static void check_command(int fd);

/*
 * mevent handles for the debugger connection.  Both are deleted and
 * reset to NULL by close_connection().
 */
static struct mevent *read_event, *write_event;

/*
 * vCPU bookkeeping sets:
 *  - vcpus_active: vCPUs registered through gdb_cpu_add().
 *  - vcpus_suspended: vCPUs the debug server wants parked; loaded from
 *    vcpus_active by gdb_suspend_vcpus() and cleared on resume.
 *  - vcpus_waiting: vCPUs currently parked inside _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/*
 * Lock held while manipulating the state above; gdb_suspend_vcpus() and
 * gdb_resume_vcpus() assert that it is owned.
 */
static pthread_mutex_t gdb_lock;
/* Parked vCPU threads sleep on this; it is broadcast on resume/step. */
static pthread_cond_t idle_vcpus;
/*
 * first_stop: while set, gdb_finish_suspend_vcpus() swallows the stop
 *   instead of reporting it.
 * report_next_stop: set by discard_stop(), cleared by report_stop().
 * swbreak_enabled: controls whether report_stop() appends "swbreak:;".
 */
static bool first_stop, report_next_stop, swbreak_enabled;
83 
84 /*
85  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
86  * read buffer, 'start' is unused and 'len' contains the number of
87  * valid bytes in the buffer.  For a write buffer, 'start' is set to
88  * the index of the next byte in 'data' to send, and 'len' contains
89  * the remaining number of valid bytes to send.
90  */
struct io_buffer {
	uint8_t *data;		/* backing storage, resized by io_buffer_grow() */
	size_t capacity;	/* number of bytes allocated at 'data' */
	size_t start;		/* index of the next byte to send (write buffers) */
	size_t len;		/* count of valid bytes beginning at 'start' */
};
97 
/*
 * One software breakpoint planted by the debug server.  Entries live on
 * the 'breakpoints' list and are looked up by guest physical address.
 */
struct breakpoint {
	uint64_t gpa;			/* guest physical address of the patched byte */
	uint8_t shadow_inst;		/* original byte that was replaced by INT 3 */
	TAILQ_ENTRY(breakpoint) link;	/* linkage on the 'breakpoints' list */
};
103 
/*
 * When a vCPU stops due to an event that should be reported to the
 * debugger, information about the event is stored in this structure.
 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
 * and stops other vCPUs so the event can be reported.  The
 * report_stop() function reports the event for the 'stopped_vcpu'
 * vCPU.  When the debugger resumes execution via continue or step,
 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
 * event handlers until the associated event is reported or disabled.
 *
 * An idle vCPU will have all of the boolean fields set to false.
 *
 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
 * released to execute the stepped instruction.  When the vCPU reports
 * the stepping trap, 'stepped' is set.
 *
 * When a vCPU hits a breakpoint set by the debug server,
 * 'hit_swbreak' is set to true.
 */
struct vcpu_state {
	bool stepping;		/* released to execute a single instruction */
	bool stepped;		/* step trap taken, not yet acknowledged */
	bool hit_swbreak;	/* debug-server breakpoint hit, not yet acknowledged */
};
128 
/*
 * cur_resp accumulates outgoing bytes (see send_char()/send_data()) that
 * send_pending_data() writes to the socket.  cur_comm is the other
 * connection buffer -- presumably the incoming command stream; its
 * consumers are outside this part of the file.
 */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet payload currently being built. */
static uint8_t cur_csum;
/* VM context passed to the vmmapi/bhyve helpers. */
static struct vmctx *ctx;
/* Descriptor of the debugger connection; -1 when there is none. */
static int cur_fd = -1;
/* All software breakpoints currently planted in guest memory. */
static TAILQ_HEAD(, breakpoint) breakpoints;
/* Per-vCPU event state, indexed by vCPU id (guest_ncpus entries). */
static struct vcpu_state *vcpu_state;
/* vCPU handles indexed by vCPU id, filled in by gdb_cpu_add(). */
static struct vcpu **vcpus;
/*
 * cur_vcpu: vCPU that register/memory/breakpoint requests operate on.
 * stopped_vcpu: vCPU whose event is being reported, or -1 for none.
 */
static int cur_vcpu, stopped_vcpu;
/* When false, the gdb_cpu_*() entry points do nothing. */
static bool gdb_active = false;
138 
/*
 * Registers fetched and reported, in this order, by gdb_read_regs().
 * gdb_regsize[] supplies the number of bytes emitted for the entry at
 * the same index, so the two tables must stay in step.
 * NOTE(review): the ordering presumably mirrors the register layout GDB
 * expects for amd64 targets -- confirm before reordering.
 */
static const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
165 
/*
 * Number of bytes emitted for each register in a register dump.  Entry
 * N applies to entry N of gdb_regset[].
 */
static const int gdb_regsize[] = {
	/* RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP */
	8, 8, 8, 8, 8, 8, 8, 8,
	/* R8 through R15 */
	8, 8, 8, 8, 8, 8, 8, 8,
	/* RIP */
	8,
	/* RFLAGS, CS, SS, DS, ES, FS, GS */
	4, 4, 4, 4, 4, 4, 4
};
192 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace output for the debug server.  The log file
 * /tmp/bhyve_gdb.log is opened on the first call; if it cannot be
 * opened (or restricted under Capsicum) the message is dropped and the
 * open is retried on the next call.  Compiles to nothing unless
 * GDB_LOG is defined.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list args;

	if (logfile == NULL) {
		FILE *fp;

		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		logfile = fp;
	}

	va_start(args, fmt);
	vfprintf(logfile, fmt, args);
	va_end(args);
}
#else
#define debug(...)
#endif
223 
/* Forward declaration: close_connection() calls this before its definition. */
static void	remove_all_sw_breakpoints(void);
225 
226 static int
227 guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
228 {
229 	uint64_t regs[4];
230 	const int regset[4] = {
231 		VM_REG_GUEST_CR0,
232 		VM_REG_GUEST_CR3,
233 		VM_REG_GUEST_CR4,
234 		VM_REG_GUEST_EFER
235 	};
236 
237 	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
238 		return (-1);
239 
240 	/*
241 	 * For the debugger, always pretend to be the kernel (CPL 0),
242 	 * and if long-mode is enabled, always parse addresses as if
243 	 * in 64-bit mode.
244 	 */
245 	paging->cr3 = regs[1];
246 	paging->cpl = 0;
247 	if (regs[3] & EFER_LMA)
248 		paging->cpu_mode = CPU_MODE_64BIT;
249 	else if (regs[0] & CR0_PE)
250 		paging->cpu_mode = CPU_MODE_PROTECTED;
251 	else
252 		paging->cpu_mode = CPU_MODE_REAL;
253 	if (!(regs[0] & CR0_PG))
254 		paging->paging_mode = PAGING_MODE_FLAT;
255 	else if (!(regs[2] & CR4_PAE))
256 		paging->paging_mode = PAGING_MODE_32;
257 	else if (regs[3] & EFER_LME)
258 		paging->paging_mode = (regs[2] & CR4_LA57) ?
259 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
260 	else
261 		paging->paging_mode = PAGING_MODE_PAE;
262 	return (0);
263 }
264 
265 /*
266  * Map a guest virtual address to a physical address (for a given vcpu).
267  * If a guest virtual address is valid, return 1.  If the address is
268  * not valid, return 0.  If an error occurs obtaining the mapping,
269  * return -1.
270  */
271 static int
272 guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
273 {
274 	struct vm_guest_paging paging;
275 	int fault;
276 
277 	if (guest_paging_info(vcpu, &paging) == -1)
278 		return (-1);
279 
280 	/*
281 	 * Always use PROT_READ.  We really care if the VA is
282 	 * accessible, not if the current vCPU can write.
283 	 */
284 	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
285 	    &fault) == -1)
286 		return (-1);
287 	if (fault)
288 		return (0);
289 	return (1);
290 }
291 
292 static void
293 io_buffer_reset(struct io_buffer *io)
294 {
295 
296 	io->start = 0;
297 	io->len = 0;
298 }
299 
300 /* Available room for adding data. */
301 static size_t
302 io_buffer_avail(struct io_buffer *io)
303 {
304 
305 	return (io->capacity - (io->start + io->len));
306 }
307 
308 static uint8_t *
309 io_buffer_head(struct io_buffer *io)
310 {
311 
312 	return (io->data + io->start);
313 }
314 
315 static uint8_t *
316 io_buffer_tail(struct io_buffer *io)
317 {
318 
319 	return (io->data + io->start + io->len);
320 }
321 
322 static void
323 io_buffer_advance(struct io_buffer *io, size_t amount)
324 {
325 
326 	assert(amount <= io->len);
327 	io->start += amount;
328 	io->len -= amount;
329 }
330 
331 static void
332 io_buffer_consume(struct io_buffer *io, size_t amount)
333 {
334 
335 	io_buffer_advance(io, amount);
336 	if (io->len == 0) {
337 		io->start = 0;
338 		return;
339 	}
340 
341 	/*
342 	 * XXX: Consider making this move optional and compacting on a
343 	 * future read() before realloc().
344 	 */
345 	memmove(io->data, io_buffer_head(io), io->len);
346 	io->start = 0;
347 }
348 
349 static void
350 io_buffer_grow(struct io_buffer *io, size_t newsize)
351 {
352 	uint8_t *new_data;
353 	size_t avail, new_cap;
354 
355 	avail = io_buffer_avail(io);
356 	if (newsize <= avail)
357 		return;
358 
359 	new_cap = io->capacity + (newsize - avail);
360 	new_data = realloc(io->data, new_cap);
361 	if (new_data == NULL)
362 		err(1, "Failed to grow GDB I/O buffer");
363 	io->data = new_data;
364 	io->capacity = new_cap;
365 }
366 
367 static bool
368 response_pending(void)
369 {
370 
371 	if (cur_resp.start == 0 && cur_resp.len == 0)
372 		return (false);
373 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
374 		return (false);
375 	return (true);
376 }
377 
/*
 * Tear down the debugger connection: drop both mevent registrations
 * (closing the socket along with the read event), empty the I/O
 * buffers, remove every planted breakpoint, forget any pending vCPU
 * events and let all vCPUs run again.  Takes gdb_lock itself, so the
 * caller must not already hold it.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
404 
/* Convert a value in the range 0-15 to its lowercase hexadecimal character. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
414 
/*
 * Convert one hexadecimal character (either case) to its value.  Any
 * other character yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t digit;

	if (v >= '0' && v <= '9')
		digit = v - '0';
	else if (v >= 'a' && v <= 'f')
		digit = v - 'a' + 10;
	else if (v >= 'A' && v <= 'F')
		digit = v - 'A' + 10;
	else
		digit = 0xF;
	return (digit);
}
427 
/*
 * Parse 'len' hexadecimal characters at 'p', most significant digit
 * first, and return the accumulated value.  An empty string yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t result = 0;

	for (size_t i = 0; i < len; i++)
		result = (result << 4) | parse_digit(p[i]);
	return (result);
}
443 
/* Decode the two hexadecimal characters at 'p' into a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
450 
451 static void
452 send_pending_data(int fd)
453 {
454 	ssize_t nwritten;
455 
456 	if (cur_resp.len == 0) {
457 		mevent_disable(write_event);
458 		return;
459 	}
460 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
461 	if (nwritten == -1) {
462 		warn("Write to GDB socket failed");
463 		close_connection();
464 	} else {
465 		io_buffer_advance(&cur_resp, nwritten);
466 		if (cur_resp.len == 0)
467 			mevent_disable(write_event);
468 		else
469 			mevent_enable(write_event);
470 	}
471 }
472 
473 /* Append a single character to the output buffer. */
474 static void
475 send_char(uint8_t data)
476 {
477 	io_buffer_grow(&cur_resp, 1);
478 	*io_buffer_tail(&cur_resp) = data;
479 	cur_resp.len++;
480 }
481 
482 /* Append an array of bytes to the output buffer. */
483 static void
484 send_data(const uint8_t *data, size_t len)
485 {
486 
487 	io_buffer_grow(&cur_resp, len);
488 	memcpy(io_buffer_tail(&cur_resp), data, len);
489 	cur_resp.len += len;
490 }
491 
/*
 * Write the two-character lowercase hex form of 'v' to buf[0..1],
 * high nibble first.  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	const uint8_t hi = v >> 4;
	const uint8_t lo = v & 0xf;

	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
499 
/*
 * Queue 'v' as two hexadecimal characters.  Unlike append_byte(), the
 * packet checksum is left untouched.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	hex[0] = hex_digit(v >> 4);
	hex[1] = hex_digit(v & 0xf);
	send_data(hex, sizeof(hex));
}
512 
513 static void
514 start_packet(void)
515 {
516 
517 	send_char('$');
518 	cur_csum = 0;
519 }
520 
521 static void
522 finish_packet(void)
523 {
524 
525 	send_char('#');
526 	send_byte(cur_csum);
527 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
528 }
529 
530 /*
531  * Append a single character (for the packet payload) and update the
532  * checksum.
533  */
534 static void
535 append_char(uint8_t v)
536 {
537 
538 	send_char(v);
539 	cur_csum += v;
540 }
541 
542 /*
543  * Append an array of bytes (for the packet payload) and update the
544  * checksum.
545  */
546 static void
547 append_packet_data(const uint8_t *data, size_t len)
548 {
549 
550 	send_data(data, len);
551 	while (len > 0) {
552 		cur_csum += *data;
553 		data++;
554 		len--;
555 	}
556 }
557 
/*
 * Add the characters of the NUL-terminated string 'str' (without the
 * terminator) to the packet payload, updating the checksum.
 */
static void
append_string(const char *str)
{

	/*
	 * The packet helpers operate on uint8_t; convert explicitly
	 * rather than relying on an implicit char/uint8_t pointer
	 * conversion.
	 */
	append_packet_data((const uint8_t *)str, strlen(str));
}
564 
/*
 * Add 'v' to the packet payload as two hexadecimal characters, high
 * nibble first, updating the checksum.
 */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	hex[0] = hex_digit(v >> 4);
	hex[1] = hex_digit(v & 0xf);
	append_packet_data(hex, sizeof(hex));
}
573 
/*
 * Add the low 'len' bytes of 'value' to the packet, least significant
 * byte first, each as two hexadecimal characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

	for (size_t n = 0; n < len; n++, value >>= 8)
		append_byte(value);
}
584 
/*
 * Add the low 'len' bytes of 'value' to the packet, most significant
 * byte first, each as two hexadecimal characters.  Bytes beyond the
 * width of 'value' are emitted as zero; a 'len' of zero adds nothing.
 *
 * The bytes are emitted directly instead of being staged in a
 * variable-length array, which avoids a zero-sized array when 'len'
 * is 0, unbounded stack use, and char/uint8_t pointer mismatches.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	size_t i;
	uint8_t byte;

	for (i = len; i > 0; i--) {
		/* Shifting by the full width or more is undefined. */
		if (i - 1 >= sizeof(value))
			byte = 0;
		else
			byte = value >> ((i - 1) * 8);
		append_byte(byte);
	}
}
597 
/*
 * Add 'value' to the packet in big-endian hexadecimal using the fewest
 * whole bytes that hold it; zero is sent as the single character '0'.
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value == 0) {
		append_char('0');
		return;
	}
	nbytes = (fls(value) + 7) / 8;
	append_unsigned_be(value, nbytes);
}
607 
/*
 * Add each character of the NUL-terminated string 'str' to the packet
 * as a pair of hexadecimal characters.
 */
static void
append_asciihex(const char *str)
{

	for (const char *p = str; *p != '\0'; p++)
		append_byte(*p);
}
617 
/*
 * Queue a packet with an empty payload.  Elsewhere in this file it is
 * used as the reply to requests the server does not support.
 */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
625 
/*
 * Queue an "Exx" error packet, where xx is the low byte of 'error'
 * (callers pass errno values) in hexadecimal.
 */
static void
send_error(int error)
{
	const uint8_t code = error;

	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
635 
/* Queue the "OK" success packet. */
static void
send_ok(void)
{

	start_packet();
	append_char('O');
	append_char('K');
	finish_packet();
}
644 
/*
 * Decode a thread-id field.  "0" yields 0, "-1" yields -1, an empty
 * field yields -2, and anything else is parsed as hexadecimal.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (memcmp(data, "-1", 2) == 0)
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
657 
658 /*
659  * Report the current stop event to the debugger.  If the stop is due
660  * to an event triggered on a specific vCPU such as a breakpoint or
661  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
662  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
663  * the reporting vCPU for vCPU events.
664  */
665 static void
666 report_stop(bool set_cur_vcpu)
667 {
668 	struct vcpu_state *vs;
669 
670 	start_packet();
671 	if (stopped_vcpu == -1) {
672 		append_char('S');
673 		append_byte(GDB_SIGNAL_TRAP);
674 	} else {
675 		vs = &vcpu_state[stopped_vcpu];
676 		if (set_cur_vcpu)
677 			cur_vcpu = stopped_vcpu;
678 		append_char('T');
679 		append_byte(GDB_SIGNAL_TRAP);
680 		append_string("thread:");
681 		append_integer(stopped_vcpu + 1);
682 		append_char(';');
683 		if (vs->hit_swbreak) {
684 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
685 			if (swbreak_enabled)
686 				append_string("swbreak:;");
687 		} else if (vs->stepped)
688 			debug("$vCPU %d reporting step\n", stopped_vcpu);
689 		else
690 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
691 	}
692 	finish_packet();
693 	report_next_stop = false;
694 }
695 
696 /*
697  * If this stop is due to a vCPU event, clear that event to mark it as
698  * acknowledged.
699  */
700 static void
701 discard_stop(void)
702 {
703 	struct vcpu_state *vs;
704 
705 	if (stopped_vcpu != -1) {
706 		vs = &vcpu_state[stopped_vcpu];
707 		vs->hit_swbreak = false;
708 		vs->stepped = false;
709 		stopped_vcpu = -1;
710 	}
711 	report_next_stop = true;
712 }
713 
714 static void
715 gdb_finish_suspend_vcpus(void)
716 {
717 
718 	if (first_stop) {
719 		first_stop = false;
720 		stopped_vcpu = -1;
721 	} else if (report_next_stop) {
722 		assert(!response_pending());
723 		report_stop(true);
724 		send_pending_data(cur_fd);
725 	}
726 }
727 
/*
 * vCPU threads invoke this function whenever the vCPU enters the
 * debug server to pause or report an event.  vCPU threads wait here
 * as long as the debug server keeps them suspended.
 *
 * The caller must hold gdb_lock; it is released while sleeping on
 * idle_vcpus and held again on return.  If 'report_stop' is true and
 * this vCPU is the last suspended one to arrive, the stop is finalized
 * via gdb_finish_suspend_vcpus().  Note that the parameter shadows the
 * report_stop() function inside this body.
 */
static void
_gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
{
	int vcpuid = vcpu_id(vcpu);

	debug("$vCPU %d suspending\n", vcpuid);
	/* Announce that this vCPU has reached the parking spot. */
	CPU_SET(vcpuid, &vcpus_waiting);
	/* Everyone asked to stop has stopped: finish the suspend. */
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	/* Sleep until the debug server removes us from the suspended set. */
	while (CPU_ISSET(vcpuid, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpuid, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpuid);
}
747 
748 /*
749  * Invoked at the start of a vCPU thread's execution to inform the
750  * debug server about the new thread.
751  */
752 void
753 gdb_cpu_add(struct vcpu *vcpu)
754 {
755 	int vcpuid;
756 
757 	if (!gdb_active)
758 		return;
759 	vcpuid = vcpu_id(vcpu);
760 	debug("$vCPU %d starting\n", vcpuid);
761 	pthread_mutex_lock(&gdb_lock);
762 	assert(vcpuid < guest_ncpus);
763 	assert(vcpus[vcpuid] == NULL);
764 	vcpus[vcpuid] = vcpu;
765 	CPU_SET(vcpuid, &vcpus_active);
766 	if (!TAILQ_EMPTY(&breakpoints)) {
767 		vm_set_capability(vcpu, VM_CAP_BPT_EXIT, 1);
768 		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
769 	}
770 
771 	/*
772 	 * If a vcpu is added while vcpus are stopped, suspend the new
773 	 * vcpu so that it will pop back out with a debug exit before
774 	 * executing the first instruction.
775 	 */
776 	if (!CPU_EMPTY(&vcpus_suspended)) {
777 		CPU_SET(vcpuid, &vcpus_suspended);
778 		_gdb_cpu_suspend(vcpu, false);
779 	}
780 	pthread_mutex_unlock(&gdb_lock);
781 }
782 
783 /*
784  * Invoked by vCPU before resuming execution.  This enables stepping
785  * if the vCPU is marked as stepping.
786  */
787 static void
788 gdb_cpu_resume(struct vcpu *vcpu)
789 {
790 	struct vcpu_state *vs;
791 	int error;
792 
793 	vs = &vcpu_state[vcpu_id(vcpu)];
794 
795 	/*
796 	 * Any pending event should already be reported before
797 	 * resuming.
798 	 */
799 	assert(vs->hit_swbreak == false);
800 	assert(vs->stepped == false);
801 	if (vs->stepping) {
802 		error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, 1);
803 		assert(error == 0);
804 	}
805 }
806 
/*
 * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
 * has been suspended due to an event on different vCPU or in response
 * to a guest-wide suspend such as Ctrl-C or the stop on attach.
 *
 * Parks the calling vCPU thread under gdb_lock until it is released,
 * then arms single-stepping if requested.  Does nothing when the debug
 * server is not active.
 */
void
gdb_cpu_suspend(struct vcpu *vcpu)
{

	if (!gdb_active)
		return;
	pthread_mutex_lock(&gdb_lock);
	_gdb_cpu_suspend(vcpu, true);
	gdb_cpu_resume(vcpu);
	pthread_mutex_unlock(&gdb_lock);
}
823 
824 static void
825 gdb_suspend_vcpus(void)
826 {
827 
828 	assert(pthread_mutex_isowned_np(&gdb_lock));
829 	debug("suspending all CPUs\n");
830 	vcpus_suspended = vcpus_active;
831 	vm_suspend_all_cpus(ctx);
832 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
833 		gdb_finish_suspend_vcpus();
834 }
835 
/*
 * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
 * the VT-x-specific MTRAP exit.
 *
 * If the vCPU was being stepped, the step is converted into a pending
 * 'stepped' event, MTRAP exits are turned off again, and the vCPU
 * waits until the event has been acknowledged by the debugger.  An
 * MTRAP exit on a vCPU that is not stepping is ignored.
 */
void
gdb_cpu_mtrap(struct vcpu *vcpu)
{
	struct vcpu_state *vs;
	int vcpuid;

	if (!gdb_active)
		return;
	vcpuid = vcpu_id(vcpu);
	debug("$vCPU %d MTRAP\n", vcpuid);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpuid];
	if (vs->stepping) {
		vs->stepping = false;
		vs->stepped = true;
		vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, 0);
		/* Loop until discard_stop() clears 'stepped' for this vCPU. */
		while (vs->stepped) {
			/*
			 * Claim the reporting slot if it is free; otherwise
			 * another vCPU's event is reported first and this
			 * one waits its turn.
			 */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpuid);
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}
868 
869 static struct breakpoint *
870 find_breakpoint(uint64_t gpa)
871 {
872 	struct breakpoint *bp;
873 
874 	TAILQ_FOREACH(bp, &breakpoints, link) {
875 		if (bp->gpa == gpa)
876 			return (bp);
877 	}
878 	return (NULL);
879 }
880 
/*
 * Handler for a breakpoint exit taken by 'vcpu' at vmexit->rip.
 *
 * If the debug server planted a breakpoint at that address, the hit is
 * recorded and the vCPU waits until the debugger has been told about
 * it or the breakpoint has been removed.  Otherwise the breakpoint
 * belongs to the guest and a #BP exception is injected back into it.
 * Exits the process if the debug server is not active.
 */
void
gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error, vcpuid;

	if (!gdb_active) {
		fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
		exit(4);
	}
	vcpuid = vcpu_id(vcpu);
	pthread_mutex_lock(&gdb_lock);
	/* Breakpoints are tracked by guest physical address. */
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpuid];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/*
		 * Set %rip to the address of the breakpoint --
		 * presumably so the guest resumes at the original
		 * instruction once it is restored.
		 */
		vm_set_register(vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		for (;;) {
			/*
			 * Claim the reporting slot if it is free;
			 * otherwise wait behind the vCPU that holds it.
			 */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
				    vcpuid, vmexit->rip);
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		/* Not one of ours: hand the trap back to the guest. */
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
		    vmexit->rip);
		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
		    vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
936 
937 static bool
938 gdb_step_vcpu(struct vcpu *vcpu)
939 {
940 	int error, val, vcpuid;
941 
942 	vcpuid = vcpu_id(vcpu);
943 	debug("$vCPU %d step\n", vcpuid);
944 	error = vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val);
945 	if (error < 0)
946 		return (false);
947 
948 	discard_stop();
949 	vcpu_state[vcpuid].stepping = true;
950 	vm_resume_cpu(vcpu);
951 	CPU_CLR(vcpuid, &vcpus_suspended);
952 	pthread_cond_broadcast(&idle_vcpus);
953 	return (true);
954 }
955 
/*
 * Let every vCPU run again: resume them in the hypervisor, empty the
 * suspended set and wake all threads parked on idle_vcpus.  gdb_lock
 * must be held by the caller.
 */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_all_cpus(ctx);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
966 
967 static void
968 gdb_read_regs(void)
969 {
970 	uint64_t regvals[nitems(gdb_regset)];
971 
972 	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
973 	    gdb_regset, regvals) == -1) {
974 		send_error(errno);
975 		return;
976 	}
977 	start_packet();
978 	for (size_t i = 0; i < nitems(regvals); i++)
979 		append_unsigned_native(regvals[i], gdb_regsize[i]);
980 	finish_packet();
981 }
982 
/*
 * Handle a memory read request of the form "m<addr>,<length>" (both
 * fields in hexadecimal) against the current vCPU's address space.
 *
 * The range is walked one guest page at a time.  Guest RAM is read
 * directly; anything else is read through read_mem() using 1, 2 or 4
 * byte accesses.  If a failure occurs after some bytes have already
 * been produced, the packet is finished with the partial data;
 * otherwise an error packet is sent.
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	/* 'started' records whether the reply packet has been opened. */
	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
		if (error == -1) {
			/* Translation could not be attempted. */
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			/* Address is not mapped. */
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(vcpus[cur_vcpu], gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit the value low byte first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		/* Either done, or exactly at the start of the next page. */
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	/* A zero-length read still gets an (empty) reply packet. */
	if (!started)
		start_packet();
	finish_packet();
}
1094 
1095 static void
1096 gdb_write_mem(const uint8_t *data, size_t len)
1097 {
1098 	uint64_t gpa, gva, val;
1099 	uint8_t *cp;
1100 	size_t resid, todo, bytes;
1101 	int error;
1102 
1103 	/* Skip 'M' */
1104 	data += 1;
1105 	len -= 1;
1106 
1107 	/* Parse and consume address. */
1108 	cp = memchr(data, ',', len);
1109 	if (cp == NULL || cp == data) {
1110 		send_error(EINVAL);
1111 		return;
1112 	}
1113 	gva = parse_integer(data, cp - data);
1114 	len -= (cp - data) + 1;
1115 	data += (cp - data) + 1;
1116 
1117 	/* Parse and consume length. */
1118 	cp = memchr(data, ':', len);
1119 	if (cp == NULL || cp == data) {
1120 		send_error(EINVAL);
1121 		return;
1122 	}
1123 	resid = parse_integer(data, cp - data);
1124 	len -= (cp - data) + 1;
1125 	data += (cp - data) + 1;
1126 
1127 	/* Verify the available bytes match the length. */
1128 	if (len != resid * 2) {
1129 		send_error(EINVAL);
1130 		return;
1131 	}
1132 
1133 	while (resid > 0) {
1134 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1135 		if (error == -1) {
1136 			send_error(errno);
1137 			return;
1138 		}
1139 		if (error == 0) {
1140 			send_error(EFAULT);
1141 			return;
1142 		}
1143 
1144 		/* Write bytes to current page. */
1145 		todo = getpagesize() - gpa % getpagesize();
1146 		if (todo > resid)
1147 			todo = resid;
1148 
1149 		cp = paddr_guest2host(ctx, gpa, todo);
1150 		if (cp != NULL) {
1151 			/*
1152 			 * If this page is guest RAM, write it a byte
1153 			 * at a time.
1154 			 */
1155 			while (todo > 0) {
1156 				assert(len >= 2);
1157 				*cp = parse_byte(data);
1158 				data += 2;
1159 				len -= 2;
1160 				cp++;
1161 				gpa++;
1162 				gva++;
1163 				resid--;
1164 				todo--;
1165 			}
1166 		} else {
1167 			/*
1168 			 * If this page isn't guest RAM, try to handle
1169 			 * it via MMIO.  For MMIO requests, use
1170 			 * aligned writes of words when possible.
1171 			 */
1172 			while (todo > 0) {
1173 				if (gpa & 1 || todo == 1) {
1174 					bytes = 1;
1175 					val = parse_byte(data);
1176 				} else if (gpa & 2 || todo == 2) {
1177 					bytes = 2;
1178 					val = be16toh(parse_integer(data, 4));
1179 				} else {
1180 					bytes = 4;
1181 					val = be32toh(parse_integer(data, 8));
1182 				}
1183 				error = write_mem(vcpus[cur_vcpu], gpa, val,
1184 				    bytes);
1185 				if (error == 0) {
1186 					gpa += bytes;
1187 					gva += bytes;
1188 					resid -= bytes;
1189 					todo -= bytes;
1190 					data += 2 * bytes;
1191 					len -= 2 * bytes;
1192 				} else {
1193 					send_error(EFAULT);
1194 					return;
1195 				}
1196 			}
1197 		}
1198 		assert(resid == 0 || gpa % getpagesize() == 0);
1199 	}
1200 	assert(len == 0);
1201 	send_ok();
1202 }
1203 
1204 static bool
1205 set_breakpoint_caps(bool enable)
1206 {
1207 	cpuset_t mask;
1208 	int vcpu;
1209 
1210 	mask = vcpus_active;
1211 	while (!CPU_EMPTY(&mask)) {
1212 		vcpu = CPU_FFS(&mask) - 1;
1213 		CPU_CLR(vcpu, &mask);
1214 		if (vm_set_capability(vcpus[vcpu], VM_CAP_BPT_EXIT,
1215 		    enable ? 1 : 0) < 0)
1216 			return (false);
1217 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1218 		    enable ? "en" : "dis");
1219 	}
1220 	return (true);
1221 }
1222 
1223 static void
1224 remove_all_sw_breakpoints(void)
1225 {
1226 	struct breakpoint *bp, *nbp;
1227 	uint8_t *cp;
1228 
1229 	if (TAILQ_EMPTY(&breakpoints))
1230 		return;
1231 
1232 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1233 		debug("remove breakpoint at %#lx\n", bp->gpa);
1234 		cp = paddr_guest2host(ctx, bp->gpa, 1);
1235 		*cp = bp->shadow_inst;
1236 		TAILQ_REMOVE(&breakpoints, bp, link);
1237 		free(bp);
1238 	}
1239 	TAILQ_INIT(&breakpoints);
1240 	set_breakpoint_caps(false);
1241 }
1242 
1243 static void
1244 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1245 {
1246 	struct breakpoint *bp;
1247 	uint64_t gpa;
1248 	uint8_t *cp;
1249 	int error;
1250 
1251 	if (kind != 1) {
1252 		send_error(EINVAL);
1253 		return;
1254 	}
1255 
1256 	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1257 	if (error == -1) {
1258 		send_error(errno);
1259 		return;
1260 	}
1261 	if (error == 0) {
1262 		send_error(EFAULT);
1263 		return;
1264 	}
1265 
1266 	cp = paddr_guest2host(ctx, gpa, 1);
1267 
1268 	/* Only permit breakpoints in guest RAM. */
1269 	if (cp == NULL) {
1270 		send_error(EFAULT);
1271 		return;
1272 	}
1273 
1274 	/* Find any existing breakpoint. */
1275 	bp = find_breakpoint(gpa);
1276 
1277 	/*
1278 	 * Silently ignore duplicate commands since the protocol
1279 	 * requires these packets to be idempotent.
1280 	 */
1281 	if (insert) {
1282 		if (bp == NULL) {
1283 			if (TAILQ_EMPTY(&breakpoints) &&
1284 			    !set_breakpoint_caps(true)) {
1285 				send_empty_response();
1286 				return;
1287 			}
1288 			bp = malloc(sizeof(*bp));
1289 			bp->gpa = gpa;
1290 			bp->shadow_inst = *cp;
1291 			*cp = 0xcc;	/* INT 3 */
1292 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1293 			debug("new breakpoint at %#lx\n", gpa);
1294 		}
1295 	} else {
1296 		if (bp != NULL) {
1297 			debug("remove breakpoint at %#lx\n", gpa);
1298 			*cp = bp->shadow_inst;
1299 			TAILQ_REMOVE(&breakpoints, bp, link);
1300 			free(bp);
1301 			if (TAILQ_EMPTY(&breakpoints))
1302 				set_breakpoint_caps(false);
1303 		}
1304 	}
1305 	send_ok();
1306 }
1307 
/*
 * Decode a breakpoint packet of the form "<Z|z><type>,<addr>,<kind>"
 * and dispatch it.  A leading 'Z' selects insertion; anything else is
 * treated as removal.  Only type 0 is handed to update_sw_breakpoint();
 * every other type gets an empty response.  A missing or empty type or
 * address field is answered with EINVAL.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	const uint8_t *sep;
	uint64_t gva;
	size_t fieldlen;
	int kind, type;
	bool insert;

	/* Remember the direction, then step over the command byte. */
	insert = (*data == 'Z');
	data++;
	len--;

	/* Field 1: type, terminated by a comma. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	fieldlen = sep - data;
	type = parse_integer(data, fieldlen);
	data += fieldlen + 1;
	len -= fieldlen + 1;

	/* Field 2: address, terminated by a comma. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	fieldlen = sep - data;
	gva = parse_integer(data, fieldlen);
	data += fieldlen + 1;
	len -= fieldlen + 1;

	/* Field 3: kind, which must run to the end of the packet. */
	sep = memchr(data, ';', len);
	if (sep == data) {
		send_error(EINVAL);
		return;
	}
	if (sep != NULL) {
		/*
		 * A ';' would introduce conditions or commands.  Neither
		 * the ConditionalBreakpoints nor the BreakpointCommands
		 * feature is advertised, so the remote end should never
		 * send them.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);

	if (type == 0)
		update_sw_breakpoint(gva, kind, insert);
	else
		send_empty_response();
}
1371 
/*
 * Report whether the packet bytes in 'data' (of length 'len') start
 * with the NUL-terminated command name 'cmd'.  This is a prefix test:
 * bytes following the command name are not examined.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (len >= cmdlen && memcmp(data, cmd, cmdlen) == 0);
}
1380 
1381 static void
1382 check_features(const uint8_t *data, size_t len)
1383 {
1384 	char *feature, *next_feature, *str, *value;
1385 	bool supported;
1386 
1387 	str = malloc(len + 1);
1388 	memcpy(str, data, len);
1389 	str[len] = '\0';
1390 	next_feature = str;
1391 
1392 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1393 		/*
1394 		 * Null features shouldn't exist, but skip if they
1395 		 * do.
1396 		 */
1397 		if (strcmp(feature, "") == 0)
1398 			continue;
1399 
1400 		/*
1401 		 * Look for the value or supported / not supported
1402 		 * flag.
1403 		 */
1404 		value = strchr(feature, '=');
1405 		if (value != NULL) {
1406 			*value = '\0';
1407 			value++;
1408 			supported = true;
1409 		} else {
1410 			value = feature + strlen(feature) - 1;
1411 			switch (*value) {
1412 			case '+':
1413 				supported = true;
1414 				break;
1415 			case '-':
1416 				supported = false;
1417 				break;
1418 			default:
1419 				/*
1420 				 * This is really a protocol error,
1421 				 * but we just ignore malformed
1422 				 * features for ease of
1423 				 * implementation.
1424 				 */
1425 				continue;
1426 			}
1427 			value = NULL;
1428 		}
1429 
1430 		if (strcmp(feature, "swbreak") == 0)
1431 			swbreak_enabled = supported;
1432 	}
1433 	free(str);
1434 
1435 	start_packet();
1436 
1437 	/* This is an arbitrary limit. */
1438 	append_string("PacketSize=4096");
1439 	append_string(";swbreak+");
1440 	finish_packet();
1441 }
1442 
1443 static void
1444 gdb_query(const uint8_t *data, size_t len)
1445 {
1446 
1447 	/*
1448 	 * TODO:
1449 	 * - qSearch
1450 	 */
1451 	if (command_equals(data, len, "qAttached")) {
1452 		start_packet();
1453 		append_char('1');
1454 		finish_packet();
1455 	} else if (command_equals(data, len, "qC")) {
1456 		start_packet();
1457 		append_string("QC");
1458 		append_integer(cur_vcpu + 1);
1459 		finish_packet();
1460 	} else if (command_equals(data, len, "qfThreadInfo")) {
1461 		cpuset_t mask;
1462 		bool first;
1463 		int vcpu;
1464 
1465 		if (CPU_EMPTY(&vcpus_active)) {
1466 			send_error(EINVAL);
1467 			return;
1468 		}
1469 		mask = vcpus_active;
1470 		start_packet();
1471 		append_char('m');
1472 		first = true;
1473 		while (!CPU_EMPTY(&mask)) {
1474 			vcpu = CPU_FFS(&mask) - 1;
1475 			CPU_CLR(vcpu, &mask);
1476 			if (first)
1477 				first = false;
1478 			else
1479 				append_char(',');
1480 			append_integer(vcpu + 1);
1481 		}
1482 		finish_packet();
1483 	} else if (command_equals(data, len, "qsThreadInfo")) {
1484 		start_packet();
1485 		append_char('l');
1486 		finish_packet();
1487 	} else if (command_equals(data, len, "qSupported")) {
1488 		data += strlen("qSupported");
1489 		len -= strlen("qSupported");
1490 		check_features(data, len);
1491 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1492 		char buf[16];
1493 		int tid;
1494 
1495 		data += strlen("qThreadExtraInfo");
1496 		len -= strlen("qThreadExtraInfo");
1497 		if (*data != ',') {
1498 			send_error(EINVAL);
1499 			return;
1500 		}
1501 		tid = parse_threadid(data + 1, len - 1);
1502 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1503 			send_error(EINVAL);
1504 			return;
1505 		}
1506 
1507 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1508 		start_packet();
1509 		append_asciihex(buf);
1510 		finish_packet();
1511 	} else
1512 		send_empty_response();
1513 }
1514 
1515 static void
1516 handle_command(const uint8_t *data, size_t len)
1517 {
1518 
1519 	/* Reject packets with a sequence-id. */
1520 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1521 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1522 		send_empty_response();
1523 		return;
1524 	}
1525 
1526 	switch (*data) {
1527 	case 'c':
1528 		if (len != 1) {
1529 			send_error(EINVAL);
1530 			break;
1531 		}
1532 
1533 		discard_stop();
1534 		gdb_resume_vcpus();
1535 		break;
1536 	case 'D':
1537 		send_ok();
1538 
1539 		/* TODO: Resume any stopped CPUs. */
1540 		break;
1541 	case 'g': {
1542 		gdb_read_regs();
1543 		break;
1544 	}
1545 	case 'H': {
1546 		int tid;
1547 
1548 		if (data[1] != 'g' && data[1] != 'c') {
1549 			send_error(EINVAL);
1550 			break;
1551 		}
1552 		tid = parse_threadid(data + 2, len - 2);
1553 		if (tid == -2) {
1554 			send_error(EINVAL);
1555 			break;
1556 		}
1557 
1558 		if (CPU_EMPTY(&vcpus_active)) {
1559 			send_error(EINVAL);
1560 			break;
1561 		}
1562 		if (tid == -1 || tid == 0)
1563 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1564 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1565 			cur_vcpu = tid - 1;
1566 		else {
1567 			send_error(EINVAL);
1568 			break;
1569 		}
1570 		send_ok();
1571 		break;
1572 	}
1573 	case 'm':
1574 		gdb_read_mem(data, len);
1575 		break;
1576 	case 'M':
1577 		gdb_write_mem(data, len);
1578 		break;
1579 	case 'T': {
1580 		int tid;
1581 
1582 		tid = parse_threadid(data + 1, len - 1);
1583 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1584 			send_error(EINVAL);
1585 			return;
1586 		}
1587 		send_ok();
1588 		break;
1589 	}
1590 	case 'q':
1591 		gdb_query(data, len);
1592 		break;
1593 	case 's':
1594 		if (len != 1) {
1595 			send_error(EINVAL);
1596 			break;
1597 		}
1598 
1599 		/* Don't send a reply until a stop occurs. */
1600 		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
1601 			send_error(EOPNOTSUPP);
1602 			break;
1603 		}
1604 		break;
1605 	case 'z':
1606 	case 'Z':
1607 		parse_breakpoint(data, len);
1608 		break;
1609 	case '?':
1610 		report_stop(false);
1611 		break;
1612 	case 'G': /* TODO */
1613 	case 'v':
1614 		/* Handle 'vCont' */
1615 		/* 'vCtrlC' */
1616 	case 'p': /* TODO */
1617 	case 'P': /* TODO */
1618 	case 'Q': /* TODO */
1619 	case 't': /* TODO */
1620 	case 'X': /* TODO */
1621 	default:
1622 		send_empty_response();
1623 	}
1624 }
1625 
1626 /* Check for a valid packet in the command buffer. */
1627 static void
1628 check_command(int fd)
1629 {
1630 	uint8_t *head, *hash, *p, sum;
1631 	size_t avail, plen;
1632 
1633 	for (;;) {
1634 		avail = cur_comm.len;
1635 		if (avail == 0)
1636 			return;
1637 		head = io_buffer_head(&cur_comm);
1638 		switch (*head) {
1639 		case 0x03:
1640 			debug("<- Ctrl-C\n");
1641 			io_buffer_consume(&cur_comm, 1);
1642 
1643 			gdb_suspend_vcpus();
1644 			break;
1645 		case '+':
1646 			/* ACK of previous response. */
1647 			debug("<- +\n");
1648 			if (response_pending())
1649 				io_buffer_reset(&cur_resp);
1650 			io_buffer_consume(&cur_comm, 1);
1651 			if (stopped_vcpu != -1 && report_next_stop) {
1652 				report_stop(true);
1653 				send_pending_data(fd);
1654 			}
1655 			break;
1656 		case '-':
1657 			/* NACK of previous response. */
1658 			debug("<- -\n");
1659 			if (response_pending()) {
1660 				cur_resp.len += cur_resp.start;
1661 				cur_resp.start = 0;
1662 				if (cur_resp.data[0] == '+')
1663 					io_buffer_advance(&cur_resp, 1);
1664 				debug("-> %.*s\n", (int)cur_resp.len,
1665 				    io_buffer_head(&cur_resp));
1666 			}
1667 			io_buffer_consume(&cur_comm, 1);
1668 			send_pending_data(fd);
1669 			break;
1670 		case '$':
1671 			/* Packet. */
1672 
1673 			if (response_pending()) {
1674 				warnx("New GDB command while response in "
1675 				    "progress");
1676 				io_buffer_reset(&cur_resp);
1677 			}
1678 
1679 			/* Is packet complete? */
1680 			hash = memchr(head, '#', avail);
1681 			if (hash == NULL)
1682 				return;
1683 			plen = (hash - head + 1) + 2;
1684 			if (avail < plen)
1685 				return;
1686 			debug("<- %.*s\n", (int)plen, head);
1687 
1688 			/* Verify checksum. */
1689 			for (sum = 0, p = head + 1; p < hash; p++)
1690 				sum += *p;
1691 			if (sum != parse_byte(hash + 1)) {
1692 				io_buffer_consume(&cur_comm, plen);
1693 				debug("-> -\n");
1694 				send_char('-');
1695 				send_pending_data(fd);
1696 				break;
1697 			}
1698 			send_char('+');
1699 
1700 			handle_command(head + 1, hash - (head + 1));
1701 			io_buffer_consume(&cur_comm, plen);
1702 			if (!response_pending())
1703 				debug("-> +\n");
1704 			send_pending_data(fd);
1705 			break;
1706 		default:
1707 			/* XXX: Possibly drop connection instead. */
1708 			debug("-> %02x\n", *head);
1709 			io_buffer_consume(&cur_comm, 1);
1710 			break;
1711 		}
1712 	}
1713 }
1714 
1715 static void
1716 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1717 {
1718 	size_t pending;
1719 	ssize_t nread;
1720 	int n;
1721 
1722 	if (ioctl(fd, FIONREAD, &n) == -1) {
1723 		warn("FIONREAD on GDB socket");
1724 		return;
1725 	}
1726 	assert(n >= 0);
1727 	pending = n;
1728 
1729 	/*
1730 	 * 'pending' might be zero due to EOF.  We need to call read
1731 	 * with a non-zero length to detect EOF.
1732 	 */
1733 	if (pending == 0)
1734 		pending = 1;
1735 
1736 	/* Ensure there is room in the command buffer. */
1737 	io_buffer_grow(&cur_comm, pending);
1738 	assert(io_buffer_avail(&cur_comm) >= pending);
1739 
1740 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1741 	if (nread == 0) {
1742 		close_connection();
1743 	} else if (nread == -1) {
1744 		if (errno == EAGAIN)
1745 			return;
1746 
1747 		warn("Read from GDB socket");
1748 		close_connection();
1749 	} else {
1750 		cur_comm.len += nread;
1751 		pthread_mutex_lock(&gdb_lock);
1752 		check_command(fd);
1753 		pthread_mutex_unlock(&gdb_lock);
1754 	}
1755 }
1756 
1757 static void
1758 gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
1759 {
1760 
1761 	send_pending_data(fd);
1762 }
1763 
1764 static void
1765 new_connection(int fd, enum ev_type event __unused, void *arg)
1766 {
1767 	int optval, s;
1768 
1769 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1770 	if (s == -1) {
1771 		if (arg != NULL)
1772 			err(1, "Failed accepting initial GDB connection");
1773 
1774 		/* Silently ignore errors post-startup. */
1775 		return;
1776 	}
1777 
1778 	optval = 1;
1779 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1780 	    -1) {
1781 		warn("Failed to disable SIGPIPE for GDB connection");
1782 		close(s);
1783 		return;
1784 	}
1785 
1786 	pthread_mutex_lock(&gdb_lock);
1787 	if (cur_fd != -1) {
1788 		close(s);
1789 		warnx("Ignoring additional GDB connection.");
1790 	}
1791 
1792 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1793 	if (read_event == NULL) {
1794 		if (arg != NULL)
1795 			err(1, "Failed to setup initial GDB connection");
1796 		pthread_mutex_unlock(&gdb_lock);
1797 		return;
1798 	}
1799 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1800 	if (write_event == NULL) {
1801 		if (arg != NULL)
1802 			err(1, "Failed to setup initial GDB connection");
1803 		mevent_delete_close(read_event);
1804 		read_event = NULL;
1805 	}
1806 
1807 	cur_fd = s;
1808 	cur_vcpu = 0;
1809 	stopped_vcpu = -1;
1810 
1811 	/* Break on attach. */
1812 	first_stop = true;
1813 	report_next_stop = false;
1814 	gdb_suspend_vcpus();
1815 	pthread_mutex_unlock(&gdb_lock);
1816 }
1817 
1818 #ifndef WITHOUT_CAPSICUM
1819 static void
1820 limit_gdb_socket(int s)
1821 {
1822 	cap_rights_t rights;
1823 	unsigned long ioctls[] = { FIONREAD };
1824 
1825 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1826 	    CAP_SETSOCKOPT, CAP_IOCTL);
1827 	if (caph_rights_limit(s, &rights) == -1)
1828 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1829 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1830 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1831 }
1832 #endif
1833 
/*
 * Set up the GDB stub for the VM described by '_ctx'.
 *
 * Does nothing unless the "gdb.port" configuration value is set.
 * Otherwise a non-blocking listening socket is created on "gdb.address"
 * (default "localhost") and that port, the stub's lock, condition
 * variable and per-vCPU arrays are initialised, and new_connection() is
 * registered for incoming connections.  When "gdb.wait" is true, vCPU 0
 * is marked suspended so that it does not run until a debugger
 * attaches.
 *
 * Every failure in here terminates the process.
 */
void
init_gdb(struct vmctx *_ctx)
{
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	/* Only the first address returned by the resolver is used. */
	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	/* Best effort: a failure here is not fatal. */
	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	if (vcpus == NULL || vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	/* Check F_GETFL so that -1 is never OR'ed into the new flags. */
	flags = fcntl(s, F_GETFL);
	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	/* Without this event no debugger could ever connect. */
	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
		errx(1, "Failed to register gdb listen socket");
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);
}
1918