xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 1843dfb05ed80149f5a412180af882e3cb8f451b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include <sys/param.h>
30 #ifndef WITHOUT_CAPSICUM
31 #include <sys/capsicum.h>
32 #endif
33 #include <sys/endian.h>
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/queue.h>
37 #include <sys/socket.h>
38 #include <machine/atomic.h>
39 #include <machine/specialreg.h>
40 #include <machine/vmm.h>
41 #include <netinet/in.h>
42 #include <assert.h>
43 #ifndef WITHOUT_CAPSICUM
44 #include <capsicum_helpers.h>
45 #endif
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <netdb.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include <stdbool.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <sysexits.h>
57 #include <unistd.h>
58 #include <vmmapi.h>
59 
60 #include "bhyverun.h"
61 #include "config.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65 
/*
 * Signal numbers sent to the debugger follow the GDB remote protocol's
 * own GDB_SIGNAL_* numbering.  SIGTRAP is the value used for most stop
 * replies.
 */
#define	GDB_SIGNAL_TRAP		5
71 
72 static void gdb_resume_vcpus(void);
73 static void check_command(int fd);
74 
75 static struct mevent *read_event, *write_event;
76 
77 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
78 static pthread_mutex_t gdb_lock;
79 static pthread_cond_t idle_vcpus;
80 static bool first_stop, report_next_stop, swbreak_enabled;
81 
/*
 * A growable byte buffer used for socket I/O.  'data' points at
 * 'capacity' bytes of storage.
 *
 * Read buffers ignore 'start'; 'len' is the count of valid bytes held.
 * Write buffers use 'start' as the index in 'data' of the next byte to
 * send, with 'len' counting the valid bytes still waiting to be sent.
 */
struct io_buffer {
	uint8_t *data;		/* backing storage */
	size_t capacity;	/* bytes of room at 'data' */
	size_t start;		/* next byte to send (write buffers) */
	size_t len;		/* number of valid bytes */
};
95 
/* One software breakpoint planted by the debug server. */
struct breakpoint {
	uint64_t gpa;			/* guest physical address patched */
	uint8_t shadow_inst;		/* byte that was there before INT 3 */
	TAILQ_ENTRY(breakpoint) link;	/* membership in 'breakpoints' */
};
101 
102 /*
 * When a vCPU stops due to an event that should be reported to the
104  * debugger, information about the event is stored in this structure.
105  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
106  * and stops other vCPUs so the event can be reported.  The
107  * report_stop() function reports the event for the 'stopped_vcpu'
108  * vCPU.  When the debugger resumes execution via continue or step,
109  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
110  * event handlers until the associated event is reported or disabled.
111  *
112  * An idle vCPU will have all of the boolean fields set to false.
113  *
114  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
115  * released to execute the stepped instruction.  When the vCPU reports
116  * the stepping trap, 'stepped' is set.
117  *
118  * When a vCPU hits a breakpoint set by the debug server,
119  * 'hit_swbreak' is set to true.
120  */
struct vcpu_state {
	bool stepping;		/* released to execute a single step */
	bool stepped;		/* step trap taken, not yet acknowledged */
	bool hit_swbreak;	/* debug server breakpoint hit, not yet acknowledged */
};
126 
127 static struct io_buffer cur_comm, cur_resp;
128 static uint8_t cur_csum;
129 static struct vmctx *ctx;
130 static int cur_fd = -1;
131 static TAILQ_HEAD(, breakpoint) breakpoints;
132 static struct vcpu_state *vcpu_state;
133 static struct vcpu **vcpus;
134 static int cur_vcpu, stopped_vcpu;
135 static bool gdb_active = false;
136 
137 static const int gdb_regset[] = {
138 	VM_REG_GUEST_RAX,
139 	VM_REG_GUEST_RBX,
140 	VM_REG_GUEST_RCX,
141 	VM_REG_GUEST_RDX,
142 	VM_REG_GUEST_RSI,
143 	VM_REG_GUEST_RDI,
144 	VM_REG_GUEST_RBP,
145 	VM_REG_GUEST_RSP,
146 	VM_REG_GUEST_R8,
147 	VM_REG_GUEST_R9,
148 	VM_REG_GUEST_R10,
149 	VM_REG_GUEST_R11,
150 	VM_REG_GUEST_R12,
151 	VM_REG_GUEST_R13,
152 	VM_REG_GUEST_R14,
153 	VM_REG_GUEST_R15,
154 	VM_REG_GUEST_RIP,
155 	VM_REG_GUEST_RFLAGS,
156 	VM_REG_GUEST_CS,
157 	VM_REG_GUEST_SS,
158 	VM_REG_GUEST_DS,
159 	VM_REG_GUEST_ES,
160 	VM_REG_GUEST_FS,
161 	VM_REG_GUEST_GS
162 };
163 
/*
 * Number of bytes sent for each register in gdb_regset[]; entry i here
 * sizes entry i there.
 */
static const int gdb_regsize[] = {
	8, 8, 8, 8,	/* RAX, RBX, RCX, RDX */
	8, 8, 8, 8,	/* RSI, RDI, RBP, RSP */
	8, 8, 8, 8,	/* R8 - R11 */
	8, 8, 8, 8,	/* R12 - R15 */
	8,		/* RIP */
	4,		/* RFLAGS */
	4, 4, 4,	/* CS, SS, DS */
	4, 4, 4		/* ES, FS, GS */
};
190 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace logging, compiled in only when GDB_LOG is
 * defined.  The log file is opened on first use; if it cannot be
 * opened (or restricted, when Capsicum is in use) the message is
 * dropped and the open is retried on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;
	FILE *fp;

	if (logfile == NULL) {
		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		logfile = fp;
	}
	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
/* Logging disabled: calls expand to nothing. */
#define debug(...)
#endif
221 
222 static void	remove_all_sw_breakpoints(void);
223 
224 static int
225 guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
226 {
227 	uint64_t regs[4];
228 	const int regset[4] = {
229 		VM_REG_GUEST_CR0,
230 		VM_REG_GUEST_CR3,
231 		VM_REG_GUEST_CR4,
232 		VM_REG_GUEST_EFER
233 	};
234 
235 	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
236 		return (-1);
237 
238 	/*
239 	 * For the debugger, always pretend to be the kernel (CPL 0),
240 	 * and if long-mode is enabled, always parse addresses as if
241 	 * in 64-bit mode.
242 	 */
243 	paging->cr3 = regs[1];
244 	paging->cpl = 0;
245 	if (regs[3] & EFER_LMA)
246 		paging->cpu_mode = CPU_MODE_64BIT;
247 	else if (regs[0] & CR0_PE)
248 		paging->cpu_mode = CPU_MODE_PROTECTED;
249 	else
250 		paging->cpu_mode = CPU_MODE_REAL;
251 	if (!(regs[0] & CR0_PG))
252 		paging->paging_mode = PAGING_MODE_FLAT;
253 	else if (!(regs[2] & CR4_PAE))
254 		paging->paging_mode = PAGING_MODE_32;
255 	else if (regs[3] & EFER_LME)
256 		paging->paging_mode = (regs[2] & CR4_LA57) ?
257 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
258 	else
259 		paging->paging_mode = PAGING_MODE_PAE;
260 	return (0);
261 }
262 
263 /*
264  * Map a guest virtual address to a physical address (for a given vcpu).
265  * If a guest virtual address is valid, return 1.  If the address is
266  * not valid, return 0.  If an error occurs obtaining the mapping,
267  * return -1.
268  */
269 static int
270 guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
271 {
272 	struct vm_guest_paging paging;
273 	int fault;
274 
275 	if (guest_paging_info(vcpu, &paging) == -1)
276 		return (-1);
277 
278 	/*
279 	 * Always use PROT_READ.  We really care if the VA is
280 	 * accessible, not if the current vCPU can write.
281 	 */
282 	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
283 	    &fault) == -1)
284 		return (-1);
285 	if (fault)
286 		return (0);
287 	return (1);
288 }
289 
290 static void
291 io_buffer_reset(struct io_buffer *io)
292 {
293 
294 	io->start = 0;
295 	io->len = 0;
296 }
297 
298 /* Available room for adding data. */
299 static size_t
300 io_buffer_avail(struct io_buffer *io)
301 {
302 
303 	return (io->capacity - (io->start + io->len));
304 }
305 
306 static uint8_t *
307 io_buffer_head(struct io_buffer *io)
308 {
309 
310 	return (io->data + io->start);
311 }
312 
313 static uint8_t *
314 io_buffer_tail(struct io_buffer *io)
315 {
316 
317 	return (io->data + io->start + io->len);
318 }
319 
320 static void
321 io_buffer_advance(struct io_buffer *io, size_t amount)
322 {
323 
324 	assert(amount <= io->len);
325 	io->start += amount;
326 	io->len -= amount;
327 }
328 
329 static void
330 io_buffer_consume(struct io_buffer *io, size_t amount)
331 {
332 
333 	io_buffer_advance(io, amount);
334 	if (io->len == 0) {
335 		io->start = 0;
336 		return;
337 	}
338 
339 	/*
340 	 * XXX: Consider making this move optional and compacting on a
341 	 * future read() before realloc().
342 	 */
343 	memmove(io->data, io_buffer_head(io), io->len);
344 	io->start = 0;
345 }
346 
347 static void
348 io_buffer_grow(struct io_buffer *io, size_t newsize)
349 {
350 	uint8_t *new_data;
351 	size_t avail, new_cap;
352 
353 	avail = io_buffer_avail(io);
354 	if (newsize <= avail)
355 		return;
356 
357 	new_cap = io->capacity + (newsize - avail);
358 	new_data = realloc(io->data, new_cap);
359 	if (new_data == NULL)
360 		err(1, "Failed to grow GDB I/O buffer");
361 	io->data = new_data;
362 	io->capacity = new_cap;
363 }
364 
365 static bool
366 response_pending(void)
367 {
368 
369 	if (cur_resp.start == 0 && cur_resp.len == 0)
370 		return (false);
371 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
372 		return (false);
373 	return (true);
374 }
375 
376 static void
377 close_connection(void)
378 {
379 
380 	/*
381 	 * XXX: This triggers a warning because mevent does the close
382 	 * before the EV_DELETE.
383 	 */
384 	pthread_mutex_lock(&gdb_lock);
385 	mevent_delete(write_event);
386 	mevent_delete_close(read_event);
387 	write_event = NULL;
388 	read_event = NULL;
389 	io_buffer_reset(&cur_comm);
390 	io_buffer_reset(&cur_resp);
391 	cur_fd = -1;
392 
393 	remove_all_sw_breakpoints();
394 
395 	/* Clear any pending events. */
396 	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
397 
398 	/* Resume any stopped vCPUs. */
399 	gdb_resume_vcpus();
400 	pthread_mutex_unlock(&gdb_lock);
401 }
402 
/* Convert a value in 0..15 to its lowercase ASCII hex digit. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
412 
/*
 * Convert one ASCII hex digit (either case) to its value.  Any other
 * character yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t value;

	if (v >= '0' && v <= '9')
		value = v - '0';
	else if (v >= 'a' && v <= 'f')
		value = v - 'a' + 10;
	else if (v >= 'A' && v <= 'F')
		value = v - 'A' + 10;
	else
		value = 0xF;
	return (value);
}
425 
/*
 * Parse 'len' characters at 'p' as a hexadecimal number written most
 * significant digit first.  An empty string parses as 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t v;
	size_t i;

	v = 0;
	for (i = 0; i < len; i++)
		v = (v << 4) | parse_digit(p[i]);
	return (v);
}
441 
/* Parse the two hex characters at 'p' (high nibble first) as one byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
448 
449 static void
450 send_pending_data(int fd)
451 {
452 	ssize_t nwritten;
453 
454 	if (cur_resp.len == 0) {
455 		mevent_disable(write_event);
456 		return;
457 	}
458 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
459 	if (nwritten == -1) {
460 		warn("Write to GDB socket failed");
461 		close_connection();
462 	} else {
463 		io_buffer_advance(&cur_resp, nwritten);
464 		if (cur_resp.len == 0)
465 			mevent_disable(write_event);
466 		else
467 			mevent_enable(write_event);
468 	}
469 }
470 
471 /* Append a single character to the output buffer. */
472 static void
473 send_char(uint8_t data)
474 {
475 	io_buffer_grow(&cur_resp, 1);
476 	*io_buffer_tail(&cur_resp) = data;
477 	cur_resp.len++;
478 }
479 
480 /* Append an array of bytes to the output buffer. */
481 static void
482 send_data(const uint8_t *data, size_t len)
483 {
484 
485 	io_buffer_grow(&cur_resp, len);
486 	memcpy(io_buffer_tail(&cur_resp), data, len);
487 	cur_resp.len += len;
488 }
489 
/* Write 'v' as two hex characters, high nibble first, into buf[0..1]. */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t hi, lo;

	hi = v >> 4;
	lo = v & 0xf;
	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
497 
/*
 * Queue one byte in the response buffer as two hex characters.  The
 * packet checksum is not touched.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
510 
511 static void
512 start_packet(void)
513 {
514 
515 	send_char('$');
516 	cur_csum = 0;
517 }
518 
519 static void
520 finish_packet(void)
521 {
522 
523 	send_char('#');
524 	send_byte(cur_csum);
525 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
526 }
527 
528 /*
529  * Append a single character (for the packet payload) and update the
530  * checksum.
531  */
532 static void
533 append_char(uint8_t v)
534 {
535 
536 	send_char(v);
537 	cur_csum += v;
538 }
539 
540 /*
541  * Append an array of bytes (for the packet payload) and update the
542  * checksum.
543  */
544 static void
545 append_packet_data(const uint8_t *data, size_t len)
546 {
547 
548 	send_data(data, len);
549 	while (len > 0) {
550 		cur_csum += *data;
551 		data++;
552 		len--;
553 	}
554 }
555 
/*
 * Append a NUL-terminated string (without the terminator) to the
 * payload of the current packet, updating the checksum.
 */
static void
append_string(const char *str)
{

	/*
	 * The payload helpers take unsigned bytes; convert explicitly
	 * rather than relying on an implicit char/uint8_t pointer
	 * conversion.
	 */
	append_packet_data((const uint8_t *)str, strlen(str));
}
562 
/*
 * Append one byte to the packet payload as two hex characters,
 * updating the checksum.
 */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
571 
/*
 * Append the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

	while (len > 0) {
		append_byte(value);
		value >>= 8;
		len--;
	}
}
582 
/*
 * Append the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.  A 'len' of
 * zero appends nothing.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	size_t i;

	/* A zero-length variable length array is undefined behaviour. */
	if (len == 0)
		return;

	/* Unsigned bytes, matching what format_byte() and the payload
	 * helpers expect. */
	uint8_t buf[len * 2];

	/* Fill from the end so the least significant byte lands last. */
	for (i = len; i > 0; i--) {
		format_byte(value, &buf[(i - 1) * 2]);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
595 
/*
 * Append 'value' to the packet payload as big-endian hex using the
 * fewest whole bytes that hold it.  Zero is written as a single '0'.
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value == 0) {
		append_char('0');
		return;
	}
	nbytes = (fls(value) + 7) / 8;
	append_unsigned_be(value, nbytes);
}
605 
/*
 * Append every character of the NUL-terminated string 'str' to the
 * packet payload as a pair of hex characters.
 */
static void
append_asciihex(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		append_byte(*p);
}
615 
/* Queue a packet with an empty payload. */
static void
send_empty_response(void)
{
	start_packet();
	finish_packet();
}
623 
/*
 * Queue an error packet: 'E' followed by the low byte of 'error' as
 * two hex characters.
 */
static void
send_error(int error)
{
	start_packet();
	append_char('E');
	append_byte(error);
	finish_packet();
}
633 
/* Queue a packet whose payload is the string "OK". */
static void
send_ok(void)
{
	start_packet();
	append_string("OK");
	finish_packet();
}
642 
/*
 * Parse a thread id field of 'len' characters.  An empty field yields
 * -2, the literal "0" yields 0, the literal "-1" yields -1, and
 * anything else is parsed as a hexadecimal number.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	if (len == 0)
		return (-2);
	if (len == 1 && data[0] == '0')
		return (0);
	if (len == 2 && data[0] == '-' && data[1] == '1')
		return (-1);
	return (parse_integer(data, len));
}
655 
656 /*
657  * Report the current stop event to the debugger.  If the stop is due
658  * to an event triggered on a specific vCPU such as a breakpoint or
659  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
660  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
661  * the reporting vCPU for vCPU events.
662  */
663 static void
664 report_stop(bool set_cur_vcpu)
665 {
666 	struct vcpu_state *vs;
667 
668 	start_packet();
669 	if (stopped_vcpu == -1) {
670 		append_char('S');
671 		append_byte(GDB_SIGNAL_TRAP);
672 	} else {
673 		vs = &vcpu_state[stopped_vcpu];
674 		if (set_cur_vcpu)
675 			cur_vcpu = stopped_vcpu;
676 		append_char('T');
677 		append_byte(GDB_SIGNAL_TRAP);
678 		append_string("thread:");
679 		append_integer(stopped_vcpu + 1);
680 		append_char(';');
681 		if (vs->hit_swbreak) {
682 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
683 			if (swbreak_enabled)
684 				append_string("swbreak:;");
685 		} else if (vs->stepped)
686 			debug("$vCPU %d reporting step\n", stopped_vcpu);
687 		else
688 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
689 	}
690 	finish_packet();
691 	report_next_stop = false;
692 }
693 
694 /*
695  * If this stop is due to a vCPU event, clear that event to mark it as
696  * acknowledged.
697  */
698 static void
699 discard_stop(void)
700 {
701 	struct vcpu_state *vs;
702 
703 	if (stopped_vcpu != -1) {
704 		vs = &vcpu_state[stopped_vcpu];
705 		vs->hit_swbreak = false;
706 		vs->stepped = false;
707 		stopped_vcpu = -1;
708 	}
709 	report_next_stop = true;
710 }
711 
712 static void
713 gdb_finish_suspend_vcpus(void)
714 {
715 
716 	if (first_stop) {
717 		first_stop = false;
718 		stopped_vcpu = -1;
719 	} else if (report_next_stop) {
720 		assert(!response_pending());
721 		report_stop(true);
722 		send_pending_data(cur_fd);
723 	}
724 }
725 
726 /*
727  * vCPU threads invoke this function whenever the vCPU enters the
728  * debug server to pause or report an event.  vCPU threads wait here
729  * as long as the debug server keeps them suspended.
730  */
731 static void
732 _gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
733 {
734 	int vcpuid = vcpu_id(vcpu);
735 
736 	debug("$vCPU %d suspending\n", vcpuid);
737 	CPU_SET(vcpuid, &vcpus_waiting);
738 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
739 		gdb_finish_suspend_vcpus();
740 	while (CPU_ISSET(vcpuid, &vcpus_suspended))
741 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
742 	CPU_CLR(vcpuid, &vcpus_waiting);
743 	debug("$vCPU %d resuming\n", vcpuid);
744 }
745 
746 /*
747  * Invoked at the start of a vCPU thread's execution to inform the
748  * debug server about the new thread.
749  */
750 void
751 gdb_cpu_add(struct vcpu *vcpu)
752 {
753 	int vcpuid;
754 
755 	if (!gdb_active)
756 		return;
757 	vcpuid = vcpu_id(vcpu);
758 	debug("$vCPU %d starting\n", vcpuid);
759 	pthread_mutex_lock(&gdb_lock);
760 	assert(vcpuid < guest_ncpus);
761 	assert(vcpus[vcpuid] == NULL);
762 	vcpus[vcpuid] = vcpu;
763 	CPU_SET(vcpuid, &vcpus_active);
764 	if (!TAILQ_EMPTY(&breakpoints)) {
765 		vm_set_capability(vcpu, VM_CAP_BPT_EXIT, 1);
766 		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
767 	}
768 
769 	/*
770 	 * If a vcpu is added while vcpus are stopped, suspend the new
771 	 * vcpu so that it will pop back out with a debug exit before
772 	 * executing the first instruction.
773 	 */
774 	if (!CPU_EMPTY(&vcpus_suspended)) {
775 		CPU_SET(vcpuid, &vcpus_suspended);
776 		_gdb_cpu_suspend(vcpu, false);
777 	}
778 	pthread_mutex_unlock(&gdb_lock);
779 }
780 
781 /*
782  * Invoked by vCPU before resuming execution.  This enables stepping
783  * if the vCPU is marked as stepping.
784  */
785 static void
786 gdb_cpu_resume(struct vcpu *vcpu)
787 {
788 	struct vcpu_state *vs;
789 	int error;
790 
791 	vs = &vcpu_state[vcpu_id(vcpu)];
792 
793 	/*
794 	 * Any pending event should already be reported before
795 	 * resuming.
796 	 */
797 	assert(vs->hit_swbreak == false);
798 	assert(vs->stepped == false);
799 	if (vs->stepping) {
800 		error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, 1);
801 		assert(error == 0);
802 
803 		error = vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, 1);
804 		assert(error == 0);
805 	}
806 }
807 
808 /*
809  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
810  * has been suspended due to an event on different vCPU or in response
811  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
812  */
813 void
814 gdb_cpu_suspend(struct vcpu *vcpu)
815 {
816 
817 	if (!gdb_active)
818 		return;
819 	pthread_mutex_lock(&gdb_lock);
820 	_gdb_cpu_suspend(vcpu, true);
821 	gdb_cpu_resume(vcpu);
822 	pthread_mutex_unlock(&gdb_lock);
823 }
824 
825 static void
826 gdb_suspend_vcpus(void)
827 {
828 
829 	assert(pthread_mutex_isowned_np(&gdb_lock));
830 	debug("suspending all CPUs\n");
831 	vcpus_suspended = vcpus_active;
832 	vm_suspend_all_cpus(ctx);
833 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
834 		gdb_finish_suspend_vcpus();
835 }
836 
837 /*
838  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
839  * the VT-x-specific MTRAP exit.
840  */
841 void
842 gdb_cpu_mtrap(struct vcpu *vcpu)
843 {
844 	struct vcpu_state *vs;
845 	int vcpuid;
846 
847 	if (!gdb_active)
848 		return;
849 	vcpuid = vcpu_id(vcpu);
850 	debug("$vCPU %d MTRAP\n", vcpuid);
851 	pthread_mutex_lock(&gdb_lock);
852 	vs = &vcpu_state[vcpuid];
853 	if (vs->stepping) {
854 		vs->stepping = false;
855 		vs->stepped = true;
856 		vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, 0);
857 		vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, 0);
858 
859 		while (vs->stepped) {
860 			if (stopped_vcpu == -1) {
861 				debug("$vCPU %d reporting step\n", vcpuid);
862 				stopped_vcpu = vcpuid;
863 				gdb_suspend_vcpus();
864 			}
865 			_gdb_cpu_suspend(vcpu, true);
866 		}
867 		gdb_cpu_resume(vcpu);
868 	}
869 	pthread_mutex_unlock(&gdb_lock);
870 }
871 
872 static struct breakpoint *
873 find_breakpoint(uint64_t gpa)
874 {
875 	struct breakpoint *bp;
876 
877 	TAILQ_FOREACH(bp, &breakpoints, link) {
878 		if (bp->gpa == gpa)
879 			return (bp);
880 	}
881 	return (NULL);
882 }
883 
884 void
885 gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
886 {
887 	struct breakpoint *bp;
888 	struct vcpu_state *vs;
889 	uint64_t gpa;
890 	int error, vcpuid;
891 
892 	if (!gdb_active) {
893 		fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
894 		exit(4);
895 	}
896 	vcpuid = vcpu_id(vcpu);
897 	pthread_mutex_lock(&gdb_lock);
898 	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
899 	assert(error == 1);
900 	bp = find_breakpoint(gpa);
901 	if (bp != NULL) {
902 		vs = &vcpu_state[vcpuid];
903 		assert(vs->stepping == false);
904 		assert(vs->stepped == false);
905 		assert(vs->hit_swbreak == false);
906 		vs->hit_swbreak = true;
907 		vm_set_register(vcpu, VM_REG_GUEST_RIP, vmexit->rip);
908 		for (;;) {
909 			if (stopped_vcpu == -1) {
910 				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
911 				    vcpuid, vmexit->rip);
912 				stopped_vcpu = vcpuid;
913 				gdb_suspend_vcpus();
914 			}
915 			_gdb_cpu_suspend(vcpu, true);
916 			if (!vs->hit_swbreak) {
917 				/* Breakpoint reported. */
918 				break;
919 			}
920 			bp = find_breakpoint(gpa);
921 			if (bp == NULL) {
922 				/* Breakpoint was removed. */
923 				vs->hit_swbreak = false;
924 				break;
925 			}
926 		}
927 		gdb_cpu_resume(vcpu);
928 	} else {
929 		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
930 		    vmexit->rip);
931 		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
932 		    vmexit->u.bpt.inst_length);
933 		assert(error == 0);
934 		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
935 		assert(error == 0);
936 	}
937 	pthread_mutex_unlock(&gdb_lock);
938 }
939 
940 static bool
941 gdb_step_vcpu(struct vcpu *vcpu)
942 {
943 	int error, val, vcpuid;
944 
945 	vcpuid = vcpu_id(vcpu);
946 	debug("$vCPU %d step\n", vcpuid);
947 	error = vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val);
948 	if (error < 0)
949 		return (false);
950 
951 	discard_stop();
952 	vcpu_state[vcpuid].stepping = true;
953 	vm_resume_cpu(vcpu);
954 	CPU_CLR(vcpuid, &vcpus_suspended);
955 	pthread_cond_broadcast(&idle_vcpus);
956 	return (true);
957 }
958 
959 static void
960 gdb_resume_vcpus(void)
961 {
962 
963 	assert(pthread_mutex_isowned_np(&gdb_lock));
964 	vm_resume_all_cpus(ctx);
965 	debug("resuming all CPUs\n");
966 	CPU_ZERO(&vcpus_suspended);
967 	pthread_cond_broadcast(&idle_vcpus);
968 }
969 
970 static void
971 gdb_read_regs(void)
972 {
973 	uint64_t regvals[nitems(gdb_regset)];
974 
975 	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
976 	    gdb_regset, regvals) == -1) {
977 		send_error(errno);
978 		return;
979 	}
980 	start_packet();
981 	for (size_t i = 0; i < nitems(regvals); i++)
982 		append_unsigned_native(regvals[i], gdb_regsize[i]);
983 	finish_packet();
984 }
985 
986 static void
987 gdb_read_mem(const uint8_t *data, size_t len)
988 {
989 	uint64_t gpa, gva, val;
990 	uint8_t *cp;
991 	size_t resid, todo, bytes;
992 	bool started;
993 	int error;
994 
995 	/* Skip 'm' */
996 	data += 1;
997 	len -= 1;
998 
999 	/* Parse and consume address. */
1000 	cp = memchr(data, ',', len);
1001 	if (cp == NULL || cp == data) {
1002 		send_error(EINVAL);
1003 		return;
1004 	}
1005 	gva = parse_integer(data, cp - data);
1006 	len -= (cp - data) + 1;
1007 	data += (cp - data) + 1;
1008 
1009 	/* Parse length. */
1010 	resid = parse_integer(data, len);
1011 
1012 	started = false;
1013 	while (resid > 0) {
1014 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1015 		if (error == -1) {
1016 			if (started)
1017 				finish_packet();
1018 			else
1019 				send_error(errno);
1020 			return;
1021 		}
1022 		if (error == 0) {
1023 			if (started)
1024 				finish_packet();
1025 			else
1026 				send_error(EFAULT);
1027 			return;
1028 		}
1029 
1030 		/* Read bytes from current page. */
1031 		todo = getpagesize() - gpa % getpagesize();
1032 		if (todo > resid)
1033 			todo = resid;
1034 
1035 		cp = paddr_guest2host(ctx, gpa, todo);
1036 		if (cp != NULL) {
1037 			/*
1038 			 * If this page is guest RAM, read it a byte
1039 			 * at a time.
1040 			 */
1041 			if (!started) {
1042 				start_packet();
1043 				started = true;
1044 			}
1045 			while (todo > 0) {
1046 				append_byte(*cp);
1047 				cp++;
1048 				gpa++;
1049 				gva++;
1050 				resid--;
1051 				todo--;
1052 			}
1053 		} else {
1054 			/*
1055 			 * If this page isn't guest RAM, try to handle
1056 			 * it via MMIO.  For MMIO requests, use
1057 			 * aligned reads of words when possible.
1058 			 */
1059 			while (todo > 0) {
1060 				if (gpa & 1 || todo == 1)
1061 					bytes = 1;
1062 				else if (gpa & 2 || todo == 2)
1063 					bytes = 2;
1064 				else
1065 					bytes = 4;
1066 				error = read_mem(vcpus[cur_vcpu], gpa, &val,
1067 				    bytes);
1068 				if (error == 0) {
1069 					if (!started) {
1070 						start_packet();
1071 						started = true;
1072 					}
1073 					gpa += bytes;
1074 					gva += bytes;
1075 					resid -= bytes;
1076 					todo -= bytes;
1077 					while (bytes > 0) {
1078 						append_byte(val);
1079 						val >>= 8;
1080 						bytes--;
1081 					}
1082 				} else {
1083 					if (started)
1084 						finish_packet();
1085 					else
1086 						send_error(EFAULT);
1087 					return;
1088 				}
1089 			}
1090 		}
1091 		assert(resid == 0 || gpa % getpagesize() == 0);
1092 	}
1093 	if (!started)
1094 		start_packet();
1095 	finish_packet();
1096 }
1097 
/*
 * Handle a memory write command of the form
 * "M<addr>,<length>:<hex bytes>", with <addr> and <length> in hex;
 * 'data' points at the 'M'.
 *
 * The payload must contain exactly two hex characters per byte of
 * <length>; otherwise EINVAL is reported and nothing is written.
 * Memory is written page by page starting at guest virtual address
 * <addr>.  Guest RAM is written directly; anything else is written via
 * write_mem() using aligned 1-, 2- or 4-byte accesses.  The first
 * failure sends an error packet and stops; bytes written before the
 * failure stay written.  "OK" is sent once everything is written.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/*
	 * Verify the available bytes match the length.  The comparison
	 * divides 'len' instead of doubling 'resid': the length comes
	 * from the debugger, and doubling a huge value would wrap around
	 * and let a mismatched packet through, after which the loops
	 * below would read past the end of the packet.
	 */
	if (len % 2 != 0 || len / 2 != resid) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 *
			 * The hex payload lists bytes in memory order,
			 * so a multi-byte value parsed as one big-endian
			 * number is converted with be16toh()/be32toh()
			 * before being written.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(vcpus[cur_vcpu], gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
1206 
1207 static bool
1208 set_breakpoint_caps(bool enable)
1209 {
1210 	cpuset_t mask;
1211 	int vcpu;
1212 
1213 	mask = vcpus_active;
1214 	while (!CPU_EMPTY(&mask)) {
1215 		vcpu = CPU_FFS(&mask) - 1;
1216 		CPU_CLR(vcpu, &mask);
1217 		if (vm_set_capability(vcpus[vcpu], VM_CAP_BPT_EXIT,
1218 		    enable ? 1 : 0) < 0)
1219 			return (false);
1220 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1221 		    enable ? "en" : "dis");
1222 	}
1223 	return (true);
1224 }
1225 
1226 static void
1227 remove_all_sw_breakpoints(void)
1228 {
1229 	struct breakpoint *bp, *nbp;
1230 	uint8_t *cp;
1231 
1232 	if (TAILQ_EMPTY(&breakpoints))
1233 		return;
1234 
1235 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1236 		debug("remove breakpoint at %#lx\n", bp->gpa);
1237 		cp = paddr_guest2host(ctx, bp->gpa, 1);
1238 		*cp = bp->shadow_inst;
1239 		TAILQ_REMOVE(&breakpoints, bp, link);
1240 		free(bp);
1241 	}
1242 	TAILQ_INIT(&breakpoints);
1243 	set_breakpoint_caps(false);
1244 }
1245 
1246 static void
1247 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1248 {
1249 	struct breakpoint *bp;
1250 	uint64_t gpa;
1251 	uint8_t *cp;
1252 	int error;
1253 
1254 	if (kind != 1) {
1255 		send_error(EINVAL);
1256 		return;
1257 	}
1258 
1259 	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1260 	if (error == -1) {
1261 		send_error(errno);
1262 		return;
1263 	}
1264 	if (error == 0) {
1265 		send_error(EFAULT);
1266 		return;
1267 	}
1268 
1269 	cp = paddr_guest2host(ctx, gpa, 1);
1270 
1271 	/* Only permit breakpoints in guest RAM. */
1272 	if (cp == NULL) {
1273 		send_error(EFAULT);
1274 		return;
1275 	}
1276 
1277 	/* Find any existing breakpoint. */
1278 	bp = find_breakpoint(gpa);
1279 
1280 	/*
1281 	 * Silently ignore duplicate commands since the protocol
1282 	 * requires these packets to be idempotent.
1283 	 */
1284 	if (insert) {
1285 		if (bp == NULL) {
1286 			if (TAILQ_EMPTY(&breakpoints) &&
1287 			    !set_breakpoint_caps(true)) {
1288 				send_empty_response();
1289 				return;
1290 			}
1291 			bp = malloc(sizeof(*bp));
1292 			bp->gpa = gpa;
1293 			bp->shadow_inst = *cp;
1294 			*cp = 0xcc;	/* INT 3 */
1295 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1296 			debug("new breakpoint at %#lx\n", gpa);
1297 		}
1298 	} else {
1299 		if (bp != NULL) {
1300 			debug("remove breakpoint at %#lx\n", gpa);
1301 			*cp = bp->shadow_inst;
1302 			TAILQ_REMOVE(&breakpoints, bp, link);
1303 			free(bp);
1304 			if (TAILQ_EMPTY(&breakpoints))
1305 				set_breakpoint_caps(false);
1306 		}
1307 	}
1308 	send_ok();
1309 }
1310 
/*
 * Decode a 'Z' (insert) or 'z' (remove) breakpoint packet of the form
 * "<Z|z><type>,<addr>,<kind>" and dispatch it.
 *
 * A missing or empty field is answered with EINVAL.  A packet that
 * carries a ';' suffix (conditions or commands) and any breakpoint type
 * other than 0 are answered with an empty response.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	const uint8_t *sep;
	uint64_t gva;
	size_t field;
	bool insert;
	int kind, type;

	/* The packet letter selects the operation; step past it. */
	insert = (data[0] == 'Z');
	data++;
	len--;

	/* Field 1: breakpoint type, terminated by ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	field = sep - data;
	type = parse_integer(data, field);
	data += field + 1;
	len -= field + 1;

	/* Field 2: address, terminated by ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	field = sep - data;
	gva = parse_integer(data, field);
	data += field + 1;
	len -= field + 1;

	/* Field 3: kind, which must run to the end of the packet. */
	sep = memchr(data, ';', len);
	if (sep == data) {
		send_error(EINVAL);
		return;
	}
	if (sep != NULL) {
		/*
		 * Neither ConditionalBreakpoints nor BreakpointCommands
		 * is advertised, so the remote end should never append
		 * conditions or commands.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);

	/* Only type 0 breakpoints are handled here. */
	if (type == 0)
		update_sw_breakpoint(gva, kind, insert);
	else
		send_empty_response();
}
1374 
/*
 * Return true when the packet bytes in 'data' (length 'len') begin with
 * the NUL-terminated string 'cmd'.  This is a prefix comparison: bytes
 * following the command name are not examined.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen = strlen(cmd);

	return (len >= cmdlen && memcmp(data, cmd, cmdlen) == 0);
}
1383 
1384 static void
1385 check_features(const uint8_t *data, size_t len)
1386 {
1387 	char *feature, *next_feature, *str, *value;
1388 	bool supported;
1389 
1390 	str = malloc(len + 1);
1391 	memcpy(str, data, len);
1392 	str[len] = '\0';
1393 	next_feature = str;
1394 
1395 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1396 		/*
1397 		 * Null features shouldn't exist, but skip if they
1398 		 * do.
1399 		 */
1400 		if (strcmp(feature, "") == 0)
1401 			continue;
1402 
1403 		/*
1404 		 * Look for the value or supported / not supported
1405 		 * flag.
1406 		 */
1407 		value = strchr(feature, '=');
1408 		if (value != NULL) {
1409 			*value = '\0';
1410 			value++;
1411 			supported = true;
1412 		} else {
1413 			value = feature + strlen(feature) - 1;
1414 			switch (*value) {
1415 			case '+':
1416 				supported = true;
1417 				break;
1418 			case '-':
1419 				supported = false;
1420 				break;
1421 			default:
1422 				/*
1423 				 * This is really a protocol error,
1424 				 * but we just ignore malformed
1425 				 * features for ease of
1426 				 * implementation.
1427 				 */
1428 				continue;
1429 			}
1430 			value = NULL;
1431 		}
1432 
1433 		if (strcmp(feature, "swbreak") == 0)
1434 			swbreak_enabled = supported;
1435 	}
1436 	free(str);
1437 
1438 	start_packet();
1439 
1440 	/* This is an arbitrary limit. */
1441 	append_string("PacketSize=4096");
1442 	append_string(";swbreak+");
1443 	finish_packet();
1444 }
1445 
1446 static void
1447 gdb_query(const uint8_t *data, size_t len)
1448 {
1449 
1450 	/*
1451 	 * TODO:
1452 	 * - qSearch
1453 	 */
1454 	if (command_equals(data, len, "qAttached")) {
1455 		start_packet();
1456 		append_char('1');
1457 		finish_packet();
1458 	} else if (command_equals(data, len, "qC")) {
1459 		start_packet();
1460 		append_string("QC");
1461 		append_integer(cur_vcpu + 1);
1462 		finish_packet();
1463 	} else if (command_equals(data, len, "qfThreadInfo")) {
1464 		cpuset_t mask;
1465 		bool first;
1466 		int vcpu;
1467 
1468 		if (CPU_EMPTY(&vcpus_active)) {
1469 			send_error(EINVAL);
1470 			return;
1471 		}
1472 		mask = vcpus_active;
1473 		start_packet();
1474 		append_char('m');
1475 		first = true;
1476 		while (!CPU_EMPTY(&mask)) {
1477 			vcpu = CPU_FFS(&mask) - 1;
1478 			CPU_CLR(vcpu, &mask);
1479 			if (first)
1480 				first = false;
1481 			else
1482 				append_char(',');
1483 			append_integer(vcpu + 1);
1484 		}
1485 		finish_packet();
1486 	} else if (command_equals(data, len, "qsThreadInfo")) {
1487 		start_packet();
1488 		append_char('l');
1489 		finish_packet();
1490 	} else if (command_equals(data, len, "qSupported")) {
1491 		data += strlen("qSupported");
1492 		len -= strlen("qSupported");
1493 		check_features(data, len);
1494 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1495 		char buf[16];
1496 		int tid;
1497 
1498 		data += strlen("qThreadExtraInfo");
1499 		len -= strlen("qThreadExtraInfo");
1500 		if (*data != ',') {
1501 			send_error(EINVAL);
1502 			return;
1503 		}
1504 		tid = parse_threadid(data + 1, len - 1);
1505 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1506 			send_error(EINVAL);
1507 			return;
1508 		}
1509 
1510 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1511 		start_packet();
1512 		append_asciihex(buf);
1513 		finish_packet();
1514 	} else
1515 		send_empty_response();
1516 }
1517 
1518 static void
1519 handle_command(const uint8_t *data, size_t len)
1520 {
1521 
1522 	/* Reject packets with a sequence-id. */
1523 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1524 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1525 		send_empty_response();
1526 		return;
1527 	}
1528 
1529 	switch (*data) {
1530 	case 'c':
1531 		if (len != 1) {
1532 			send_error(EINVAL);
1533 			break;
1534 		}
1535 
1536 		discard_stop();
1537 		gdb_resume_vcpus();
1538 		break;
1539 	case 'D':
1540 		send_ok();
1541 
1542 		/* TODO: Resume any stopped CPUs. */
1543 		break;
1544 	case 'g': {
1545 		gdb_read_regs();
1546 		break;
1547 	}
1548 	case 'H': {
1549 		int tid;
1550 
1551 		if (data[1] != 'g' && data[1] != 'c') {
1552 			send_error(EINVAL);
1553 			break;
1554 		}
1555 		tid = parse_threadid(data + 2, len - 2);
1556 		if (tid == -2) {
1557 			send_error(EINVAL);
1558 			break;
1559 		}
1560 
1561 		if (CPU_EMPTY(&vcpus_active)) {
1562 			send_error(EINVAL);
1563 			break;
1564 		}
1565 		if (tid == -1 || tid == 0)
1566 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1567 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1568 			cur_vcpu = tid - 1;
1569 		else {
1570 			send_error(EINVAL);
1571 			break;
1572 		}
1573 		send_ok();
1574 		break;
1575 	}
1576 	case 'm':
1577 		gdb_read_mem(data, len);
1578 		break;
1579 	case 'M':
1580 		gdb_write_mem(data, len);
1581 		break;
1582 	case 'T': {
1583 		int tid;
1584 
1585 		tid = parse_threadid(data + 1, len - 1);
1586 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1587 			send_error(EINVAL);
1588 			return;
1589 		}
1590 		send_ok();
1591 		break;
1592 	}
1593 	case 'q':
1594 		gdb_query(data, len);
1595 		break;
1596 	case 's':
1597 		if (len != 1) {
1598 			send_error(EINVAL);
1599 			break;
1600 		}
1601 
1602 		/* Don't send a reply until a stop occurs. */
1603 		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
1604 			send_error(EOPNOTSUPP);
1605 			break;
1606 		}
1607 		break;
1608 	case 'z':
1609 	case 'Z':
1610 		parse_breakpoint(data, len);
1611 		break;
1612 	case '?':
1613 		report_stop(false);
1614 		break;
1615 	case 'G': /* TODO */
1616 	case 'v':
1617 		/* Handle 'vCont' */
1618 		/* 'vCtrlC' */
1619 	case 'p': /* TODO */
1620 	case 'P': /* TODO */
1621 	case 'Q': /* TODO */
1622 	case 't': /* TODO */
1623 	case 'X': /* TODO */
1624 	default:
1625 		send_empty_response();
1626 	}
1627 }
1628 
1629 /* Check for a valid packet in the command buffer. */
1630 static void
1631 check_command(int fd)
1632 {
1633 	uint8_t *head, *hash, *p, sum;
1634 	size_t avail, plen;
1635 
1636 	for (;;) {
1637 		avail = cur_comm.len;
1638 		if (avail == 0)
1639 			return;
1640 		head = io_buffer_head(&cur_comm);
1641 		switch (*head) {
1642 		case 0x03:
1643 			debug("<- Ctrl-C\n");
1644 			io_buffer_consume(&cur_comm, 1);
1645 
1646 			gdb_suspend_vcpus();
1647 			break;
1648 		case '+':
1649 			/* ACK of previous response. */
1650 			debug("<- +\n");
1651 			if (response_pending())
1652 				io_buffer_reset(&cur_resp);
1653 			io_buffer_consume(&cur_comm, 1);
1654 			if (stopped_vcpu != -1 && report_next_stop) {
1655 				report_stop(true);
1656 				send_pending_data(fd);
1657 			}
1658 			break;
1659 		case '-':
1660 			/* NACK of previous response. */
1661 			debug("<- -\n");
1662 			if (response_pending()) {
1663 				cur_resp.len += cur_resp.start;
1664 				cur_resp.start = 0;
1665 				if (cur_resp.data[0] == '+')
1666 					io_buffer_advance(&cur_resp, 1);
1667 				debug("-> %.*s\n", (int)cur_resp.len,
1668 				    io_buffer_head(&cur_resp));
1669 			}
1670 			io_buffer_consume(&cur_comm, 1);
1671 			send_pending_data(fd);
1672 			break;
1673 		case '$':
1674 			/* Packet. */
1675 
1676 			if (response_pending()) {
1677 				warnx("New GDB command while response in "
1678 				    "progress");
1679 				io_buffer_reset(&cur_resp);
1680 			}
1681 
1682 			/* Is packet complete? */
1683 			hash = memchr(head, '#', avail);
1684 			if (hash == NULL)
1685 				return;
1686 			plen = (hash - head + 1) + 2;
1687 			if (avail < plen)
1688 				return;
1689 			debug("<- %.*s\n", (int)plen, head);
1690 
1691 			/* Verify checksum. */
1692 			for (sum = 0, p = head + 1; p < hash; p++)
1693 				sum += *p;
1694 			if (sum != parse_byte(hash + 1)) {
1695 				io_buffer_consume(&cur_comm, plen);
1696 				debug("-> -\n");
1697 				send_char('-');
1698 				send_pending_data(fd);
1699 				break;
1700 			}
1701 			send_char('+');
1702 
1703 			handle_command(head + 1, hash - (head + 1));
1704 			io_buffer_consume(&cur_comm, plen);
1705 			if (!response_pending())
1706 				debug("-> +\n");
1707 			send_pending_data(fd);
1708 			break;
1709 		default:
1710 			/* XXX: Possibly drop connection instead. */
1711 			debug("-> %02x\n", *head);
1712 			io_buffer_consume(&cur_comm, 1);
1713 			break;
1714 		}
1715 	}
1716 }
1717 
1718 static void
1719 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1720 {
1721 	size_t pending;
1722 	ssize_t nread;
1723 	int n;
1724 
1725 	if (ioctl(fd, FIONREAD, &n) == -1) {
1726 		warn("FIONREAD on GDB socket");
1727 		return;
1728 	}
1729 	assert(n >= 0);
1730 	pending = n;
1731 
1732 	/*
1733 	 * 'pending' might be zero due to EOF.  We need to call read
1734 	 * with a non-zero length to detect EOF.
1735 	 */
1736 	if (pending == 0)
1737 		pending = 1;
1738 
1739 	/* Ensure there is room in the command buffer. */
1740 	io_buffer_grow(&cur_comm, pending);
1741 	assert(io_buffer_avail(&cur_comm) >= pending);
1742 
1743 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1744 	if (nread == 0) {
1745 		close_connection();
1746 	} else if (nread == -1) {
1747 		if (errno == EAGAIN)
1748 			return;
1749 
1750 		warn("Read from GDB socket");
1751 		close_connection();
1752 	} else {
1753 		cur_comm.len += nread;
1754 		pthread_mutex_lock(&gdb_lock);
1755 		check_command(fd);
1756 		pthread_mutex_unlock(&gdb_lock);
1757 	}
1758 }
1759 
1760 static void
1761 gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
1762 {
1763 
1764 	send_pending_data(fd);
1765 }
1766 
1767 static void
1768 new_connection(int fd, enum ev_type event __unused, void *arg)
1769 {
1770 	int optval, s;
1771 
1772 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1773 	if (s == -1) {
1774 		if (arg != NULL)
1775 			err(1, "Failed accepting initial GDB connection");
1776 
1777 		/* Silently ignore errors post-startup. */
1778 		return;
1779 	}
1780 
1781 	optval = 1;
1782 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1783 	    -1) {
1784 		warn("Failed to disable SIGPIPE for GDB connection");
1785 		close(s);
1786 		return;
1787 	}
1788 
1789 	pthread_mutex_lock(&gdb_lock);
1790 	if (cur_fd != -1) {
1791 		close(s);
1792 		warnx("Ignoring additional GDB connection.");
1793 	}
1794 
1795 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1796 	if (read_event == NULL) {
1797 		if (arg != NULL)
1798 			err(1, "Failed to setup initial GDB connection");
1799 		pthread_mutex_unlock(&gdb_lock);
1800 		return;
1801 	}
1802 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1803 	if (write_event == NULL) {
1804 		if (arg != NULL)
1805 			err(1, "Failed to setup initial GDB connection");
1806 		mevent_delete_close(read_event);
1807 		read_event = NULL;
1808 	}
1809 
1810 	cur_fd = s;
1811 	cur_vcpu = 0;
1812 	stopped_vcpu = -1;
1813 
1814 	/* Break on attach. */
1815 	first_stop = true;
1816 	report_next_stop = false;
1817 	gdb_suspend_vcpus();
1818 	pthread_mutex_unlock(&gdb_lock);
1819 }
1820 
1821 #ifndef WITHOUT_CAPSICUM
1822 static void
1823 limit_gdb_socket(int s)
1824 {
1825 	cap_rights_t rights;
1826 	unsigned long ioctls[] = { FIONREAD };
1827 
1828 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1829 	    CAP_SETSOCKOPT, CAP_IOCTL);
1830 	if (caph_rights_limit(s, &rights) == -1)
1831 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1832 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1833 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1834 }
1835 #endif
1836 
/*
 * Set up the GDB stub for the guest described by '_ctx'.
 *
 * Nothing happens unless the "gdb.port" config value is set.
 * Otherwise a non-blocking listening socket is created on
 * "gdb.address" (default "localhost") and that port, and
 * new_connection() is registered to accept debuggers.  When "gdb.wait"
 * is true, vCPU 0 is marked suspended so that it is held until a
 * debugger connects.  Any failure during setup is fatal.
 */
void
init_gdb(struct vmctx *_ctx)
{
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	/* Resolve the listen address; the port must be numeric. */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	/* Best effort: failing to set SO_REUSEADDR is not fatal. */
	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	if (vcpus == NULL || vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	flags = fcntl(s, F_GETFL);
	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	/* Without this event no debugger could ever attach. */
	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
		errx(1, "Failed to register gdb listening socket");
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);
}
1921