xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 #ifndef WITHOUT_CAPSICUM
30 #include <sys/capsicum.h>
31 #endif
32 #include <sys/endian.h>
33 #include <sys/ioctl.h>
34 #include <sys/mman.h>
35 #include <sys/queue.h>
36 #include <sys/socket.h>
37 #include <sys/stat.h>
38 
39 #include <machine/atomic.h>
40 #include <machine/specialreg.h>
41 #include <machine/vmm.h>
42 #include <netinet/in.h>
43 #include <assert.h>
44 #ifndef WITHOUT_CAPSICUM
45 #include <capsicum_helpers.h>
46 #endif
47 #include <err.h>
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <netdb.h>
51 #include <pthread.h>
52 #include <pthread_np.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <sysexits.h>
58 #include <unistd.h>
59 #include <vmmapi.h>
60 
61 #include "bhyverun.h"
62 #include "config.h"
63 #include "debug.h"
64 #include "gdb.h"
65 #include "mem.h"
66 #include "mevent.h"
67 
68 #define	_PATH_GDB_XML		"/usr/share/bhyve/gdb"
69 
70 /*
71  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
72  * use SIGTRAP.
73  */
74 #define	GDB_SIGNAL_TRAP		5
75 
76 #define	GDB_BP_SIZE		1
77 #define	GDB_BP_INSTR		(uint8_t []){0xcc}
78 #define	GDB_PC_REGNAME		VM_REG_GUEST_RIP
79 
80 _Static_assert(sizeof(GDB_BP_INSTR) == GDB_BP_SIZE,
81     "GDB_BP_INSTR has wrong size");
82 
83 static void gdb_resume_vcpus(void);
84 static void check_command(int fd);
85 
86 static struct mevent *read_event, *write_event;
87 
88 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
89 static pthread_mutex_t gdb_lock;
90 static pthread_cond_t idle_vcpus;
91 static bool first_stop, report_next_stop, swbreak_enabled;
92 static int xml_dfd = -1;
93 
94 /*
95  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
96  * read buffer, 'start' is unused and 'len' contains the number of
97  * valid bytes in the buffer.  For a write buffer, 'start' is set to
98  * the index of the next byte in 'data' to send, and 'len' contains
99  * the remaining number of valid bytes to send.
100  */
101 struct io_buffer {
102 	uint8_t *data;
103 	size_t capacity;
104 	size_t start;
105 	size_t len;
106 };
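
/*
 * A minimal sketch of the write-buffer bookkeeping above (purely
 * illustrative; 'buf' is a hypothetical backing array):
 *
 *	uint8_t buf[8];
 *	struct io_buffer io = { .data = buf, .capacity = 8, .len = 2 };
 *
 * Two bytes are queued at data[0..1].  After io_buffer_advance(&io, 1)
 * records one byte written to the socket, io.start == 1 and
 * io.len == 1, so data[1] is the next byte to send.
 */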
107 
108 struct breakpoint {
109 	uint64_t gpa;
110 	uint8_t shadow_inst[GDB_BP_SIZE];
111 	TAILQ_ENTRY(breakpoint) link;
112 };
113 
114 /*
115  * When a vCPU stops due to an event that should be reported to the
116  * debugger, information about the event is stored in this structure.
117  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
118  * and stops other vCPUs so the event can be reported.  The
119  * report_stop() function reports the event for the 'stopped_vcpu'
120  * vCPU.  When the debugger resumes execution via continue or step,
121  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
122  * event handlers until the associated event is reported or disabled.
123  *
124  * An idle vCPU will have all of the boolean fields set to false.
125  *
126  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
127  * released to execute the stepped instruction.  When the vCPU reports
128  * the stepping trap, 'stepped' is set.
129  *
130  * When a vCPU hits a breakpoint set by the debug server,
131  * 'hit_swbreak' is set to true.
132  */
133 struct vcpu_state {
134 	bool stepping;
135 	bool stepped;
136 	bool hit_swbreak;
137 };
138 
139 static struct io_buffer cur_comm, cur_resp;
140 static uint8_t cur_csum;
141 static struct vmctx *ctx;
142 static int cur_fd = -1;
143 static TAILQ_HEAD(, breakpoint) breakpoints;
144 static struct vcpu_state *vcpu_state;
145 static struct vcpu **vcpus;
146 static int cur_vcpu, stopped_vcpu;
147 static bool gdb_active = false;
148 
149 static const struct gdb_reg {
150 	enum vm_reg_name id;
151 	int size;
152 } gdb_regset[] = {
153 	{ .id = VM_REG_GUEST_RAX, .size = 8 },
154 	{ .id = VM_REG_GUEST_RBX, .size = 8 },
155 	{ .id = VM_REG_GUEST_RCX, .size = 8 },
156 	{ .id = VM_REG_GUEST_RDX, .size = 8 },
157 	{ .id = VM_REG_GUEST_RSI, .size = 8 },
158 	{ .id = VM_REG_GUEST_RDI, .size = 8 },
159 	{ .id = VM_REG_GUEST_RBP, .size = 8 },
160 	{ .id = VM_REG_GUEST_RSP, .size = 8 },
161 	{ .id = VM_REG_GUEST_R8, .size = 8 },
162 	{ .id = VM_REG_GUEST_R9, .size = 8 },
163 	{ .id = VM_REG_GUEST_R10, .size = 8 },
164 	{ .id = VM_REG_GUEST_R11, .size = 8 },
165 	{ .id = VM_REG_GUEST_R12, .size = 8 },
166 	{ .id = VM_REG_GUEST_R13, .size = 8 },
167 	{ .id = VM_REG_GUEST_R14, .size = 8 },
168 	{ .id = VM_REG_GUEST_R15, .size = 8 },
169 	{ .id = VM_REG_GUEST_RIP, .size = 8 },
170 	{ .id = VM_REG_GUEST_RFLAGS, .size = 4 },
171 	{ .id = VM_REG_GUEST_CS, .size = 4 },
172 	{ .id = VM_REG_GUEST_SS, .size = 4 },
173 	{ .id = VM_REG_GUEST_DS, .size = 4 },
174 	{ .id = VM_REG_GUEST_ES, .size = 4 },
175 	{ .id = VM_REG_GUEST_FS, .size = 4 },
176 	{ .id = VM_REG_GUEST_GS, .size = 4 },
177 	/*
178 	 * Registers past this point are not included in a reply to a 'g' query,
179 	 * to provide compatibility with debuggers that do not fetch a target
180 	 * description.  The debugger can query them individually with 'p' if it
181 	 * knows about them.
182 	 */
183 #define	GDB_REG_FIRST_EXT	VM_REG_GUEST_FS_BASE
184 	{ .id = VM_REG_GUEST_FS_BASE, .size = 8 },
185 	{ .id = VM_REG_GUEST_GS_BASE, .size = 8 },
186 	{ .id = VM_REG_GUEST_KGS_BASE, .size = 8 },
187 	{ .id = VM_REG_GUEST_CR0, .size = 8 },
188 	{ .id = VM_REG_GUEST_CR2, .size = 8 },
189 	{ .id = VM_REG_GUEST_CR3, .size = 8 },
190 	{ .id = VM_REG_GUEST_CR4, .size = 8 },
191 	{ .id = VM_REG_GUEST_TPR, .size = 8 },
192 	{ .id = VM_REG_GUEST_EFER, .size = 8 },
193 };
194 
195 #define	GDB_LOG
196 #ifdef GDB_LOG
197 #include <stdarg.h>
198 #include <stdio.h>
199 
200 static void __printflike(1, 2)
201 debug(const char *fmt, ...)
202 {
203 	static FILE *logfile;
204 	va_list ap;
205 
206 	if (logfile == NULL) {
207 		logfile = fopen("/tmp/bhyve_gdb.log", "w");
208 		if (logfile == NULL)
209 			return;
210 #ifndef WITHOUT_CAPSICUM
211 		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
212 			fclose(logfile);
213 			logfile = NULL;
214 			return;
215 		}
216 #endif
217 		setlinebuf(logfile);
218 	}
219 	va_start(ap, fmt);
220 	vfprintf(logfile, fmt, ap);
221 	va_end(ap);
222 }
223 #else
224 #define debug(...)
225 #endif
226 
227 static void	remove_all_sw_breakpoints(void);
228 
229 static int
230 guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
231 {
232 	uint64_t regs[4];
233 	const int regset[4] = {
234 		VM_REG_GUEST_CR0,
235 		VM_REG_GUEST_CR3,
236 		VM_REG_GUEST_CR4,
237 		VM_REG_GUEST_EFER
238 	};
239 
240 	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
241 		return (-1);
242 
243 	/*
244 	 * For the debugger, always pretend to be the kernel (CPL 0),
245 	 * and if long-mode is enabled, always parse addresses as if
246 	 * in 64-bit mode.
247 	 */
248 	paging->cr3 = regs[1];
249 	paging->cpl = 0;
250 	if (regs[3] & EFER_LMA)
251 		paging->cpu_mode = CPU_MODE_64BIT;
252 	else if (regs[0] & CR0_PE)
253 		paging->cpu_mode = CPU_MODE_PROTECTED;
254 	else
255 		paging->cpu_mode = CPU_MODE_REAL;
256 	if (!(regs[0] & CR0_PG))
257 		paging->paging_mode = PAGING_MODE_FLAT;
258 	else if (!(regs[2] & CR4_PAE))
259 		paging->paging_mode = PAGING_MODE_32;
260 	else if (regs[3] & EFER_LME)
261 		paging->paging_mode = (regs[2] & CR4_LA57) ?
262 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
263 	else
264 		paging->paging_mode = PAGING_MODE_PAE;
265 	return (0);
266 }
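
/*
 * Worked example: a typical 64-bit guest runs with CR0.PE and CR0.PG
 * set, CR4.PAE set, and EFER.LME and EFER.LMA set (the exact register
 * values are illustrative).  The decoding above then yields
 * CPU_MODE_64BIT with PAGING_MODE_64; if CR4.LA57 were also set, the
 * result would be PAGING_MODE_64_LA57 instead.
 */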
267 
268 /*
269  * Map a guest virtual address to a physical address (for a given vcpu).
270  * If a guest virtual address is valid, return 1.  If the address is
271  * not valid, return 0.  If an error occurs obtaining the mapping,
272  * return -1.
273  */
274 static int
275 guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
276 {
277 	struct vm_guest_paging paging;
278 	int fault;
279 
280 	if (guest_paging_info(vcpu, &paging) == -1)
281 		return (-1);
282 
283 	/*
284 	 * Always use PROT_READ.  We really care if the VA is
285 	 * accessible, not if the current vCPU can write.
286 	 */
287 	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
288 	    &fault) == -1)
289 		return (-1);
290 	if (fault)
291 		return (0);
292 	return (1);
293 }
294 
295 static uint64_t
296 guest_pc(struct vm_exit *vme)
297 {
298 	return (vme->rip);
299 }
300 
301 static void
302 io_buffer_reset(struct io_buffer *io)
303 {
304 
305 	io->start = 0;
306 	io->len = 0;
307 }
308 
309 /* Available room for adding data. */
310 static size_t
311 io_buffer_avail(struct io_buffer *io)
312 {
313 
314 	return (io->capacity - (io->start + io->len));
315 }
316 
317 static uint8_t *
318 io_buffer_head(struct io_buffer *io)
319 {
320 
321 	return (io->data + io->start);
322 }
323 
324 static uint8_t *
325 io_buffer_tail(struct io_buffer *io)
326 {
327 
328 	return (io->data + io->start + io->len);
329 }
330 
331 static void
332 io_buffer_advance(struct io_buffer *io, size_t amount)
333 {
334 
335 	assert(amount <= io->len);
336 	io->start += amount;
337 	io->len -= amount;
338 }
339 
340 static void
341 io_buffer_consume(struct io_buffer *io, size_t amount)
342 {
343 
344 	io_buffer_advance(io, amount);
345 	if (io->len == 0) {
346 		io->start = 0;
347 		return;
348 	}
349 
350 	/*
351 	 * XXX: Consider making this move optional and compacting on a
352 	 * future read() before realloc().
353 	 */
354 	memmove(io->data, io_buffer_head(io), io->len);
355 	io->start = 0;
356 }
357 
358 static void
359 io_buffer_grow(struct io_buffer *io, size_t newsize)
360 {
361 	uint8_t *new_data;
362 	size_t avail, new_cap;
363 
364 	avail = io_buffer_avail(io);
365 	if (newsize <= avail)
366 		return;
367 
368 	new_cap = io->capacity + (newsize - avail);
369 	new_data = realloc(io->data, new_cap);
370 	if (new_data == NULL)
371 		err(1, "Failed to grow GDB I/O buffer");
372 	io->data = new_data;
373 	io->capacity = new_cap;
374 }
375 
376 static bool
377 response_pending(void)
378 {
379 
380 	if (cur_resp.start == 0 && cur_resp.len == 0)
381 		return (false);
382 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
383 		return (false);
384 	return (true);
385 }
386 
387 static void
388 close_connection(void)
389 {
390 
391 	/*
392 	 * XXX: This triggers a warning because mevent does the close
393 	 * before the EV_DELETE.
394 	 */
395 	pthread_mutex_lock(&gdb_lock);
396 	mevent_delete(write_event);
397 	mevent_delete_close(read_event);
398 	write_event = NULL;
399 	read_event = NULL;
400 	io_buffer_reset(&cur_comm);
401 	io_buffer_reset(&cur_resp);
402 	cur_fd = -1;
403 
404 	remove_all_sw_breakpoints();
405 
406 	/* Clear any pending events. */
407 	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
408 
409 	/* Resume any stopped vCPUs. */
410 	gdb_resume_vcpus();
411 	pthread_mutex_unlock(&gdb_lock);
412 }
413 
414 static uint8_t
415 hex_digit(uint8_t nibble)
416 {
417 
418 	if (nibble <= 9)
419 		return (nibble + '0');
420 	else
421 		return (nibble + 'a' - 10);
422 }
423 
424 static uint8_t
425 parse_digit(uint8_t v)
426 {
427 
428 	if (v >= '0' && v <= '9')
429 		return (v - '0');
430 	if (v >= 'a' && v <= 'f')
431 		return (v - 'a' + 10);
432 	if (v >= 'A' && v <= 'F')
433 		return (v - 'A' + 10);
434 	return (0xF);
435 }
436 
437 /* Parses big-endian hexadecimal. */
438 static uintmax_t
439 parse_integer(const uint8_t *p, size_t len)
440 {
441 	uintmax_t v;
442 
443 	v = 0;
444 	while (len > 0) {
445 		v <<= 4;
446 		v |= parse_digit(*p);
447 		p++;
448 		len--;
449 	}
450 	return (v);
451 }
452 
453 static uint8_t
454 parse_byte(const uint8_t *p)
455 {
456 
457 	return (parse_digit(p[0]) << 4 | parse_digit(p[1]));
458 }
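
/*
 * Examples: parse_integer((const uint8_t *)"1f", 2) returns 0x1f, and
 * parse_byte((const uint8_t *)"2a") returns 0x2a.  An invalid digit
 * parses as 0xF rather than signalling an error.
 */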
459 
460 static void
461 send_pending_data(int fd)
462 {
463 	ssize_t nwritten;
464 
465 	if (cur_resp.len == 0) {
466 		mevent_disable(write_event);
467 		return;
468 	}
469 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
470 	if (nwritten == -1) {
471 		warn("Write to GDB socket failed");
472 		close_connection();
473 	} else {
474 		io_buffer_advance(&cur_resp, nwritten);
475 		if (cur_resp.len == 0)
476 			mevent_disable(write_event);
477 		else
478 			mevent_enable(write_event);
479 	}
480 }
481 
482 /* Append a single character to the output buffer. */
483 static void
484 send_char(uint8_t data)
485 {
486 	io_buffer_grow(&cur_resp, 1);
487 	*io_buffer_tail(&cur_resp) = data;
488 	cur_resp.len++;
489 }
490 
491 /* Append an array of bytes to the output buffer. */
492 static void
493 send_data(const uint8_t *data, size_t len)
494 {
495 
496 	io_buffer_grow(&cur_resp, len);
497 	memcpy(io_buffer_tail(&cur_resp), data, len);
498 	cur_resp.len += len;
499 }
500 
501 static void
502 format_byte(uint8_t v, uint8_t *buf)
503 {
504 
505 	buf[0] = hex_digit(v >> 4);
506 	buf[1] = hex_digit(v & 0xf);
507 }
508 
509 /*
510  * Append a single byte (formatted as two hex characters) to the
511  * output buffer.
512  */
513 static void
514 send_byte(uint8_t v)
515 {
516 	uint8_t buf[2];
517 
518 	format_byte(v, buf);
519 	send_data(buf, sizeof(buf));
520 }
521 
522 static void
523 start_packet(void)
524 {
525 
526 	send_char('$');
527 	cur_csum = 0;
528 }
529 
530 static void
531 finish_packet(void)
532 {
533 
534 	send_char('#');
535 	send_byte(cur_csum);
536 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
537 }
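
/*
 * Framing example: queueing the payload "OK" via
 *
 *	start_packet();
 *	append_string("OK");
 *	finish_packet();
 *
 * produces the wire packet "$OK#9a", since 'O' (0x4f) plus 'K' (0x4b)
 * is 0x9a modulo 256.
 */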
538 
539 /*
540  * Append a single character (for the packet payload) and update the
541  * checksum.
542  */
543 static void
544 append_char(uint8_t v)
545 {
546 
547 	send_char(v);
548 	cur_csum += v;
549 }
550 
551 /*
552  * Append an array of bytes (for the packet payload) and update the
553  * checksum.
554  */
555 static void
556 append_packet_data(const uint8_t *data, size_t len)
557 {
558 
559 	send_data(data, len);
560 	while (len > 0) {
561 		cur_csum += *data;
562 		data++;
563 		len--;
564 	}
565 }
566 
567 static void
568 append_string(const char *str)
569 {
570 
571 	append_packet_data(str, strlen(str));
572 }
573 
574 static void
575 append_byte(uint8_t v)
576 {
577 	uint8_t buf[2];
578 
579 	format_byte(v, buf);
580 	append_packet_data(buf, sizeof(buf));
581 }
582 
583 static void
584 append_unsigned_native(uintmax_t value, size_t len)
585 {
586 	size_t i;
587 
588 	for (i = 0; i < len; i++) {
589 		append_byte(value);
590 		value >>= 8;
591 	}
592 }
593 
594 static void
595 append_unsigned_be(uintmax_t value, size_t len)
596 {
597 	char buf[len * 2];
598 	size_t i;
599 
600 	for (i = 0; i < len; i++) {
601 		format_byte(value, buf + (len - i - 1) * 2);
602 		value >>= 8;
603 	}
604 	append_packet_data(buf, sizeof(buf));
605 }
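
/*
 * Example: append_unsigned_native(0x12345678, 4) emits "78563412",
 * least-significant byte first, which is the register wire format for
 * a little-endian target; append_unsigned_be(0x12345678, 4) emits
 * "12345678".
 */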
606 
607 static void
608 append_integer(unsigned int value)
609 {
610 
611 	if (value == 0)
612 		append_char('0');
613 	else
614 		append_unsigned_be(value, (fls(value) + 7) / 8);
615 }
616 
617 static void
618 append_asciihex(const char *str)
619 {
620 
621 	while (*str != '\0') {
622 		append_byte(*str);
623 		str++;
624 	}
625 }
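
/*
 * Example: append_asciihex("vCPU 0") emits "764350552030", the ASCII
 * code of each character as a hex pair.
 */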
626 
627 static void
628 send_empty_response(void)
629 {
630 
631 	start_packet();
632 	finish_packet();
633 }
634 
635 static void
636 send_error(int error)
637 {
638 
639 	start_packet();
640 	append_char('E');
641 	append_byte(error);
642 	finish_packet();
643 }
644 
645 static void
646 send_ok(void)
647 {
648 
649 	start_packet();
650 	append_string("OK");
651 	finish_packet();
652 }
653 
654 static int
655 parse_threadid(const uint8_t *data, size_t len)
656 {
657 
658 	if (len == 1 && *data == '0')
659 		return (0);
660 	if (len == 2 && memcmp(data, "-1", 2) == 0)
661 		return (-1);
662 	if (len == 0)
663 		return (-2);
664 	return (parse_integer(data, len));
665 }
666 
667 /*
668  * Report the current stop event to the debugger.  If the stop is due
669  * to an event triggered on a specific vCPU such as a breakpoint or
670  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
671  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
672  * the reporting vCPU for vCPU events.
673  */
674 static void
675 report_stop(bool set_cur_vcpu)
676 {
677 	struct vcpu_state *vs;
678 
679 	start_packet();
680 	if (stopped_vcpu == -1) {
681 		append_char('S');
682 		append_byte(GDB_SIGNAL_TRAP);
683 	} else {
684 		vs = &vcpu_state[stopped_vcpu];
685 		if (set_cur_vcpu)
686 			cur_vcpu = stopped_vcpu;
687 		append_char('T');
688 		append_byte(GDB_SIGNAL_TRAP);
689 		append_string("thread:");
690 		append_integer(stopped_vcpu + 1);
691 		append_char(';');
692 		if (vs->hit_swbreak) {
693 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
694 			if (swbreak_enabled)
695 				append_string("swbreak:;");
696 		} else if (vs->stepped)
697 			debug("$vCPU %d reporting step\n", stopped_vcpu);
698 		else
699 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
700 	}
701 	finish_packet();
702 	report_next_stop = false;
703 }
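
/*
 * Example stop replies (checksums elided): a stop with no specific
 * vCPU is sent as "$S05#..".  A software breakpoint hit on vCPU 0
 * (thread id 1) with swbreak negotiated is sent as
 * "$T05thread:01;swbreak:;#..", where 05 is GDB_SIGNAL_TRAP.
 */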
704 
705 /*
706  * If this stop is due to a vCPU event, clear that event to mark it as
707  * acknowledged.
708  */
709 static void
710 discard_stop(void)
711 {
712 	struct vcpu_state *vs;
713 
714 	if (stopped_vcpu != -1) {
715 		vs = &vcpu_state[stopped_vcpu];
716 		vs->hit_swbreak = false;
717 		vs->stepped = false;
718 		stopped_vcpu = -1;
719 	}
720 	report_next_stop = true;
721 }
722 
723 static void
724 gdb_finish_suspend_vcpus(void)
725 {
726 
727 	if (first_stop) {
728 		first_stop = false;
729 		stopped_vcpu = -1;
730 	} else if (report_next_stop) {
731 		assert(!response_pending());
732 		report_stop(true);
733 		send_pending_data(cur_fd);
734 	}
735 }
736 
737 /*
738  * vCPU threads invoke this function whenever the vCPU enters the
739  * debug server to pause or report an event.  vCPU threads wait here
740  * as long as the debug server keeps them suspended.
741  */
742 static void
743 _gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
744 {
745 	int vcpuid = vcpu_id(vcpu);
746 
747 	debug("$vCPU %d suspending\n", vcpuid);
748 	CPU_SET(vcpuid, &vcpus_waiting);
749 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
750 		gdb_finish_suspend_vcpus();
751 	while (CPU_ISSET(vcpuid, &vcpus_suspended))
752 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
753 	CPU_CLR(vcpuid, &vcpus_waiting);
754 	debug("$vCPU %d resuming\n", vcpuid);
755 }
756 
757 /*
758  * Requests vCPU single-stepping using a
759  * VMEXIT suitable for the host platform.
760  */
761 static int
762 _gdb_set_step(struct vcpu *vcpu, int val)
763 {
764 	int error;
765 
766 	/*
767 	 * If the MTRAP capability is unavailable, assume we are on an
768 	 * AMD host and request DB exits raised by RFLAGS.TF instead.
769 	 */
770 	error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, val);
771 	if (error != 0)
772 		error = vm_set_capability(vcpu, VM_CAP_RFLAGS_TF, val);
773 	if (error == 0)
774 		(void)vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, val);
775 
776 	return (error);
777 }
778 
779 /*
780  * Checks whether single-stepping is enabled for a given vCPU.
781  */
782 static int
783 _gdb_check_step(struct vcpu *vcpu)
784 {
785 	int val;
786 
787 	if (vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val) != 0) {
788 		if (vm_get_capability(vcpu, VM_CAP_RFLAGS_TF, &val) != 0)
789 			return (-1);
790 	}
791 	return (0);
792 }
793 
794 /*
795  * Invoked at the start of a vCPU thread's execution to inform the
796  * debug server about the new thread.
797  */
798 void
799 gdb_cpu_add(struct vcpu *vcpu)
800 {
801 	int vcpuid;
802 
803 	if (!gdb_active)
804 		return;
805 	vcpuid = vcpu_id(vcpu);
806 	debug("$vCPU %d starting\n", vcpuid);
807 	pthread_mutex_lock(&gdb_lock);
808 	assert(vcpuid < guest_ncpus);
809 	assert(vcpus[vcpuid] == NULL);
810 	vcpus[vcpuid] = vcpu;
811 	CPU_SET(vcpuid, &vcpus_active);
812 	if (!TAILQ_EMPTY(&breakpoints)) {
813 		vm_set_capability(vcpu, VM_CAP_BPT_EXIT, 1);
814 		debug("$vCPU %d enabled breakpoint exits\n", vcpuid);
815 	}
816 
817 	/*
818 	 * If a vcpu is added while vcpus are stopped, suspend the new
819 	 * vcpu so that it will pop back out with a debug exit before
820 	 * executing the first instruction.
821 	 */
822 	if (!CPU_EMPTY(&vcpus_suspended)) {
823 		CPU_SET(vcpuid, &vcpus_suspended);
824 		_gdb_cpu_suspend(vcpu, false);
825 	}
826 	pthread_mutex_unlock(&gdb_lock);
827 }
828 
829 /*
830  * Invoked by a vCPU thread before resuming execution.  This enables stepping
831  * if the vCPU is marked as stepping.
832  */
833 static void
834 gdb_cpu_resume(struct vcpu *vcpu)
835 {
836 	struct vcpu_state *vs;
837 	int error;
838 
839 	vs = &vcpu_state[vcpu_id(vcpu)];
840 
841 	/*
842 	 * Any pending event should already be reported before
843 	 * resuming.
844 	 */
845 	assert(vs->hit_swbreak == false);
846 	assert(vs->stepped == false);
847 	if (vs->stepping) {
848 		error = _gdb_set_step(vcpu, 1);
849 		assert(error == 0);
850 	}
851 }
852 
853 /*
854  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
855  * has been suspended due to an event on a different vCPU or in response
856  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
857  */
858 void
859 gdb_cpu_suspend(struct vcpu *vcpu)
860 {
861 
862 	if (!gdb_active)
863 		return;
864 	pthread_mutex_lock(&gdb_lock);
865 	_gdb_cpu_suspend(vcpu, true);
866 	gdb_cpu_resume(vcpu);
867 	pthread_mutex_unlock(&gdb_lock);
868 }
869 
870 static void
871 gdb_suspend_vcpus(void)
872 {
873 
874 	assert(pthread_mutex_isowned_np(&gdb_lock));
875 	debug("suspending all CPUs\n");
876 	vcpus_suspended = vcpus_active;
877 	vm_suspend_all_cpus(ctx);
878 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
879 		gdb_finish_suspend_vcpus();
880 }
881 
882 /*
883  * Invoked each time a vmexit handler needs to step a vCPU.
884  * Handles MTRAP and RFLAGS.TF vmexits.
885  */
886 static void
887 gdb_cpu_step(struct vcpu *vcpu)
888 {
889 	struct vcpu_state *vs;
890 	int vcpuid = vcpu_id(vcpu);
891 	int error;
892 
893 	debug("$vCPU %d stepped\n", vcpuid);
894 	pthread_mutex_lock(&gdb_lock);
895 	vs = &vcpu_state[vcpuid];
896 	if (vs->stepping) {
897 		vs->stepping = false;
898 		vs->stepped = true;
899 		error = _gdb_set_step(vcpu, 0);
900 		assert(error == 0);
901 
902 		while (vs->stepped) {
903 			if (stopped_vcpu == -1) {
904 				debug("$vCPU %d reporting step\n", vcpuid);
905 				stopped_vcpu = vcpuid;
906 				gdb_suspend_vcpus();
907 			}
908 			_gdb_cpu_suspend(vcpu, true);
909 		}
910 		gdb_cpu_resume(vcpu);
911 	}
912 	pthread_mutex_unlock(&gdb_lock);
913 }
914 
915 /*
916  * A general handler for VM_EXITCODE_DB.
917  * Handles RFLAGS.TF exits on AMD SVM.
918  */
919 void
920 gdb_cpu_debug(struct vcpu *vcpu, struct vm_exit *vmexit)
921 {
922 	if (!gdb_active)
923 		return;
924 
925 	/* RFLAGS.TF exit? */
926 	if (vmexit->u.dbg.trace_trap) {
927 		gdb_cpu_step(vcpu);
928 	}
929 }
930 
931 /*
932  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
933  * the VT-x-specific MTRAP exit.
934  */
935 void
936 gdb_cpu_mtrap(struct vcpu *vcpu)
937 {
938 	if (!gdb_active)
939 		return;
940 	gdb_cpu_step(vcpu);
941 }
942 
943 static struct breakpoint *
944 find_breakpoint(uint64_t gpa)
945 {
946 	struct breakpoint *bp;
947 
948 	TAILQ_FOREACH(bp, &breakpoints, link) {
949 		if (bp->gpa == gpa)
950 			return (bp);
951 	}
952 	return (NULL);
953 }
954 
955 void
956 gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
957 {
958 	struct breakpoint *bp;
959 	struct vcpu_state *vs;
960 	uint64_t gpa;
961 	int error, vcpuid;
962 
963 	if (!gdb_active) {
964 		EPRINTLN("vm_loop: unexpected VMEXIT_DEBUG");
965 		exit(4);
966 	}
967 	vcpuid = vcpu_id(vcpu);
968 	pthread_mutex_lock(&gdb_lock);
969 	error = guest_vaddr2paddr(vcpu, guest_pc(vmexit), &gpa);
970 	assert(error == 1);
971 	bp = find_breakpoint(gpa);
972 	if (bp != NULL) {
973 		vs = &vcpu_state[vcpuid];
974 		assert(vs->stepping == false);
975 		assert(vs->stepped == false);
976 		assert(vs->hit_swbreak == false);
977 		vs->hit_swbreak = true;
978 		vm_set_register(vcpu, GDB_PC_REGNAME, guest_pc(vmexit));
979 		for (;;) {
980 			if (stopped_vcpu == -1) {
981 				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
982 				    vcpuid, guest_pc(vmexit));
983 				stopped_vcpu = vcpuid;
984 				gdb_suspend_vcpus();
985 			}
986 			_gdb_cpu_suspend(vcpu, true);
987 			if (!vs->hit_swbreak) {
988 				/* Breakpoint reported. */
989 				break;
990 			}
991 			bp = find_breakpoint(gpa);
992 			if (bp == NULL) {
993 				/* Breakpoint was removed. */
994 				vs->hit_swbreak = false;
995 				break;
996 			}
997 		}
998 		gdb_cpu_resume(vcpu);
999 	} else {
1000 		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
1001 		    guest_pc(vmexit));
1002 		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
1003 		    vmexit->u.bpt.inst_length);
1004 		assert(error == 0);
1005 		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
1006 		assert(error == 0);
1007 	}
1008 	pthread_mutex_unlock(&gdb_lock);
1009 }
1010 
1011 static bool
1012 gdb_step_vcpu(struct vcpu *vcpu)
1013 {
1014 	int error, vcpuid;
1015 
1016 	vcpuid = vcpu_id(vcpu);
1017 	debug("$vCPU %d step\n", vcpuid);
1018 	error = _gdb_check_step(vcpu);
1019 	if (error < 0)
1020 		return (false);
1021 
1022 	discard_stop();
1023 	vcpu_state[vcpuid].stepping = true;
1024 	vm_resume_cpu(vcpu);
1025 	CPU_CLR(vcpuid, &vcpus_suspended);
1026 	pthread_cond_broadcast(&idle_vcpus);
1027 	return (true);
1028 }
1029 
1030 static void
1031 gdb_resume_vcpus(void)
1032 {
1033 
1034 	assert(pthread_mutex_isowned_np(&gdb_lock));
1035 	vm_resume_all_cpus(ctx);
1036 	debug("resuming all CPUs\n");
1037 	CPU_ZERO(&vcpus_suspended);
1038 	pthread_cond_broadcast(&idle_vcpus);
1039 }
1040 
1041 static void
1042 gdb_read_regs(void)
1043 {
1044 	uint64_t regvals[nitems(gdb_regset)];
1045 	int regnums[nitems(gdb_regset)];
1046 
1047 	for (size_t i = 0; i < nitems(gdb_regset); i++)
1048 		regnums[i] = gdb_regset[i].id;
1049 	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
1050 	    regnums, regvals) == -1) {
1051 		send_error(errno);
1052 		return;
1053 	}
1054 
1055 	start_packet();
1056 	for (size_t i = 0; i < nitems(gdb_regset); i++) {
1057 		if (gdb_regset[i].id == GDB_REG_FIRST_EXT)
1058 			break;
1059 		append_unsigned_native(regvals[i], gdb_regset[i].size);
1060 	}
1061 	finish_packet();
1062 }
1063 
1064 static void
1065 gdb_read_one_reg(const uint8_t *data, size_t len)
1066 {
1067 	uint64_t regval;
1068 	uintmax_t reg;
1069 
1070 	reg = parse_integer(data, len);
1071 	if (reg >= nitems(gdb_regset)) {
1072 		send_error(EINVAL);
1073 		return;
1074 	}
1075 
1076 	if (vm_get_register(vcpus[cur_vcpu], gdb_regset[reg].id, &regval) ==
1077 	    -1) {
1078 		send_error(errno);
1079 		return;
1080 	}
1081 
1082 	start_packet();
1083 	append_unsigned_native(regval, gdb_regset[reg].size);
1084 	finish_packet();
1085 }
1086 
1087 static void
1088 gdb_read_mem(const uint8_t *data, size_t len)
1089 {
1090 	uint64_t gpa, gva, val;
1091 	uint8_t *cp;
1092 	size_t resid, todo, bytes;
1093 	bool started;
1094 	int error;
1095 
1096 	assert(len >= 1);
1097 
1098 	/* Skip 'm' */
1099 	data += 1;
1100 	len -= 1;
1101 
1102 	/* Parse and consume address. */
1103 	cp = memchr(data, ',', len);
1104 	if (cp == NULL || cp == data) {
1105 		send_error(EINVAL);
1106 		return;
1107 	}
1108 	gva = parse_integer(data, cp - data);
1109 	len -= (cp - data) + 1;
1110 	data += (cp - data) + 1;
1111 
1112 	/* Parse length. */
1113 	resid = parse_integer(data, len);
1114 
1115 	started = false;
1116 	while (resid > 0) {
1117 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1118 		if (error == -1) {
1119 			if (started)
1120 				finish_packet();
1121 			else
1122 				send_error(errno);
1123 			return;
1124 		}
1125 		if (error == 0) {
1126 			if (started)
1127 				finish_packet();
1128 			else
1129 				send_error(EFAULT);
1130 			return;
1131 		}
1132 
1133 		/* Read bytes from current page. */
1134 		todo = getpagesize() - gpa % getpagesize();
1135 		if (todo > resid)
1136 			todo = resid;
1137 
1138 		cp = paddr_guest2host(ctx, gpa, todo);
1139 		if (cp != NULL) {
1140 			/*
1141 			 * If this page is guest RAM, read it a byte
1142 			 * at a time.
1143 			 */
1144 			if (!started) {
1145 				start_packet();
1146 				started = true;
1147 			}
1148 			while (todo > 0) {
1149 				append_byte(*cp);
1150 				cp++;
1151 				gpa++;
1152 				gva++;
1153 				resid--;
1154 				todo--;
1155 			}
1156 		} else {
1157 			/*
1158 			 * If this page isn't guest RAM, try to handle
1159 			 * it via MMIO.  For MMIO requests, use
1160 			 * aligned reads of words when possible.
1161 			 */
1162 			while (todo > 0) {
1163 				if (gpa & 1 || todo == 1)
1164 					bytes = 1;
1165 				else if (gpa & 2 || todo == 2)
1166 					bytes = 2;
1167 				else
1168 					bytes = 4;
1169 				error = read_mem(vcpus[cur_vcpu], gpa, &val,
1170 				    bytes);
1171 				if (error == 0) {
1172 					if (!started) {
1173 						start_packet();
1174 						started = true;
1175 					}
1176 					gpa += bytes;
1177 					gva += bytes;
1178 					resid -= bytes;
1179 					todo -= bytes;
1180 					while (bytes > 0) {
1181 						append_byte(val);
1182 						val >>= 8;
1183 						bytes--;
1184 					}
1185 				} else {
1186 					if (started)
1187 						finish_packet();
1188 					else
1189 						send_error(EFAULT);
1190 					return;
1191 				}
1192 			}
1193 		}
1194 		assert(resid == 0 || gpa % getpagesize() == 0);
1195 	}
1196 	if (!started)
1197 		start_packet();
1198 	finish_packet();
1199 }
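
/*
 * Packet format examples (checksums elided): the 'm' packet handled
 * above, e.g. "$mffff800000100000,10#..", asks for 0x10 bytes at that
 * guest virtual address and is answered with 32 hex digits.  The 'M'
 * packet handled below, e.g. "$Mffff800000100000,4:90909090#..",
 * writes the hex-encoded bytes (four x86 NOPs here) and is answered
 * with "OK".  Addresses and lengths are hexadecimal.
 */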
1200 
1201 static void
1202 gdb_write_mem(const uint8_t *data, size_t len)
1203 {
1204 	uint64_t gpa, gva, val;
1205 	uint8_t *cp;
1206 	size_t resid, todo, bytes;
1207 	int error;
1208 
1209 	assert(len >= 1);
1210 
1211 	/* Skip 'M' */
1212 	data += 1;
1213 	len -= 1;
1214 
1215 	/* Parse and consume address. */
1216 	cp = memchr(data, ',', len);
1217 	if (cp == NULL || cp == data) {
1218 		send_error(EINVAL);
1219 		return;
1220 	}
1221 	gva = parse_integer(data, cp - data);
1222 	len -= (cp - data) + 1;
1223 	data += (cp - data) + 1;
1224 
1225 	/* Parse and consume length. */
1226 	cp = memchr(data, ':', len);
1227 	if (cp == NULL || cp == data) {
1228 		send_error(EINVAL);
1229 		return;
1230 	}
1231 	resid = parse_integer(data, cp - data);
1232 	len -= (cp - data) + 1;
1233 	data += (cp - data) + 1;
1234 
1235 	/* Verify the available bytes match the length. */
1236 	if (len != resid * 2) {
1237 		send_error(EINVAL);
1238 		return;
1239 	}
1240 
1241 	while (resid > 0) {
1242 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1243 		if (error == -1) {
1244 			send_error(errno);
1245 			return;
1246 		}
1247 		if (error == 0) {
1248 			send_error(EFAULT);
1249 			return;
1250 		}
1251 
1252 		/* Write bytes to current page. */
1253 		todo = getpagesize() - gpa % getpagesize();
1254 		if (todo > resid)
1255 			todo = resid;
1256 
1257 		cp = paddr_guest2host(ctx, gpa, todo);
1258 		if (cp != NULL) {
1259 			/*
1260 			 * If this page is guest RAM, write it a byte
1261 			 * at a time.
1262 			 */
1263 			while (todo > 0) {
1264 				assert(len >= 2);
1265 				*cp = parse_byte(data);
1266 				data += 2;
1267 				len -= 2;
1268 				cp++;
1269 				gpa++;
1270 				gva++;
1271 				resid--;
1272 				todo--;
1273 			}
1274 		} else {
1275 			/*
1276 			 * If this page isn't guest RAM, try to handle
1277 			 * it via MMIO.  For MMIO requests, use
1278 			 * aligned writes of words when possible.
1279 			 */
1280 			while (todo > 0) {
1281 				if (gpa & 1 || todo == 1) {
1282 					bytes = 1;
1283 					val = parse_byte(data);
1284 				} else if (gpa & 2 || todo == 2) {
1285 					bytes = 2;
1286 					val = be16toh(parse_integer(data, 4));
1287 				} else {
1288 					bytes = 4;
1289 					val = be32toh(parse_integer(data, 8));
1290 				}
1291 				error = write_mem(vcpus[cur_vcpu], gpa, val,
1292 				    bytes);
1293 				if (error == 0) {
1294 					gpa += bytes;
1295 					gva += bytes;
1296 					resid -= bytes;
1297 					todo -= bytes;
1298 					data += 2 * bytes;
1299 					len -= 2 * bytes;
1300 				} else {
1301 					send_error(EFAULT);
1302 					return;
1303 				}
1304 			}
1305 		}
1306 		assert(resid == 0 || gpa % getpagesize() == 0);
1307 	}
1308 	assert(len == 0);
1309 	send_ok();
1310 }
1311 
1312 static bool
1313 set_breakpoint_caps(bool enable)
1314 {
1315 	cpuset_t mask;
1316 	int vcpu;
1317 
1318 	mask = vcpus_active;
1319 	while (!CPU_EMPTY(&mask)) {
1320 		vcpu = CPU_FFS(&mask) - 1;
1321 		CPU_CLR(vcpu, &mask);
1322 		if (vm_set_capability(vcpus[vcpu], VM_CAP_BPT_EXIT,
1323 		    enable ? 1 : 0) < 0)
1324 			return (false);
1325 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1326 		    enable ? "en" : "dis");
1327 	}
1328 	return (true);
1329 }
1330 
1331 static void
1332 remove_all_sw_breakpoints(void)
1333 {
1334 	struct breakpoint *bp, *nbp;
1335 	uint8_t *cp;
1336 
1337 	if (TAILQ_EMPTY(&breakpoints))
1338 		return;
1339 
1340 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1341 		debug("remove breakpoint at %#lx\n", bp->gpa);
1342 		cp = paddr_guest2host(ctx, bp->gpa, sizeof(bp->shadow_inst));
1343 		memcpy(cp, bp->shadow_inst, sizeof(bp->shadow_inst));
1344 		TAILQ_REMOVE(&breakpoints, bp, link);
1345 		free(bp);
1346 	}
1347 	TAILQ_INIT(&breakpoints);
1348 	set_breakpoint_caps(false);
1349 }
1350 
1351 static void
1352 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1353 {
1354 	struct breakpoint *bp;
1355 	uint64_t gpa;
1356 	uint8_t *cp;
1357 	int error;
1358 
1359 	if (kind != GDB_BP_SIZE) {
1360 		send_error(EINVAL);
1361 		return;
1362 	}
1363 
1364 	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1365 	if (error == -1) {
1366 		send_error(errno);
1367 		return;
1368 	}
1369 	if (error == 0) {
1370 		send_error(EFAULT);
1371 		return;
1372 	}
1373 
1374 	cp = paddr_guest2host(ctx, gpa, sizeof(bp->shadow_inst));
1375 
1376 	/* Only permit breakpoints in guest RAM. */
1377 	if (cp == NULL) {
1378 		send_error(EFAULT);
1379 		return;
1380 	}
1381 
1382 	/* Find any existing breakpoint. */
1383 	bp = find_breakpoint(gpa);
1384 
1385 	/*
1386 	 * Silently ignore duplicate commands since the protocol
1387 	 * requires these packets to be idempotent.
1388 	 */
1389 	if (insert) {
1390 		if (bp == NULL) {
1391 			if (TAILQ_EMPTY(&breakpoints) &&
1392 			    !set_breakpoint_caps(true)) {
1393 				send_empty_response();
1394 				return;
1395 			}
1396 			bp = malloc(sizeof(*bp));
1397 			bp->gpa = gpa;
1398 			memcpy(bp->shadow_inst, cp, sizeof(bp->shadow_inst));
1399 			memcpy(cp, GDB_BP_INSTR, sizeof(bp->shadow_inst));
1400 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1401 			debug("new breakpoint at %#lx\n", gpa);
1402 		}
1403 	} else {
1404 		if (bp != NULL) {
1405 			debug("remove breakpoint at %#lx\n", gpa);
1406 			memcpy(cp, bp->shadow_inst, sizeof(bp->shadow_inst));
1407 			TAILQ_REMOVE(&breakpoints, bp, link);
1408 			free(bp);
1409 			if (TAILQ_EMPTY(&breakpoints))
1410 				set_breakpoint_caps(false);
1411 		}
1412 	}
1413 	send_ok();
1414 }
1415 
1416 static void
1417 parse_breakpoint(const uint8_t *data, size_t len)
1418 {
1419 	uint64_t gva;
1420 	uint8_t *cp;
1421 	bool insert;
1422 	int kind, type;
1423 
1424 	insert = data[0] == 'Z';
1425 
1426 	/* Skip 'Z/z' */
1427 	data += 1;
1428 	len -= 1;
1429 
1430 	/* Parse and consume type. */
1431 	cp = memchr(data, ',', len);
1432 	if (cp == NULL || cp == data) {
1433 		send_error(EINVAL);
1434 		return;
1435 	}
1436 	type = parse_integer(data, cp - data);
1437 	len -= (cp - data) + 1;
1438 	data += (cp - data) + 1;
1439 
1440 	/* Parse and consume address. */
1441 	cp = memchr(data, ',', len);
1442 	if (cp == NULL || cp == data) {
1443 		send_error(EINVAL);
1444 		return;
1445 	}
1446 	gva = parse_integer(data, cp - data);
1447 	len -= (cp - data) + 1;
1448 	data += (cp - data) + 1;
1449 
1450 	/* Parse and consume kind. */
1451 	cp = memchr(data, ';', len);
1452 	if (cp == data) {
1453 		send_error(EINVAL);
1454 		return;
1455 	}
1456 	if (cp != NULL) {
1457 		/*
1458 		 * We do not advertise support for either the
1459 		 * ConditionalBreakpoints or BreakpointCommands
1460 		 * features, so we should not be getting conditions or
1461 		 * commands from the remote end.
1462 		 */
1463 		send_empty_response();
1464 		return;
1465 	}
1466 	kind = parse_integer(data, len);
1467 	data += len;
1468 	len = 0;
1469 
1470 	switch (type) {
1471 	case 0:
1472 		update_sw_breakpoint(gva, kind, insert);
1473 		break;
1474 	default:
1475 		send_empty_response();
1476 		break;
1477 	}
1478 }
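
/*
 * Example (checksums elided): "$Z0,ffffffff80321000,1#.." inserts a
 * software breakpoint (type 0, kind 1 == GDB_BP_SIZE) at that guest
 * virtual address and is answered with "OK"; the matching
 * "$z0,ffffffff80321000,1#.." removes it.  Hardware breakpoint and
 * watchpoint types receive an empty response, meaning "unsupported".
 */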
1479 
1480 static bool
1481 command_equals(const uint8_t *data, size_t len, const char *cmd)
1482 {
1483 
1484 	if (strlen(cmd) > len)
1485 		return (false);
1486 	return (memcmp(data, cmd, strlen(cmd)) == 0);
1487 }
1488 
1489 static void
1490 check_features(const uint8_t *data, size_t len)
1491 {
1492 	char *feature, *next_feature, *str, *value;
1493 	bool supported;
1494 
1495 	str = malloc(len + 1);
1496 	memcpy(str, data, len);
1497 	str[len] = '\0';
1498 	next_feature = str;
1499 
1500 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1501 		/*
1502 		 * Null features shouldn't exist, but skip if they
1503 		 * do.
1504 		 */
1505 		if (strcmp(feature, "") == 0)
1506 			continue;
1507 
1508 		/*
1509 		 * Look for the value or supported / not supported
1510 		 * flag.
1511 		 */
1512 		value = strchr(feature, '=');
1513 		if (value != NULL) {
1514 			*value = '\0';
1515 			value++;
1516 			supported = true;
1517 		} else {
1518 			value = feature + strlen(feature) - 1;
1519 			switch (*value) {
1520 			case '+':
1521 				supported = true;
1522 				break;
1523 			case '-':
1524 				supported = false;
1525 				break;
1526 			default:
1527 				/*
1528 				 * This is really a protocol error,
1529 				 * but we just ignore malformed
1530 				 * features for ease of
1531 				 * implementation.
1532 				 */
1533 				continue;
1534 			}
1535 			value = NULL;
1536 		}
1537 
1538 		if (strcmp(feature, "swbreak") == 0)
1539 			swbreak_enabled = supported;
1540 	}
1541 	free(str);
1542 
1543 	start_packet();
1544 
1545 	/* This is an arbitrary limit. */
1546 	append_string("PacketSize=4096");
1547 	append_string(";swbreak+");
1548 	append_string(";qXfer:features:read+");
1549 	finish_packet();
1550 }
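
/*
 * Example exchange: a debugger typically opens with something like
 * "qSupported:multiprocess+;swbreak+;xmlRegisters=i386", and
 * check_features() always answers
 * "PacketSize=4096;swbreak+;qXfer:features:read+".
 */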
1551 
1552 static void
1553 gdb_query(const uint8_t *data, size_t len)
1554 {
1555 
1556 	/*
1557 	 * TODO:
1558 	 * - qSearch
1559 	 */
1560 	if (command_equals(data, len, "qAttached")) {
1561 		start_packet();
1562 		append_char('1');
1563 		finish_packet();
1564 	} else if (command_equals(data, len, "qC")) {
1565 		start_packet();
1566 		append_string("QC");
1567 		append_integer(cur_vcpu + 1);
1568 		finish_packet();
1569 	} else if (command_equals(data, len, "qfThreadInfo")) {
1570 		cpuset_t mask;
1571 		bool first;
1572 		int vcpu;
1573 
1574 		if (CPU_EMPTY(&vcpus_active)) {
1575 			send_error(EINVAL);
1576 			return;
1577 		}
1578 		mask = vcpus_active;
1579 		start_packet();
1580 		append_char('m');
1581 		first = true;
1582 		while (!CPU_EMPTY(&mask)) {
1583 			vcpu = CPU_FFS(&mask) - 1;
1584 			CPU_CLR(vcpu, &mask);
1585 			if (first)
1586 				first = false;
1587 			else
1588 				append_char(',');
1589 			append_integer(vcpu + 1);
1590 		}
1591 		finish_packet();
1592 	} else if (command_equals(data, len, "qsThreadInfo")) {
1593 		start_packet();
1594 		append_char('l');
1595 		finish_packet();
1596 	} else if (command_equals(data, len, "qSupported")) {
1597 		data += strlen("qSupported");
1598 		len -= strlen("qSupported");
1599 		check_features(data, len);
1600 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1601 		char buf[16];
1602 		int tid;
1603 
1604 		data += strlen("qThreadExtraInfo");
1605 		len -= strlen("qThreadExtraInfo");
1606 		if (len == 0 || *data != ',') {
1607 			send_error(EINVAL);
1608 			return;
1609 		}
1610 		tid = parse_threadid(data + 1, len - 1);
1611 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1612 			send_error(EINVAL);
1613 			return;
1614 		}
1615 
1616 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1617 		start_packet();
1618 		append_asciihex(buf);
1619 		finish_packet();
1620 	} else if (command_equals(data, len, "qXfer:features:read:")) {
1621 		struct stat sb;
1622 		const char *xml;
1623 		const uint8_t *pathend;
1624 		char buf[64], path[PATH_MAX];
1625 		size_t xmllen;
1626 		unsigned int doff, dlen;
1627 		int fd;
1628 
1629 		data += strlen("qXfer:features:read:");
1630 		len -= strlen("qXfer:features:read:");
1631 
1632 		pathend = memchr(data, ':', len);
1633 		if (pathend == NULL ||
1634 		    (size_t)(pathend - data) >= sizeof(path) - 1) {
1635 			send_error(EINVAL);
1636 			return;
1637 		}
1638 		memcpy(path, data, pathend - data);
1639 		path[pathend - data] = '\0';
1640 		len -= (pathend - data) + 1;
1641 		data += (pathend - data) + 1;
1642 
1643 		if (len > sizeof(buf) - 1) {
1644 			send_error(EINVAL);
1645 			return;
1646 		}
1647 		memcpy(buf, data, len);
1648 		buf[len] = '\0';
1649 		if (sscanf(buf, "%x,%x", &doff, &dlen) != 2) {
1650 			send_error(EINVAL);
1651 			return;
1652 		}
1653 
1654 		fd = openat(xml_dfd, path, O_RDONLY | O_RESOLVE_BENEATH);
1655 		if (fd < 0) {
1656 			send_error(errno);
1657 			return;
1658 		}
1659 		if (fstat(fd, &sb) < 0) {
1660 			send_error(errno);
1661 			close(fd);
1662 			return;
1663 		}
1664 		xml = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
1665 		if (xml == MAP_FAILED) {
1666 			send_error(errno);
1667 			close(fd);
1668 			return;
1669 		}
1670 		close(fd);
1671 		xmllen = sb.st_size;
1672 
1673 		start_packet();
1674 		if (doff >= xmllen) {
1675 			append_char('l');
1676 		} else if (doff + dlen >= xmllen) {
1677 			append_char('l');
1678 			append_packet_data(xml + doff, xmllen - doff);
1679 		} else {
1680 			append_char('m');
1681 			append_packet_data(xml + doff, dlen);
1682 		}
1683 		finish_packet();
1684 		(void)munmap(__DECONST(void *, xml), xmllen);
1685 	} else
1686 		send_empty_response();
1687 }
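
/*
 * Example (checksums elided):
 * "$qXfer:features:read:target.xml:0,fff#.." requests up to 0xfff
 * bytes of the target description starting at offset 0.  A reply
 * beginning with 'm' carries a partial chunk, and the debugger asks
 * again at a higher offset; a reply beginning with 'l' carries the
 * final chunk.
 */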
1688 
1689 static void
1690 handle_command(const uint8_t *data, size_t len)
1691 {
1692 
1693 	/* Reject packets with a sequence-id. */
1694 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1695 	    data[1] >= '0' && data[1] <= '9' && data[2] == ':') {
1696 		send_empty_response();
1697 		return;
1698 	}
1699 
1700 	switch (*data) {
1701 	case 'c':
1702 		if (len != 1) {
1703 			send_error(EINVAL);
1704 			break;
1705 		}
1706 
1707 		discard_stop();
1708 		gdb_resume_vcpus();
1709 		break;
1710 	case 'D':
1711 		send_ok();
1712 
1713 		/* TODO: Resume any stopped CPUs. */
1714 		break;
1715 	case 'g':
1716 		gdb_read_regs();
1717 		break;
1718 	case 'p':
1719 		gdb_read_one_reg(data + 1, len - 1);
1720 		break;
1721 	case 'H': {
1722 		int tid;
1723 
1724 		if (len < 2 || (data[1] != 'g' && data[1] != 'c')) {
1725 			send_error(EINVAL);
1726 			break;
1727 		}
1728 		tid = parse_threadid(data + 2, len - 2);
1729 		if (tid == -2) {
1730 			send_error(EINVAL);
1731 			break;
1732 		}
1733 
1734 		if (CPU_EMPTY(&vcpus_active)) {
1735 			send_error(EINVAL);
1736 			break;
1737 		}
1738 		if (tid == -1 || tid == 0)
1739 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1740 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1741 			cur_vcpu = tid - 1;
1742 		else {
1743 			send_error(EINVAL);
1744 			break;
1745 		}
1746 		send_ok();
1747 		break;
1748 	}
1749 	case 'm':
1750 		gdb_read_mem(data, len);
1751 		break;
1752 	case 'M':
1753 		gdb_write_mem(data, len);
1754 		break;
1755 	case 'T': {
1756 		int tid;
1757 
1758 		tid = parse_threadid(data + 1, len - 1);
1759 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1760 			send_error(EINVAL);
1761 			return;
1762 		}
1763 		send_ok();
1764 		break;
1765 	}
1766 	case 'q':
1767 		gdb_query(data, len);
1768 		break;
1769 	case 's':
1770 		if (len != 1) {
1771 			send_error(EINVAL);
1772 			break;
1773 		}
1774 
1775 		/* Don't send a reply until a stop occurs. */
1776 		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
1777 			send_error(EOPNOTSUPP);
1778 			break;
1779 		}
1780 		break;
1781 	case 'z':
1782 	case 'Z':
1783 		parse_breakpoint(data, len);
1784 		break;
1785 	case '?':
1786 		report_stop(false);
1787 		break;
1788 	case 'G': /* TODO */
1789 	case 'v':
1790 		/* Handle 'vCont' */
1791 		/* 'vCtrlC' */
1792 	case 'P': /* TODO */
1793 	case 'Q': /* TODO */
1794 	case 't': /* TODO */
1795 	case 'X': /* TODO */
1796 	default:
1797 		send_empty_response();
1798 	}
1799 }
1800 
1801 /* Check for a valid packet in the command buffer. */
1802 static void
1803 check_command(int fd)
1804 {
1805 	uint8_t *head, *hash, *p, sum;
1806 	size_t avail, plen;
1807 
1808 	for (;;) {
1809 		avail = cur_comm.len;
1810 		if (avail == 0)
1811 			return;
1812 		head = io_buffer_head(&cur_comm);
1813 		switch (*head) {
1814 		case 0x03:
1815 			debug("<- Ctrl-C\n");
1816 			io_buffer_consume(&cur_comm, 1);
1817 
1818 			gdb_suspend_vcpus();
1819 			break;
1820 		case '+':
1821 			/* ACK of previous response. */
1822 			debug("<- +\n");
1823 			if (response_pending())
1824 				io_buffer_reset(&cur_resp);
1825 			io_buffer_consume(&cur_comm, 1);
1826 			if (stopped_vcpu != -1 && report_next_stop) {
1827 				report_stop(true);
1828 				send_pending_data(fd);
1829 			}
1830 			break;
1831 		case '-':
1832 			/* NACK of previous response. */
1833 			debug("<- -\n");
1834 			if (response_pending()) {
1835 				cur_resp.len += cur_resp.start;
1836 				cur_resp.start = 0;
1837 				if (cur_resp.data[0] == '+')
1838 					io_buffer_advance(&cur_resp, 1);
1839 				debug("-> %.*s\n", (int)cur_resp.len,
1840 				    io_buffer_head(&cur_resp));
1841 			}
1842 			io_buffer_consume(&cur_comm, 1);
1843 			send_pending_data(fd);
1844 			break;
1845 		case '$':
1846 			/* Packet. */
1847 
1848 			if (response_pending()) {
1849 				warnx("New GDB command while response in "
1850 				    "progress");
1851 				io_buffer_reset(&cur_resp);
1852 			}
1853 
1854 			/* Is packet complete? */
1855 			hash = memchr(head, '#', avail);
1856 			if (hash == NULL)
1857 				return;
1858 			plen = (hash - head + 1) + 2;
1859 			if (avail < plen)
1860 				return;
1861 			debug("<- %.*s\n", (int)plen, head);
1862 
1863 			/* Verify checksum. */
1864 			for (sum = 0, p = head + 1; p < hash; p++)
1865 				sum += *p;
1866 			if (sum != parse_byte(hash + 1)) {
1867 				io_buffer_consume(&cur_comm, plen);
1868 				debug("-> -\n");
1869 				send_char('-');
1870 				send_pending_data(fd);
1871 				break;
1872 			}
1873 			send_char('+');
1874 
1875 			handle_command(head + 1, hash - (head + 1));
1876 			io_buffer_consume(&cur_comm, plen);
1877 			if (!response_pending())
1878 				debug("-> +\n");
1879 			send_pending_data(fd);
1880 			break;
1881 		default:
1882 			/* XXX: Possibly drop connection instead. */
1883 			debug("-> %02x\n", *head);
1884 			io_buffer_consume(&cur_comm, 1);
1885 			break;
1886 		}
1887 	}
1888 }
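
/*
 * Example session against the state machine above: the debugger sends
 * "$qC#b4", and the server queues "+" (ACK) followed by "$QC01#f5"
 * (current thread id 1).  If the debugger instead answers "-" (NACK),
 * the response "$QC01#f5" is retransmitted without the leading "+".
 */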
1889 
1890 static void
1891 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1892 {
1893 	size_t pending;
1894 	ssize_t nread;
1895 	int n;
1896 
1897 	if (ioctl(fd, FIONREAD, &n) == -1) {
1898 		warn("FIONREAD on GDB socket");
1899 		return;
1900 	}
1901 	assert(n >= 0);
1902 	pending = n;
1903 
1904 	/*
1905 	 * 'pending' might be zero due to EOF.  We need to call read
1906 	 * with a non-zero length to detect EOF.
1907 	 */
1908 	if (pending == 0)
1909 		pending = 1;
1910 
1911 	/* Ensure there is room in the command buffer. */
1912 	io_buffer_grow(&cur_comm, pending);
1913 	assert(io_buffer_avail(&cur_comm) >= pending);
1914 
1915 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1916 	if (nread == 0) {
1917 		close_connection();
1918 	} else if (nread == -1) {
1919 		if (errno == EAGAIN)
1920 			return;
1921 
1922 		warn("Read from GDB socket");
1923 		close_connection();
1924 	} else {
1925 		cur_comm.len += nread;
1926 		pthread_mutex_lock(&gdb_lock);
1927 		check_command(fd);
1928 		pthread_mutex_unlock(&gdb_lock);
1929 	}
1930 }
1931 
1932 static void
1933 gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
1934 {
1935 
1936 	send_pending_data(fd);
1937 }
1938 
1939 static void
1940 new_connection(int fd, enum ev_type event __unused, void *arg)
1941 {
1942 	int optval, s;
1943 
1944 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1945 	if (s == -1) {
1946 		if (arg != NULL)
1947 			err(1, "Failed accepting initial GDB connection");
1948 
1949 		/* Silently ignore errors post-startup. */
1950 		return;
1951 	}
1952 
1953 	optval = 1;
1954 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1955 	    -1) {
1956 		warn("Failed to disable SIGPIPE for GDB connection");
1957 		close(s);
1958 		return;
1959 	}
1960 
1961 	pthread_mutex_lock(&gdb_lock);
1962 	if (cur_fd != -1) {
1963 		close(s);
1964 		warnx("Ignoring additional GDB connection.");
1965 		pthread_mutex_unlock(&gdb_lock);
1966 		return;
1967 	}
1966 
1967 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1968 	if (read_event == NULL) {
1969 		if (arg != NULL)
1970 			err(1, "Failed to setup initial GDB connection");
1971 		pthread_mutex_unlock(&gdb_lock);
1972 		return;
1973 	}
1974 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1975 	if (write_event == NULL) {
1976 		if (arg != NULL)
1977 			err(1, "Failed to setup initial GDB connection");
1978 		mevent_delete_close(read_event);
1979 		read_event = NULL;
1980 		pthread_mutex_unlock(&gdb_lock);
1981 		return;
1982 	}
1981 
1982 	cur_fd = s;
1983 	cur_vcpu = 0;
1984 	stopped_vcpu = -1;
1985 
1986 	/* Break on attach. */
1987 	first_stop = true;
1988 	report_next_stop = false;
1989 	gdb_suspend_vcpus();
1990 	pthread_mutex_unlock(&gdb_lock);
1991 }
1992 
1993 #ifndef WITHOUT_CAPSICUM
1994 static void
1995 limit_gdb_socket(int s)
1996 {
1997 	cap_rights_t rights;
1998 	unsigned long ioctls[] = { FIONREAD };
1999 
2000 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
2001 	    CAP_SETSOCKOPT, CAP_IOCTL);
2002 	if (caph_rights_limit(s, &rights) == -1)
2003 		errx(EX_OSERR, "Unable to apply rights for sandbox");
2004 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
2005 		errx(EX_OSERR, "Unable to apply rights for sandbox");
2006 }
2007 #endif
2008 
2009 void
2010 init_gdb(struct vmctx *_ctx)
2011 {
2012 #ifndef WITHOUT_CAPSICUM
2013 	cap_rights_t rights;
2014 #endif
2015 	int error, flags, optval, s;
2016 	struct addrinfo hints;
2017 	struct addrinfo *gdbaddr;
2018 	const char *saddr, *value;
2019 	char *sport;
2020 	bool wait;
2021 
2022 	value = get_config_value("gdb.port");
2023 	if (value == NULL)
2024 		return;
2025 	sport = strdup(value);
2026 	if (sport == NULL)
2027 		errx(4, "Failed to allocate memory");
2028 
2029 	wait = get_config_bool_default("gdb.wait", false);
2030 
2031 	saddr = get_config_value("gdb.address");
2032 	if (saddr == NULL) {
2033 		saddr = "localhost";
2034 	}
2035 
2036 	debug("==> starting on %s:%s, %swaiting\n",
2037 	    saddr, sport, wait ? "" : "not ");
2038 
2039 	error = pthread_mutex_init(&gdb_lock, NULL);
2040 	if (error != 0)
2041 		errc(1, error, "gdb mutex init");
2042 	error = pthread_cond_init(&idle_vcpus, NULL);
2043 	if (error != 0)
2044 		errc(1, error, "gdb cv init");
2045 
2046 	memset(&hints, 0, sizeof(hints));
2047 	hints.ai_family = AF_UNSPEC;
2048 	hints.ai_socktype = SOCK_STREAM;
2049 	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;
2050 
2051 	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
2052 	if (error != 0)
2053 		errx(1, "gdb address resolution: %s", gai_strerror(error));
2054 
2055 	ctx = _ctx;
2056 	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
2057 	if (s < 0)
2058 		err(1, "gdb socket create");
2059 
2060 	optval = 1;
2061 	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
2062 
2063 	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
2064 		err(1, "gdb socket bind");
2065 
2066 	if (listen(s, 1) < 0)
2067 		err(1, "gdb socket listen");
2068 
2069 	stopped_vcpu = -1;
2070 	TAILQ_INIT(&breakpoints);
2071 	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
2072 	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
2073 	if (wait) {
2074 		/*
2075 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
2076 		 * logic in gdb_cpu_add() to suspend the first vcpu before
2077 		 * it starts execution.  The vcpu will remain suspended
2078 		 * until a debugger connects.
2079 		 */
2080 		CPU_SET(0, &vcpus_suspended);
2081 		stopped_vcpu = 0;
2082 	}
2083 
2084 	flags = fcntl(s, F_GETFL);
2085 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
2086 		err(1, "Failed to mark gdb socket non-blocking");
2087 
2088 #ifndef WITHOUT_CAPSICUM
2089 	limit_gdb_socket(s);
2090 #endif
2091 	mevent_add(s, EVF_READ, new_connection, NULL);
2092 	gdb_active = true;
2093 	freeaddrinfo(gdbaddr);
2094 	free(sport);
2095 
2096 	xml_dfd = open(_PATH_GDB_XML, O_DIRECTORY);
2097 	if (xml_dfd == -1)
2098 		err(1, "Failed to open gdb xml directory");
2099 #ifndef WITHOUT_CAPSICUM
2100 	cap_rights_init(&rights, CAP_FSTAT, CAP_LOOKUP, CAP_MMAP_R, CAP_PREAD);
2101 	if (caph_rights_limit(xml_dfd, &rights) == -1)
2102 		err(1, "caph_rights_limit");
2103 #endif
2104 }
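
/*
 * Usage sketch (the kernel path is hypothetical, and this assumes a
 * bhyve built with config-option support): start the VM with the debug
 * server listening and the first vCPU held,
 *
 *	bhyve -o gdb.port=1234 -o gdb.wait=true ... vmname
 *
 * then attach from the host:
 *
 *	gdb /path/to/guest/kernel
 *	(gdb) target remote localhost:1234
 *
 * The target description served from _PATH_GDB_XML lets gdb discover
 * the extended registers past GDB_REG_FIRST_EXT.
 */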
2105