xref: /freebsd/usr.sbin/bhyve/gdb.c (revision d316de24faa7453118a90fb0e9839e8026e36a4e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 #ifndef WITHOUT_CAPSICUM
30 #include <sys/capsicum.h>
31 #endif
32 #include <sys/endian.h>
33 #include <sys/ioctl.h>
34 #include <sys/mman.h>
35 #include <sys/queue.h>
36 #include <sys/socket.h>
37 #include <sys/stat.h>
38 
39 #include <machine/atomic.h>
40 #include <machine/specialreg.h>
41 #include <machine/vmm.h>
42 #include <netinet/in.h>
43 #include <assert.h>
44 #ifndef WITHOUT_CAPSICUM
45 #include <capsicum_helpers.h>
46 #endif
47 #include <err.h>
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <netdb.h>
51 #include <pthread.h>
52 #include <pthread_np.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <sysexits.h>
58 #include <unistd.h>
59 #include <vmmapi.h>
60 
61 #include "bhyverun.h"
62 #include "config.h"
63 #include "debug.h"
64 #include "gdb.h"
65 #include "mem.h"
66 #include "mevent.h"
67 
/*
 * Filesystem path used for the debug server's XML files.
 * NOTE(review): presumably opened into 'xml_dfd'; the code that uses
 * this path is not visible in this part of the file -- confirm.
 */
#define	_PATH_GDB_XML		"/usr/share/bhyve/gdb"

/*
 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
 * use SIGTRAP.
 */
#define	GDB_SIGNAL_TRAP		5

/* Size in bytes of the instruction used to implement a breakpoint. */
#define	GDB_BP_SIZE		1
/* Bytes of the breakpoint instruction (0xcc is the x86 INT3 opcode). */
#define	GDB_BP_INSTR		(uint8_t []){0xcc}
/* Register that holds the guest program counter. */
#define	GDB_PC_REGNAME		VM_REG_GUEST_RIP

/* Fail the build if the instruction bytes and GDB_BP_SIZE disagree. */
_Static_assert(sizeof(GDB_BP_INSTR) == GDB_BP_SIZE,
    "GDB_BP_INSTR has wrong size");
82 
static void gdb_resume_vcpus(void);
static void check_command(int fd);

/* mevent handles for the debugger socket; reset to NULL by close_connection(). */
static struct mevent *read_event, *write_event;

/*
 * vcpus_active:    vCPUs registered through gdb_cpu_add().
 * vcpus_suspended: vCPUs the debug server currently wants held.
 * vcpus_waiting:   vCPUs currently parked in _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Held by vCPU threads and by close_connection() while touching the state above. */
static pthread_mutex_t gdb_lock;
/* vCPU threads sleep on this (with gdb_lock) while they are suspended. */
static pthread_cond_t idle_vcpus;
/*
 * first_stop:       the next completed suspend is not reported.
 * report_next_stop: the next completed suspend produces a stop reply.
 * swbreak_enabled:  include "swbreak:;" in breakpoint stop replies.
 */
static bool first_stop, report_next_stop, swbreak_enabled;
static int xml_dfd = -1;
93 
/*
 * Growable byte buffer used for debugger I/O.  'data' points at
 * 'capacity' bytes of storage.  A read buffer ignores 'start' and
 * keeps the number of valid bytes in 'len'.  A write buffer keeps the
 * index of the next byte to send in 'start' and the number of bytes
 * that still have to be sent in 'len'.
 */
struct io_buffer {
	uint8_t *data;
	size_t capacity, start, len;
};
107 
/* One software breakpoint, kept on the 'breakpoints' list. */
struct breakpoint {
	uint64_t gpa;				/* guest physical address; lookup key */
	/* NOTE(review): presumably the guest bytes replaced by GDB_BP_INSTR -- confirm. */
	uint8_t shadow_inst[GDB_BP_SIZE];
	TAILQ_ENTRY(breakpoint) link;		/* list linkage */
};
113 
/*
 * When a vCPU stops due to an event that should be reported to the
 * debugger, information about the event is stored in this structure.
 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
 * and stops the other vCPUs so the event can be reported.  The
 * report_stop() function reports the event for the 'stopped_vcpu'
 * vCPU.  When the debugger resumes execution via continue or step,
 * the event for 'stopped_vcpu' is cleared.  vCPUs loop in their event
 * handlers until the associated event is reported or disabled.
 *
 * An idle vCPU has every field set to false.
 *
 * stepping:    set when the vCPU is released to execute a single
 *              stepped instruction.
 * stepped:     set when the vCPU reports the resulting stepping trap.
 * hit_swbreak: set when the vCPU hits a breakpoint installed by the
 *              debug server.
 */
struct vcpu_state {
	bool stepping, stepped, hit_swbreak;
};
138 
/* cur_comm: buffer for incoming commands; cur_resp: response bytes queued for sending. */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet payload being built; reset by start_packet(). */
static uint8_t cur_csum;
static struct vmctx *ctx;
/* Debugger socket descriptor; reset to -1 by close_connection(). */
static int cur_fd = -1;
/* Software breakpoints, searched by guest physical address. */
static TAILQ_HEAD(, breakpoint) breakpoints;
/* Per-vCPU event state, indexed by vCPU id. */
static struct vcpu_state *vcpu_state;
/* vCPU handles indexed by vCPU id; entries are filled in by gdb_cpu_add(). */
static struct vcpu **vcpus;
/*
 * cur_vcpu:     vCPU used by the register and memory commands.
 * stopped_vcpu: vCPU whose event caused the current stop, or -1.
 */
static int cur_vcpu, stopped_vcpu;
/* False until the debug server is enabled; most gdb_cpu_*() hooks then do nothing. */
static bool gdb_active = false;
148 
/*
 * Registers exposed to the debugger.  'id' is the vmm register name
 * and 'size' is the number of bytes sent for the register.  The index
 * of an entry in this table is the register number used by
 * gdb_read_one_reg().
 */
static const struct gdb_reg {
	enum vm_reg_name id;
	int size;
} gdb_regset[] = {
	{ .id = VM_REG_GUEST_RAX, .size = 8 },
	{ .id = VM_REG_GUEST_RBX, .size = 8 },
	{ .id = VM_REG_GUEST_RCX, .size = 8 },
	{ .id = VM_REG_GUEST_RDX, .size = 8 },
	{ .id = VM_REG_GUEST_RSI, .size = 8 },
	{ .id = VM_REG_GUEST_RDI, .size = 8 },
	{ .id = VM_REG_GUEST_RBP, .size = 8 },
	{ .id = VM_REG_GUEST_RSP, .size = 8 },
	{ .id = VM_REG_GUEST_R8, .size = 8 },
	{ .id = VM_REG_GUEST_R9, .size = 8 },
	{ .id = VM_REG_GUEST_R10, .size = 8 },
	{ .id = VM_REG_GUEST_R11, .size = 8 },
	{ .id = VM_REG_GUEST_R12, .size = 8 },
	{ .id = VM_REG_GUEST_R13, .size = 8 },
	{ .id = VM_REG_GUEST_R14, .size = 8 },
	{ .id = VM_REG_GUEST_R15, .size = 8 },
	{ .id = VM_REG_GUEST_RIP, .size = 8 },
	{ .id = VM_REG_GUEST_RFLAGS, .size = 4 },
	{ .id = VM_REG_GUEST_CS, .size = 4 },
	{ .id = VM_REG_GUEST_SS, .size = 4 },
	{ .id = VM_REG_GUEST_DS, .size = 4 },
	{ .id = VM_REG_GUEST_ES, .size = 4 },
	{ .id = VM_REG_GUEST_FS, .size = 4 },
	{ .id = VM_REG_GUEST_GS, .size = 4 },
	/*
	 * Registers past this point are not included in a reply to a 'g' query,
	 * to provide compatibility with debuggers that do not fetch a target
	 * description.  The debugger can query them individually with 'p' if it
	 * knows about them.
	 */
	/* gdb_read_regs() stops emitting values at the entry with this id. */
#define	GDB_REG_FIRST_EXT	VM_REG_GUEST_FS_BASE
	{ .id = VM_REG_GUEST_FS_BASE, .size = 8 },
	{ .id = VM_REG_GUEST_GS_BASE, .size = 8 },
	{ .id = VM_REG_GUEST_KGS_BASE, .size = 8 },
	{ .id = VM_REG_GUEST_CR0, .size = 8 },
	{ .id = VM_REG_GUEST_CR2, .size = 8 },
	{ .id = VM_REG_GUEST_CR3, .size = 8 },
	{ .id = VM_REG_GUEST_CR4, .size = 8 },
	{ .id = VM_REG_GUEST_TPR, .size = 8 },
	{ .id = VM_REG_GUEST_EFER, .size = 8 },
};
194 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace output, available only when GDB_LOG is defined.
 * The log file /tmp/bhyve_gdb.log is opened (and truncated) on first
 * use and switched to line buffering.  If the file cannot be opened
 * or, with Capsicum enabled, cannot be limited to writing, the
 * message is dropped and the open is attempted again on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		FILE *fp;

		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		logfile = fp;
	}

	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
/* Logging is compiled out: debug() calls expand to nothing. */
#define debug(...)
#endif
225 
226 static void	remove_all_sw_breakpoints(void);
227 
228 static int
229 guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
230 {
231 	uint64_t regs[4];
232 	const int regset[4] = {
233 		VM_REG_GUEST_CR0,
234 		VM_REG_GUEST_CR3,
235 		VM_REG_GUEST_CR4,
236 		VM_REG_GUEST_EFER
237 	};
238 
239 	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
240 		return (-1);
241 
242 	/*
243 	 * For the debugger, always pretend to be the kernel (CPL 0),
244 	 * and if long-mode is enabled, always parse addresses as if
245 	 * in 64-bit mode.
246 	 */
247 	paging->cr3 = regs[1];
248 	paging->cpl = 0;
249 	if (regs[3] & EFER_LMA)
250 		paging->cpu_mode = CPU_MODE_64BIT;
251 	else if (regs[0] & CR0_PE)
252 		paging->cpu_mode = CPU_MODE_PROTECTED;
253 	else
254 		paging->cpu_mode = CPU_MODE_REAL;
255 	if (!(regs[0] & CR0_PG))
256 		paging->paging_mode = PAGING_MODE_FLAT;
257 	else if (!(regs[2] & CR4_PAE))
258 		paging->paging_mode = PAGING_MODE_32;
259 	else if (regs[3] & EFER_LME)
260 		paging->paging_mode = (regs[2] & CR4_LA57) ?
261 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
262 	else
263 		paging->paging_mode = PAGING_MODE_PAE;
264 	return (0);
265 }
266 
267 /*
268  * Map a guest virtual address to a physical address (for a given vcpu).
269  * If a guest virtual address is valid, return 1.  If the address is
270  * not valid, return 0.  If an error occurs obtaining the mapping,
271  * return -1.
272  */
273 static int
274 guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
275 {
276 	struct vm_guest_paging paging;
277 	int fault;
278 
279 	if (guest_paging_info(vcpu, &paging) == -1)
280 		return (-1);
281 
282 	/*
283 	 * Always use PROT_READ.  We really care if the VA is
284 	 * accessible, not if the current vCPU can write.
285 	 */
286 	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
287 	    &fault) == -1)
288 		return (-1);
289 	if (fault)
290 		return (0);
291 	return (1);
292 }
293 
294 static uint64_t
295 guest_pc(struct vm_exit *vme)
296 {
297 	return (vme->rip);
298 }
299 
300 static void
301 io_buffer_reset(struct io_buffer *io)
302 {
303 
304 	io->start = 0;
305 	io->len = 0;
306 }
307 
308 /* Available room for adding data. */
309 static size_t
310 io_buffer_avail(struct io_buffer *io)
311 {
312 
313 	return (io->capacity - (io->start + io->len));
314 }
315 
316 static uint8_t *
317 io_buffer_head(struct io_buffer *io)
318 {
319 
320 	return (io->data + io->start);
321 }
322 
323 static uint8_t *
324 io_buffer_tail(struct io_buffer *io)
325 {
326 
327 	return (io->data + io->start + io->len);
328 }
329 
330 static void
331 io_buffer_advance(struct io_buffer *io, size_t amount)
332 {
333 
334 	assert(amount <= io->len);
335 	io->start += amount;
336 	io->len -= amount;
337 }
338 
339 static void
340 io_buffer_consume(struct io_buffer *io, size_t amount)
341 {
342 
343 	io_buffer_advance(io, amount);
344 	if (io->len == 0) {
345 		io->start = 0;
346 		return;
347 	}
348 
349 	/*
350 	 * XXX: Consider making this move optional and compacting on a
351 	 * future read() before realloc().
352 	 */
353 	memmove(io->data, io_buffer_head(io), io->len);
354 	io->start = 0;
355 }
356 
357 static void
358 io_buffer_grow(struct io_buffer *io, size_t newsize)
359 {
360 	uint8_t *new_data;
361 	size_t avail, new_cap;
362 
363 	avail = io_buffer_avail(io);
364 	if (newsize <= avail)
365 		return;
366 
367 	new_cap = io->capacity + (newsize - avail);
368 	new_data = realloc(io->data, new_cap);
369 	if (new_data == NULL)
370 		err(1, "Failed to grow GDB I/O buffer");
371 	io->data = new_data;
372 	io->capacity = new_cap;
373 }
374 
375 static bool
376 response_pending(void)
377 {
378 
379 	if (cur_resp.start == 0 && cur_resp.len == 0)
380 		return (false);
381 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
382 		return (false);
383 	return (true);
384 }
385 
386 static void
387 close_connection(void)
388 {
389 
390 	/*
391 	 * XXX: This triggers a warning because mevent does the close
392 	 * before the EV_DELETE.
393 	 */
394 	pthread_mutex_lock(&gdb_lock);
395 	mevent_delete(write_event);
396 	mevent_delete_close(read_event);
397 	write_event = NULL;
398 	read_event = NULL;
399 	io_buffer_reset(&cur_comm);
400 	io_buffer_reset(&cur_resp);
401 	cur_fd = -1;
402 
403 	remove_all_sw_breakpoints();
404 
405 	/* Clear any pending events. */
406 	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
407 
408 	/* Resume any stopped vCPUs. */
409 	gdb_resume_vcpus();
410 	pthread_mutex_unlock(&gdb_lock);
411 }
412 
/*
 * Convert a value in the range 0-15 to its lowercase hexadecimal
 * ASCII character.
 */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble < 10 ? '0' + nibble : 'a' + (nibble - 10));
}
422 
/*
 * Convert one hexadecimal ASCII character (either case) to its value.
 * Any other character yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t off;

	/* Each subtraction wraps to a large value when 'v' is below the base. */
	off = v - '0';
	if (off < 10)
		return (off);
	off = v - 'a';
	if (off < 6)
		return (10 + off);
	off = v - 'A';
	if (off < 6)
		return (10 + off);
	return (0xF);
}
435 
/*
 * Parse 'len' characters at 'p' as a big-endian hexadecimal number
 * (most significant digit first).  The characters are not validated.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t v = 0;

	for (size_t i = 0; i < len; i++)
		v = (v << 4) | parse_digit(p[i]);
	return (v);
}
451 
/* Parse the two hex characters at 'p' into one byte, high nibble first. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
458 
459 static void
460 send_pending_data(int fd)
461 {
462 	ssize_t nwritten;
463 
464 	if (cur_resp.len == 0) {
465 		mevent_disable(write_event);
466 		return;
467 	}
468 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
469 	if (nwritten == -1) {
470 		warn("Write to GDB socket failed");
471 		close_connection();
472 	} else {
473 		io_buffer_advance(&cur_resp, nwritten);
474 		if (cur_resp.len == 0)
475 			mevent_disable(write_event);
476 		else
477 			mevent_enable(write_event);
478 	}
479 }
480 
481 /* Append a single character to the output buffer. */
482 static void
483 send_char(uint8_t data)
484 {
485 	io_buffer_grow(&cur_resp, 1);
486 	*io_buffer_tail(&cur_resp) = data;
487 	cur_resp.len++;
488 }
489 
490 /* Append an array of bytes to the output buffer. */
491 static void
492 send_data(const uint8_t *data, size_t len)
493 {
494 
495 	io_buffer_grow(&cur_resp, len);
496 	memcpy(io_buffer_tail(&cur_resp), data, len);
497 	cur_resp.len += len;
498 }
499 
/*
 * Write the two lowercase hex characters for 'v' into buf[0]
 * (high nibble) and buf[1] (low nibble).  No terminator is added.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t hi, lo;

	hi = v >> 4;
	lo = v & 0xf;
	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
507 
/*
 * Queue one byte, written as two hex characters, at the end of the
 * output buffer.  The packet checksum is not updated.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
520 
521 static void
522 start_packet(void)
523 {
524 
525 	send_char('$');
526 	cur_csum = 0;
527 }
528 
529 static void
530 finish_packet(void)
531 {
532 
533 	send_char('#');
534 	send_byte(cur_csum);
535 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
536 }
537 
538 /*
539  * Append a single character (for the packet payload) and update the
540  * checksum.
541  */
542 static void
543 append_char(uint8_t v)
544 {
545 
546 	send_char(v);
547 	cur_csum += v;
548 }
549 
550 /*
551  * Append an array of bytes (for the packet payload) and update the
552  * checksum.
553  */
554 static void
555 append_packet_data(const uint8_t *data, size_t len)
556 {
557 
558 	send_data(data, len);
559 	while (len > 0) {
560 		cur_csum += *data;
561 		data++;
562 		len--;
563 	}
564 }
565 
/*
 * Add a NUL-terminated string (without the terminator) to the packet
 * payload, updating the checksum.
 */
static void
append_string(const char *str)
{

	/*
	 * The payload helpers work on uint8_t; cast explicitly rather
	 * than relying on an implicit char/uint8_t pointer conversion.
	 */
	append_packet_data((const uint8_t *)str, strlen(str));
}
572 
/*
 * Add one byte, written as two hex characters, to the packet payload
 * and update the checksum.
 */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
581 
/*
 * Add the low 'len' bytes of 'value' to the packet payload as hex,
 * least significant byte first.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

	for (; len > 0; len--) {
		append_byte((uint8_t)value);
		value >>= 8;
	}
}
592 
/*
 * Add the low 'len' bytes of 'value' to the packet payload as hex,
 * most significant byte first.  Bytes beyond the width of uintmax_t
 * are emitted as zero.  Nothing is emitted when 'len' is 0.
 *
 * The bytes are emitted one at a time instead of being staged in a
 * variable-length array, so there is no zero-length array for
 * len == 0 and no caller-sized stack allocation.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	size_t i;
	uint8_t b;

	for (i = len; i > 0; i--) {
		/* Shifting by the full width or more is undefined; emit 0. */
		if (i - 1 < sizeof(value))
			b = (uint8_t)(value >> ((i - 1) * 8));
		else
			b = 0;
		append_byte(b);
	}
}
605 
/*
 * Add 'value' to the packet payload as big-endian hex using the
 * smallest whole number of bytes; zero is sent as the single
 * character '0'.
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value == 0) {
		append_char('0');
		return;
	}
	nbytes = (fls(value) + 7) / 8;
	append_unsigned_be(value, nbytes);
}
615 
/*
 * Add a NUL-terminated string to the packet payload with every
 * character encoded as two hex characters.
 */
static void
append_asciihex(const char *str)
{
	const char *cp;

	for (cp = str; *cp != '\0'; cp++)
		append_byte(*cp);
}
625 
/* Queue a packet with an empty payload. */
static void
send_empty_response(void)
{

	start_packet();
	/* No payload bytes. */
	finish_packet();
}
633 
/*
 * Queue an error reply: 'E' followed by the low 8 bits of 'error' as
 * two hex characters.
 */
static void
send_error(int error)
{
	uint8_t code;

	code = error;
	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
643 
/* Queue an "OK" reply packet. */
static void
send_ok(void)
{

	start_packet();
	append_char('O');
	append_char('K');
	finish_packet();
}
652 
/*
 * Parse a thread id from 'len' characters at 'data'.
 *
 * Returns 0 for "0", -1 for "-1", -2 for an empty string, and the
 * hexadecimal value of the text otherwise.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (memcmp(data, "-1", 2) == 0)
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
665 
666 /*
667  * Report the current stop event to the debugger.  If the stop is due
668  * to an event triggered on a specific vCPU such as a breakpoint or
669  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
670  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
671  * the reporting vCPU for vCPU events.
672  */
673 static void
674 report_stop(bool set_cur_vcpu)
675 {
676 	struct vcpu_state *vs;
677 
678 	start_packet();
679 	if (stopped_vcpu == -1) {
680 		append_char('S');
681 		append_byte(GDB_SIGNAL_TRAP);
682 	} else {
683 		vs = &vcpu_state[stopped_vcpu];
684 		if (set_cur_vcpu)
685 			cur_vcpu = stopped_vcpu;
686 		append_char('T');
687 		append_byte(GDB_SIGNAL_TRAP);
688 		append_string("thread:");
689 		append_integer(stopped_vcpu + 1);
690 		append_char(';');
691 		if (vs->hit_swbreak) {
692 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
693 			if (swbreak_enabled)
694 				append_string("swbreak:;");
695 		} else if (vs->stepped)
696 			debug("$vCPU %d reporting step\n", stopped_vcpu);
697 		else
698 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
699 	}
700 	finish_packet();
701 	report_next_stop = false;
702 }
703 
704 /*
705  * If this stop is due to a vCPU event, clear that event to mark it as
706  * acknowledged.
707  */
708 static void
709 discard_stop(void)
710 {
711 	struct vcpu_state *vs;
712 
713 	if (stopped_vcpu != -1) {
714 		vs = &vcpu_state[stopped_vcpu];
715 		vs->hit_swbreak = false;
716 		vs->stepped = false;
717 		stopped_vcpu = -1;
718 	}
719 	report_next_stop = true;
720 }
721 
722 static void
723 gdb_finish_suspend_vcpus(void)
724 {
725 
726 	if (first_stop) {
727 		first_stop = false;
728 		stopped_vcpu = -1;
729 	} else if (report_next_stop) {
730 		assert(!response_pending());
731 		report_stop(true);
732 		send_pending_data(cur_fd);
733 	}
734 }
735 
736 /*
737  * vCPU threads invoke this function whenever the vCPU enters the
738  * debug server to pause or report an event.  vCPU threads wait here
739  * as long as the debug server keeps them suspended.
740  */
741 static void
742 _gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
743 {
744 	int vcpuid = vcpu_id(vcpu);
745 
746 	debug("$vCPU %d suspending\n", vcpuid);
747 	CPU_SET(vcpuid, &vcpus_waiting);
748 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
749 		gdb_finish_suspend_vcpus();
750 	while (CPU_ISSET(vcpuid, &vcpus_suspended))
751 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
752 	CPU_CLR(vcpuid, &vcpus_waiting);
753 	debug("$vCPU %d resuming\n", vcpuid);
754 }
755 
756 /*
757  * Requests vCPU single-stepping using a
758  * VMEXIT suitable for the host platform.
759  */
760 static int
761 _gdb_set_step(struct vcpu *vcpu, int val)
762 {
763 	int error;
764 
765 	/*
766 	 * If the MTRAP cap fails, we are running on an AMD host.
767 	 * In that case, we request DB exits caused by RFLAGS.TF.
768 	 */
769 	error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, val);
770 	if (error != 0)
771 		error = vm_set_capability(vcpu, VM_CAP_RFLAGS_TF, val);
772 	if (error == 0)
773 		(void)vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, val);
774 
775 	return (error);
776 }
777 
778 /*
779  * Checks whether single-stepping is enabled for a given vCPU.
780  */
781 static int
782 _gdb_check_step(struct vcpu *vcpu)
783 {
784 	int val;
785 
786 	if (vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val) != 0) {
787 		if (vm_get_capability(vcpu, VM_CAP_RFLAGS_TF, &val) != 0)
788 			return -1;
789 	}
790 	return 0;
791 }
792 
793 /*
794  * Invoked at the start of a vCPU thread's execution to inform the
795  * debug server about the new thread.
796  */
797 void
798 gdb_cpu_add(struct vcpu *vcpu)
799 {
800 	int vcpuid;
801 
802 	if (!gdb_active)
803 		return;
804 	vcpuid = vcpu_id(vcpu);
805 	debug("$vCPU %d starting\n", vcpuid);
806 	pthread_mutex_lock(&gdb_lock);
807 	assert(vcpuid < guest_ncpus);
808 	assert(vcpus[vcpuid] == NULL);
809 	vcpus[vcpuid] = vcpu;
810 	CPU_SET(vcpuid, &vcpus_active);
811 	if (!TAILQ_EMPTY(&breakpoints)) {
812 		vm_set_capability(vcpu, VM_CAP_BPT_EXIT, 1);
813 		debug("$vCPU %d enabled breakpoint exits\n", vcpuid);
814 	}
815 
816 	/*
817 	 * If a vcpu is added while vcpus are stopped, suspend the new
818 	 * vcpu so that it will pop back out with a debug exit before
819 	 * executing the first instruction.
820 	 */
821 	if (!CPU_EMPTY(&vcpus_suspended)) {
822 		CPU_SET(vcpuid, &vcpus_suspended);
823 		_gdb_cpu_suspend(vcpu, false);
824 	}
825 	pthread_mutex_unlock(&gdb_lock);
826 }
827 
828 /*
829  * Invoked by vCPU before resuming execution.  This enables stepping
830  * if the vCPU is marked as stepping.
831  */
832 static void
833 gdb_cpu_resume(struct vcpu *vcpu)
834 {
835 	struct vcpu_state *vs;
836 	int error;
837 
838 	vs = &vcpu_state[vcpu_id(vcpu)];
839 
840 	/*
841 	 * Any pending event should already be reported before
842 	 * resuming.
843 	 */
844 	assert(vs->hit_swbreak == false);
845 	assert(vs->stepped == false);
846 	if (vs->stepping) {
847 		error = _gdb_set_step(vcpu, 1);
848 		assert(error == 0);
849 	}
850 }
851 
852 /*
853  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
854  * has been suspended due to an event on different vCPU or in response
855  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
856  */
857 void
858 gdb_cpu_suspend(struct vcpu *vcpu)
859 {
860 
861 	if (!gdb_active)
862 		return;
863 	pthread_mutex_lock(&gdb_lock);
864 	_gdb_cpu_suspend(vcpu, true);
865 	gdb_cpu_resume(vcpu);
866 	pthread_mutex_unlock(&gdb_lock);
867 }
868 
869 static void
870 gdb_suspend_vcpus(void)
871 {
872 
873 	assert(pthread_mutex_isowned_np(&gdb_lock));
874 	debug("suspending all CPUs\n");
875 	vcpus_suspended = vcpus_active;
876 	vm_suspend_all_cpus(ctx);
877 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
878 		gdb_finish_suspend_vcpus();
879 }
880 
881 /*
882  * Invoked each time a vmexit handler needs to step a vCPU.
883  * Handles MTRAP and RFLAGS.TF vmexits.
884  */
885 static void
886 gdb_cpu_step(struct vcpu *vcpu)
887 {
888 	struct vcpu_state *vs;
889 	int vcpuid = vcpu_id(vcpu);
890 	int error;
891 
892 	debug("$vCPU %d stepped\n", vcpuid);
893 	pthread_mutex_lock(&gdb_lock);
894 	vs = &vcpu_state[vcpuid];
895 	if (vs->stepping) {
896 		vs->stepping = false;
897 		vs->stepped = true;
898 		error = _gdb_set_step(vcpu, 0);
899 		assert(error == 0);
900 
901 		while (vs->stepped) {
902 			if (stopped_vcpu == -1) {
903 				debug("$vCPU %d reporting step\n", vcpuid);
904 				stopped_vcpu = vcpuid;
905 				gdb_suspend_vcpus();
906 			}
907 			_gdb_cpu_suspend(vcpu, true);
908 		}
909 		gdb_cpu_resume(vcpu);
910 	}
911 	pthread_mutex_unlock(&gdb_lock);
912 }
913 
914 /*
915  * A general handler for VM_EXITCODE_DB.
916  * Handles RFLAGS.TF exits on AMD SVM.
917  */
918 void
919 gdb_cpu_debug(struct vcpu *vcpu, struct vm_exit *vmexit)
920 {
921 	if (!gdb_active)
922 		return;
923 
924 	/* RFLAGS.TF exit? */
925 	if (vmexit->u.dbg.trace_trap) {
926 		gdb_cpu_step(vcpu);
927 	}
928 }
929 
930 /*
931  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
932  * the VT-x-specific MTRAP exit.
933  */
934 void
935 gdb_cpu_mtrap(struct vcpu *vcpu)
936 {
937 	if (!gdb_active)
938 		return;
939 	gdb_cpu_step(vcpu);
940 }
941 
942 static struct breakpoint *
943 find_breakpoint(uint64_t gpa)
944 {
945 	struct breakpoint *bp;
946 
947 	TAILQ_FOREACH(bp, &breakpoints, link) {
948 		if (bp->gpa == gpa)
949 			return (bp);
950 	}
951 	return (NULL);
952 }
953 
954 void
955 gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
956 {
957 	struct breakpoint *bp;
958 	struct vcpu_state *vs;
959 	uint64_t gpa;
960 	int error, vcpuid;
961 
962 	if (!gdb_active) {
963 		EPRINTLN("vm_loop: unexpected VMEXIT_DEBUG");
964 		exit(4);
965 	}
966 	vcpuid = vcpu_id(vcpu);
967 	pthread_mutex_lock(&gdb_lock);
968 	error = guest_vaddr2paddr(vcpu, guest_pc(vmexit), &gpa);
969 	assert(error == 1);
970 	bp = find_breakpoint(gpa);
971 	if (bp != NULL) {
972 		vs = &vcpu_state[vcpuid];
973 		assert(vs->stepping == false);
974 		assert(vs->stepped == false);
975 		assert(vs->hit_swbreak == false);
976 		vs->hit_swbreak = true;
977 		vm_set_register(vcpu, GDB_PC_REGNAME, guest_pc(vmexit));
978 		for (;;) {
979 			if (stopped_vcpu == -1) {
980 				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
981 				    vcpuid, guest_pc(vmexit));
982 				stopped_vcpu = vcpuid;
983 				gdb_suspend_vcpus();
984 			}
985 			_gdb_cpu_suspend(vcpu, true);
986 			if (!vs->hit_swbreak) {
987 				/* Breakpoint reported. */
988 				break;
989 			}
990 			bp = find_breakpoint(gpa);
991 			if (bp == NULL) {
992 				/* Breakpoint was removed. */
993 				vs->hit_swbreak = false;
994 				break;
995 			}
996 		}
997 		gdb_cpu_resume(vcpu);
998 	} else {
999 		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
1000 		    guest_pc(vmexit));
1001 		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
1002 		    vmexit->u.bpt.inst_length);
1003 		assert(error == 0);
1004 		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
1005 		assert(error == 0);
1006 	}
1007 	pthread_mutex_unlock(&gdb_lock);
1008 }
1009 
1010 static bool
1011 gdb_step_vcpu(struct vcpu *vcpu)
1012 {
1013 	int error, vcpuid;
1014 
1015 	vcpuid = vcpu_id(vcpu);
1016 	debug("$vCPU %d step\n", vcpuid);
1017 	error = _gdb_check_step(vcpu);
1018 	if (error < 0)
1019 		return (false);
1020 
1021 	discard_stop();
1022 	vcpu_state[vcpuid].stepping = true;
1023 	vm_resume_cpu(vcpu);
1024 	CPU_CLR(vcpuid, &vcpus_suspended);
1025 	pthread_cond_broadcast(&idle_vcpus);
1026 	return (true);
1027 }
1028 
1029 static void
1030 gdb_resume_vcpus(void)
1031 {
1032 
1033 	assert(pthread_mutex_isowned_np(&gdb_lock));
1034 	vm_resume_all_cpus(ctx);
1035 	debug("resuming all CPUs\n");
1036 	CPU_ZERO(&vcpus_suspended);
1037 	pthread_cond_broadcast(&idle_vcpus);
1038 }
1039 
1040 static void
1041 gdb_read_regs(void)
1042 {
1043 	uint64_t regvals[nitems(gdb_regset)];
1044 	int regnums[nitems(gdb_regset)];
1045 
1046 	for (size_t i = 0; i < nitems(gdb_regset); i++)
1047 		regnums[i] = gdb_regset[i].id;
1048 	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
1049 	    regnums, regvals) == -1) {
1050 		send_error(errno);
1051 		return;
1052 	}
1053 
1054 	start_packet();
1055 	for (size_t i = 0; i < nitems(gdb_regset); i++) {
1056 		if (gdb_regset[i].id == GDB_REG_FIRST_EXT)
1057 			break;
1058 		append_unsigned_native(regvals[i], gdb_regset[i].size);
1059 	}
1060 	finish_packet();
1061 }
1062 
1063 static void
1064 gdb_read_one_reg(const uint8_t *data, size_t len)
1065 {
1066 	uint64_t regval;
1067 	uintmax_t reg;
1068 
1069 	reg = parse_integer(data, len);
1070 	if (reg >= nitems(gdb_regset)) {
1071 		send_error(EINVAL);
1072 		return;
1073 	}
1074 
1075 	if (vm_get_register(vcpus[cur_vcpu], gdb_regset[reg].id, &regval) ==
1076 	    -1) {
1077 		send_error(errno);
1078 		return;
1079 	}
1080 
1081 	start_packet();
1082 	append_unsigned_native(regval, gdb_regset[reg].size);
1083 	finish_packet();
1084 }
1085 
1086 static void
1087 gdb_read_mem(const uint8_t *data, size_t len)
1088 {
1089 	uint64_t gpa, gva, val;
1090 	uint8_t *cp;
1091 	size_t resid, todo, bytes;
1092 	bool started;
1093 	int error;
1094 
1095 	assert(len >= 1);
1096 
1097 	/* Skip 'm' */
1098 	data += 1;
1099 	len -= 1;
1100 
1101 	/* Parse and consume address. */
1102 	cp = memchr(data, ',', len);
1103 	if (cp == NULL || cp == data) {
1104 		send_error(EINVAL);
1105 		return;
1106 	}
1107 	gva = parse_integer(data, cp - data);
1108 	len -= (cp - data) + 1;
1109 	data += (cp - data) + 1;
1110 
1111 	/* Parse length. */
1112 	resid = parse_integer(data, len);
1113 
1114 	started = false;
1115 	while (resid > 0) {
1116 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1117 		if (error == -1) {
1118 			if (started)
1119 				finish_packet();
1120 			else
1121 				send_error(errno);
1122 			return;
1123 		}
1124 		if (error == 0) {
1125 			if (started)
1126 				finish_packet();
1127 			else
1128 				send_error(EFAULT);
1129 			return;
1130 		}
1131 
1132 		/* Read bytes from current page. */
1133 		todo = getpagesize() - gpa % getpagesize();
1134 		if (todo > resid)
1135 			todo = resid;
1136 
1137 		cp = paddr_guest2host(ctx, gpa, todo);
1138 		if (cp != NULL) {
1139 			/*
1140 			 * If this page is guest RAM, read it a byte
1141 			 * at a time.
1142 			 */
1143 			if (!started) {
1144 				start_packet();
1145 				started = true;
1146 			}
1147 			while (todo > 0) {
1148 				append_byte(*cp);
1149 				cp++;
1150 				gpa++;
1151 				gva++;
1152 				resid--;
1153 				todo--;
1154 			}
1155 		} else {
1156 			/*
1157 			 * If this page isn't guest RAM, try to handle
1158 			 * it via MMIO.  For MMIO requests, use
1159 			 * aligned reads of words when possible.
1160 			 */
1161 			while (todo > 0) {
1162 				if (gpa & 1 || todo == 1)
1163 					bytes = 1;
1164 				else if (gpa & 2 || todo == 2)
1165 					bytes = 2;
1166 				else
1167 					bytes = 4;
1168 				error = read_mem(vcpus[cur_vcpu], gpa, &val,
1169 				    bytes);
1170 				if (error == 0) {
1171 					if (!started) {
1172 						start_packet();
1173 						started = true;
1174 					}
1175 					gpa += bytes;
1176 					gva += bytes;
1177 					resid -= bytes;
1178 					todo -= bytes;
1179 					while (bytes > 0) {
1180 						append_byte(val);
1181 						val >>= 8;
1182 						bytes--;
1183 					}
1184 				} else {
1185 					if (started)
1186 						finish_packet();
1187 					else
1188 						send_error(EFAULT);
1189 					return;
1190 				}
1191 			}
1192 		}
1193 		assert(resid == 0 || gpa % getpagesize() == 0);
1194 	}
1195 	if (!started)
1196 		start_packet();
1197 	finish_packet();
1198 }
1199 
1200 static void
1201 gdb_write_mem(const uint8_t *data, size_t len)
1202 {
1203 	uint64_t gpa, gva, val;
1204 	uint8_t *cp;
1205 	size_t resid, todo, bytes;
1206 	int error;
1207 
1208 	assert(len >= 1);
1209 
1210 	/* Skip 'M' */
1211 	data += 1;
1212 	len -= 1;
1213 
1214 	/* Parse and consume address. */
1215 	cp = memchr(data, ',', len);
1216 	if (cp == NULL || cp == data) {
1217 		send_error(EINVAL);
1218 		return;
1219 	}
1220 	gva = parse_integer(data, cp - data);
1221 	len -= (cp - data) + 1;
1222 	data += (cp - data) + 1;
1223 
1224 	/* Parse and consume length. */
1225 	cp = memchr(data, ':', len);
1226 	if (cp == NULL || cp == data) {
1227 		send_error(EINVAL);
1228 		return;
1229 	}
1230 	resid = parse_integer(data, cp - data);
1231 	len -= (cp - data) + 1;
1232 	data += (cp - data) + 1;
1233 
1234 	/* Verify the available bytes match the length. */
1235 	if (len != resid * 2) {
1236 		send_error(EINVAL);
1237 		return;
1238 	}
1239 
1240 	while (resid > 0) {
1241 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1242 		if (error == -1) {
1243 			send_error(errno);
1244 			return;
1245 		}
1246 		if (error == 0) {
1247 			send_error(EFAULT);
1248 			return;
1249 		}
1250 
1251 		/* Write bytes to current page. */
1252 		todo = getpagesize() - gpa % getpagesize();
1253 		if (todo > resid)
1254 			todo = resid;
1255 
1256 		cp = paddr_guest2host(ctx, gpa, todo);
1257 		if (cp != NULL) {
1258 			/*
1259 			 * If this page is guest RAM, write it a byte
1260 			 * at a time.
1261 			 */
1262 			while (todo > 0) {
1263 				assert(len >= 2);
1264 				*cp = parse_byte(data);
1265 				data += 2;
1266 				len -= 2;
1267 				cp++;
1268 				gpa++;
1269 				gva++;
1270 				resid--;
1271 				todo--;
1272 			}
1273 		} else {
1274 			/*
1275 			 * If this page isn't guest RAM, try to handle
1276 			 * it via MMIO.  For MMIO requests, use
1277 			 * aligned writes of words when possible.
1278 			 */
1279 			while (todo > 0) {
1280 				if (gpa & 1 || todo == 1) {
1281 					bytes = 1;
1282 					val = parse_byte(data);
1283 				} else if (gpa & 2 || todo == 2) {
1284 					bytes = 2;
1285 					val = be16toh(parse_integer(data, 4));
1286 				} else {
1287 					bytes = 4;
1288 					val = be32toh(parse_integer(data, 8));
1289 				}
1290 				error = write_mem(vcpus[cur_vcpu], gpa, val,
1291 				    bytes);
1292 				if (error == 0) {
1293 					gpa += bytes;
1294 					gva += bytes;
1295 					resid -= bytes;
1296 					todo -= bytes;
1297 					data += 2 * bytes;
1298 					len -= 2 * bytes;
1299 				} else {
1300 					send_error(EFAULT);
1301 					return;
1302 				}
1303 			}
1304 		}
1305 		assert(resid == 0 || gpa % getpagesize() == 0);
1306 	}
1307 	assert(len == 0);
1308 	send_ok();
1309 }
1310 
1311 static bool
1312 set_breakpoint_caps(bool enable)
1313 {
1314 	cpuset_t mask;
1315 	int vcpu;
1316 
1317 	mask = vcpus_active;
1318 	while (!CPU_EMPTY(&mask)) {
1319 		vcpu = CPU_FFS(&mask) - 1;
1320 		CPU_CLR(vcpu, &mask);
1321 		if (vm_set_capability(vcpus[vcpu], VM_CAP_BPT_EXIT,
1322 		    enable ? 1 : 0) < 0)
1323 			return (false);
1324 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1325 		    enable ? "en" : "dis");
1326 	}
1327 	return (true);
1328 }
1329 
1330 static void
1331 remove_all_sw_breakpoints(void)
1332 {
1333 	struct breakpoint *bp, *nbp;
1334 	uint8_t *cp;
1335 
1336 	if (TAILQ_EMPTY(&breakpoints))
1337 		return;
1338 
1339 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1340 		debug("remove breakpoint at %#lx\n", bp->gpa);
1341 		cp = paddr_guest2host(ctx, bp->gpa, sizeof(bp->shadow_inst));
1342 		memcpy(cp, bp->shadow_inst, sizeof(bp->shadow_inst));
1343 		TAILQ_REMOVE(&breakpoints, bp, link);
1344 		free(bp);
1345 	}
1346 	TAILQ_INIT(&breakpoints);
1347 	set_breakpoint_caps(false);
1348 }
1349 
1350 static void
1351 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1352 {
1353 	struct breakpoint *bp;
1354 	uint64_t gpa;
1355 	uint8_t *cp;
1356 	int error;
1357 
1358 	if (kind != GDB_BP_SIZE) {
1359 		send_error(EINVAL);
1360 		return;
1361 	}
1362 
1363 	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1364 	if (error == -1) {
1365 		send_error(errno);
1366 		return;
1367 	}
1368 	if (error == 0) {
1369 		send_error(EFAULT);
1370 		return;
1371 	}
1372 
1373 	cp = paddr_guest2host(ctx, gpa, sizeof(bp->shadow_inst));
1374 
1375 	/* Only permit breakpoints in guest RAM. */
1376 	if (cp == NULL) {
1377 		send_error(EFAULT);
1378 		return;
1379 	}
1380 
1381 	/* Find any existing breakpoint. */
1382 	bp = find_breakpoint(gpa);
1383 
1384 	/*
1385 	 * Silently ignore duplicate commands since the protocol
1386 	 * requires these packets to be idempotent.
1387 	 */
1388 	if (insert) {
1389 		if (bp == NULL) {
1390 			if (TAILQ_EMPTY(&breakpoints) &&
1391 			    !set_breakpoint_caps(true)) {
1392 				send_empty_response();
1393 				return;
1394 			}
1395 			bp = malloc(sizeof(*bp));
1396 			bp->gpa = gpa;
1397 			memcpy(bp->shadow_inst, cp, sizeof(bp->shadow_inst));
1398 			memcpy(cp, GDB_BP_INSTR, sizeof(bp->shadow_inst));
1399 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1400 			debug("new breakpoint at %#lx\n", gpa);
1401 		}
1402 	} else {
1403 		if (bp != NULL) {
1404 			debug("remove breakpoint at %#lx\n", gpa);
1405 			memcpy(cp, bp->shadow_inst, sizeof(bp->shadow_inst));
1406 			TAILQ_REMOVE(&breakpoints, bp, link);
1407 			free(bp);
1408 			if (TAILQ_EMPTY(&breakpoints))
1409 				set_breakpoint_caps(false);
1410 		}
1411 	}
1412 	send_ok();
1413 }
1414 
1415 static void
1416 parse_breakpoint(const uint8_t *data, size_t len)
1417 {
1418 	uint64_t gva;
1419 	uint8_t *cp;
1420 	bool insert;
1421 	int kind, type;
1422 
1423 	insert = data[0] == 'Z';
1424 
1425 	/* Skip 'Z/z' */
1426 	data += 1;
1427 	len -= 1;
1428 
1429 	/* Parse and consume type. */
1430 	cp = memchr(data, ',', len);
1431 	if (cp == NULL || cp == data) {
1432 		send_error(EINVAL);
1433 		return;
1434 	}
1435 	type = parse_integer(data, cp - data);
1436 	len -= (cp - data) + 1;
1437 	data += (cp - data) + 1;
1438 
1439 	/* Parse and consume address. */
1440 	cp = memchr(data, ',', len);
1441 	if (cp == NULL || cp == data) {
1442 		send_error(EINVAL);
1443 		return;
1444 	}
1445 	gva = parse_integer(data, cp - data);
1446 	len -= (cp - data) + 1;
1447 	data += (cp - data) + 1;
1448 
1449 	/* Parse and consume kind. */
1450 	cp = memchr(data, ';', len);
1451 	if (cp == data) {
1452 		send_error(EINVAL);
1453 		return;
1454 	}
1455 	if (cp != NULL) {
1456 		/*
1457 		 * We do not advertise support for either the
1458 		 * ConditionalBreakpoints or BreakpointCommands
1459 		 * features, so we should not be getting conditions or
1460 		 * commands from the remote end.
1461 		 */
1462 		send_empty_response();
1463 		return;
1464 	}
1465 	kind = parse_integer(data, len);
1466 	data += len;
1467 	len = 0;
1468 
1469 	switch (type) {
1470 	case 0:
1471 		update_sw_breakpoint(gva, kind, insert);
1472 		break;
1473 	default:
1474 		send_empty_response();
1475 		break;
1476 	}
1477 }
1478 
/*
 * Return true if the 'len' bytes at 'data' begin with the NUL-terminated
 * string 'cmd' (i.e. 'cmd' is a prefix of the packet), false otherwise.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (len >= cmdlen && memcmp(data, cmd, cmdlen) == 0);
}
1487 
1488 static void
1489 check_features(const uint8_t *data, size_t len)
1490 {
1491 	char *feature, *next_feature, *str, *value;
1492 	bool supported;
1493 
1494 	str = malloc(len + 1);
1495 	memcpy(str, data, len);
1496 	str[len] = '\0';
1497 	next_feature = str;
1498 
1499 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1500 		/*
1501 		 * Null features shouldn't exist, but skip if they
1502 		 * do.
1503 		 */
1504 		if (strcmp(feature, "") == 0)
1505 			continue;
1506 
1507 		/*
1508 		 * Look for the value or supported / not supported
1509 		 * flag.
1510 		 */
1511 		value = strchr(feature, '=');
1512 		if (value != NULL) {
1513 			*value = '\0';
1514 			value++;
1515 			supported = true;
1516 		} else {
1517 			value = feature + strlen(feature) - 1;
1518 			switch (*value) {
1519 			case '+':
1520 				supported = true;
1521 				break;
1522 			case '-':
1523 				supported = false;
1524 				break;
1525 			default:
1526 				/*
1527 				 * This is really a protocol error,
1528 				 * but we just ignore malformed
1529 				 * features for ease of
1530 				 * implementation.
1531 				 */
1532 				continue;
1533 			}
1534 			value = NULL;
1535 		}
1536 
1537 		if (strcmp(feature, "swbreak") == 0)
1538 			swbreak_enabled = supported;
1539 	}
1540 	free(str);
1541 
1542 	start_packet();
1543 
1544 	/* This is an arbitrary limit. */
1545 	append_string("PacketSize=4096");
1546 	append_string(";swbreak+");
1547 	append_string(";qXfer:features:read+");
1548 	finish_packet();
1549 }
1550 
1551 static void
1552 gdb_query(const uint8_t *data, size_t len)
1553 {
1554 
1555 	/*
1556 	 * TODO:
1557 	 * - qSearch
1558 	 */
1559 	if (command_equals(data, len, "qAttached")) {
1560 		start_packet();
1561 		append_char('1');
1562 		finish_packet();
1563 	} else if (command_equals(data, len, "qC")) {
1564 		start_packet();
1565 		append_string("QC");
1566 		append_integer(cur_vcpu + 1);
1567 		finish_packet();
1568 	} else if (command_equals(data, len, "qfThreadInfo")) {
1569 		cpuset_t mask;
1570 		bool first;
1571 		int vcpu;
1572 
1573 		if (CPU_EMPTY(&vcpus_active)) {
1574 			send_error(EINVAL);
1575 			return;
1576 		}
1577 		mask = vcpus_active;
1578 		start_packet();
1579 		append_char('m');
1580 		first = true;
1581 		while (!CPU_EMPTY(&mask)) {
1582 			vcpu = CPU_FFS(&mask) - 1;
1583 			CPU_CLR(vcpu, &mask);
1584 			if (first)
1585 				first = false;
1586 			else
1587 				append_char(',');
1588 			append_integer(vcpu + 1);
1589 		}
1590 		finish_packet();
1591 	} else if (command_equals(data, len, "qsThreadInfo")) {
1592 		start_packet();
1593 		append_char('l');
1594 		finish_packet();
1595 	} else if (command_equals(data, len, "qSupported")) {
1596 		data += strlen("qSupported");
1597 		len -= strlen("qSupported");
1598 		check_features(data, len);
1599 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1600 		char buf[16];
1601 		int tid;
1602 
1603 		data += strlen("qThreadExtraInfo");
1604 		len -= strlen("qThreadExtraInfo");
1605 		if (len == 0 || *data != ',') {
1606 			send_error(EINVAL);
1607 			return;
1608 		}
1609 		tid = parse_threadid(data + 1, len - 1);
1610 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1611 			send_error(EINVAL);
1612 			return;
1613 		}
1614 
1615 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1616 		start_packet();
1617 		append_asciihex(buf);
1618 		finish_packet();
1619 	} else if (command_equals(data, len, "qXfer:features:read:")) {
1620 		struct stat sb;
1621 		const char *xml;
1622 		const uint8_t *pathend;
1623 		char buf[64], path[PATH_MAX];
1624 		size_t xmllen;
1625 		unsigned int doff, dlen;
1626 		int fd;
1627 
1628 		data += strlen("qXfer:features:read:");
1629 		len -= strlen("qXfer:features:read:");
1630 
1631 		pathend = memchr(data, ':', len);
1632 		if (pathend == NULL ||
1633 		    (size_t)(pathend - data) >= sizeof(path) - 1) {
1634 			send_error(EINVAL);
1635 			return;
1636 		}
1637 		memcpy(path, data, pathend - data);
1638 		path[pathend - data] = '\0';
1639 		data += (pathend - data) + 1;
1640 		len -= (pathend - data) + 1;
1641 
1642 		if (len > sizeof(buf) - 1) {
1643 			send_error(EINVAL);
1644 			return;
1645 		}
1646 		memcpy(buf, data, len);
1647 		buf[len] = '\0';
1648 		if (sscanf(buf, "%x,%x", &doff, &dlen) != 2) {
1649 			send_error(EINVAL);
1650 			return;
1651 		}
1652 
1653 		fd = openat(xml_dfd, path, O_RDONLY | O_RESOLVE_BENEATH);
1654 		if (fd < 0) {
1655 			send_error(errno);
1656 			return;
1657 		}
1658 		if (fstat(fd, &sb) < 0) {
1659 			send_error(errno);
1660 			close(fd);
1661 			return;
1662 		}
1663 		xml = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
1664 		if (xml == MAP_FAILED) {
1665 			send_error(errno);
1666 			close(fd);
1667 			return;
1668 		}
1669 		close(fd);
1670 		xmllen = sb.st_size;
1671 
1672 		start_packet();
1673 		if (doff >= xmllen) {
1674 			append_char('l');
1675 		} else if (doff + dlen >= xmllen) {
1676 			append_char('l');
1677 			append_packet_data(xml + doff, xmllen - doff);
1678 		} else {
1679 			append_char('m');
1680 			append_packet_data(xml + doff, dlen);
1681 		}
1682 		finish_packet();
1683 		(void)munmap(__DECONST(void *, xml), xmllen);
1684 	} else
1685 		send_empty_response();
1686 }
1687 
1688 static void
1689 handle_command(const uint8_t *data, size_t len)
1690 {
1691 
1692 	/* Reject packets with a sequence-id. */
1693 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1694 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1695 		send_empty_response();
1696 		return;
1697 	}
1698 
1699 	switch (*data) {
1700 	case 'c':
1701 		if (len != 1) {
1702 			send_error(EINVAL);
1703 			break;
1704 		}
1705 
1706 		discard_stop();
1707 		gdb_resume_vcpus();
1708 		break;
1709 	case 'D':
1710 		send_ok();
1711 
1712 		/* TODO: Resume any stopped CPUs. */
1713 		break;
1714 	case 'g':
1715 		gdb_read_regs();
1716 		break;
1717 	case 'p':
1718 		gdb_read_one_reg(data + 1, len - 1);
1719 		break;
1720 	case 'H': {
1721 		int tid;
1722 
1723 		if (len < 2 || (data[1] != 'g' && data[1] != 'c')) {
1724 			send_error(EINVAL);
1725 			break;
1726 		}
1727 		tid = parse_threadid(data + 2, len - 2);
1728 		if (tid == -2) {
1729 			send_error(EINVAL);
1730 			break;
1731 		}
1732 
1733 		if (CPU_EMPTY(&vcpus_active)) {
1734 			send_error(EINVAL);
1735 			break;
1736 		}
1737 		if (tid == -1 || tid == 0)
1738 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1739 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1740 			cur_vcpu = tid - 1;
1741 		else {
1742 			send_error(EINVAL);
1743 			break;
1744 		}
1745 		send_ok();
1746 		break;
1747 	}
1748 	case 'm':
1749 		gdb_read_mem(data, len);
1750 		break;
1751 	case 'M':
1752 		gdb_write_mem(data, len);
1753 		break;
1754 	case 'T': {
1755 		int tid;
1756 
1757 		tid = parse_threadid(data + 1, len - 1);
1758 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1759 			send_error(EINVAL);
1760 			return;
1761 		}
1762 		send_ok();
1763 		break;
1764 	}
1765 	case 'q':
1766 		gdb_query(data, len);
1767 		break;
1768 	case 's':
1769 		if (len != 1) {
1770 			send_error(EINVAL);
1771 			break;
1772 		}
1773 
1774 		/* Don't send a reply until a stop occurs. */
1775 		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
1776 			send_error(EOPNOTSUPP);
1777 			break;
1778 		}
1779 		break;
1780 	case 'z':
1781 	case 'Z':
1782 		parse_breakpoint(data, len);
1783 		break;
1784 	case '?':
1785 		report_stop(false);
1786 		break;
1787 	case 'G': /* TODO */
1788 	case 'v':
1789 		/* Handle 'vCont' */
1790 		/* 'vCtrlC' */
1791 	case 'P': /* TODO */
1792 	case 'Q': /* TODO */
1793 	case 't': /* TODO */
1794 	case 'X': /* TODO */
1795 	default:
1796 		send_empty_response();
1797 	}
1798 }
1799 
1800 /* Check for a valid packet in the command buffer. */
1801 static void
1802 check_command(int fd)
1803 {
1804 	uint8_t *head, *hash, *p, sum;
1805 	size_t avail, plen;
1806 
1807 	for (;;) {
1808 		avail = cur_comm.len;
1809 		if (avail == 0)
1810 			return;
1811 		head = io_buffer_head(&cur_comm);
1812 		switch (*head) {
1813 		case 0x03:
1814 			debug("<- Ctrl-C\n");
1815 			io_buffer_consume(&cur_comm, 1);
1816 
1817 			gdb_suspend_vcpus();
1818 			break;
1819 		case '+':
1820 			/* ACK of previous response. */
1821 			debug("<- +\n");
1822 			if (response_pending())
1823 				io_buffer_reset(&cur_resp);
1824 			io_buffer_consume(&cur_comm, 1);
1825 			if (stopped_vcpu != -1 && report_next_stop) {
1826 				report_stop(true);
1827 				send_pending_data(fd);
1828 			}
1829 			break;
1830 		case '-':
1831 			/* NACK of previous response. */
1832 			debug("<- -\n");
1833 			if (response_pending()) {
1834 				cur_resp.len += cur_resp.start;
1835 				cur_resp.start = 0;
1836 				if (cur_resp.data[0] == '+')
1837 					io_buffer_advance(&cur_resp, 1);
1838 				debug("-> %.*s\n", (int)cur_resp.len,
1839 				    io_buffer_head(&cur_resp));
1840 			}
1841 			io_buffer_consume(&cur_comm, 1);
1842 			send_pending_data(fd);
1843 			break;
1844 		case '$':
1845 			/* Packet. */
1846 
1847 			if (response_pending()) {
1848 				warnx("New GDB command while response in "
1849 				    "progress");
1850 				io_buffer_reset(&cur_resp);
1851 			}
1852 
1853 			/* Is packet complete? */
1854 			hash = memchr(head, '#', avail);
1855 			if (hash == NULL)
1856 				return;
1857 			plen = (hash - head + 1) + 2;
1858 			if (avail < plen)
1859 				return;
1860 			debug("<- %.*s\n", (int)plen, head);
1861 
1862 			/* Verify checksum. */
1863 			for (sum = 0, p = head + 1; p < hash; p++)
1864 				sum += *p;
1865 			if (sum != parse_byte(hash + 1)) {
1866 				io_buffer_consume(&cur_comm, plen);
1867 				debug("-> -\n");
1868 				send_char('-');
1869 				send_pending_data(fd);
1870 				break;
1871 			}
1872 			send_char('+');
1873 
1874 			handle_command(head + 1, hash - (head + 1));
1875 			io_buffer_consume(&cur_comm, plen);
1876 			if (!response_pending())
1877 				debug("-> +\n");
1878 			send_pending_data(fd);
1879 			break;
1880 		default:
1881 			/* XXX: Possibly drop connection instead. */
1882 			debug("-> %02x\n", *head);
1883 			io_buffer_consume(&cur_comm, 1);
1884 			break;
1885 		}
1886 	}
1887 }
1888 
1889 static void
1890 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1891 {
1892 	size_t pending;
1893 	ssize_t nread;
1894 	int n;
1895 
1896 	if (ioctl(fd, FIONREAD, &n) == -1) {
1897 		warn("FIONREAD on GDB socket");
1898 		return;
1899 	}
1900 	assert(n >= 0);
1901 	pending = n;
1902 
1903 	/*
1904 	 * 'pending' might be zero due to EOF.  We need to call read
1905 	 * with a non-zero length to detect EOF.
1906 	 */
1907 	if (pending == 0)
1908 		pending = 1;
1909 
1910 	/* Ensure there is room in the command buffer. */
1911 	io_buffer_grow(&cur_comm, pending);
1912 	assert(io_buffer_avail(&cur_comm) >= pending);
1913 
1914 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1915 	if (nread == 0) {
1916 		close_connection();
1917 	} else if (nread == -1) {
1918 		if (errno == EAGAIN)
1919 			return;
1920 
1921 		warn("Read from GDB socket");
1922 		close_connection();
1923 	} else {
1924 		cur_comm.len += nread;
1925 		pthread_mutex_lock(&gdb_lock);
1926 		check_command(fd);
1927 		pthread_mutex_unlock(&gdb_lock);
1928 	}
1929 }
1930 
1931 static void
1932 gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
1933 {
1934 
1935 	send_pending_data(fd);
1936 }
1937 
1938 static void
1939 new_connection(int fd, enum ev_type event __unused, void *arg)
1940 {
1941 	int optval, s;
1942 
1943 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1944 	if (s == -1) {
1945 		if (arg != NULL)
1946 			err(1, "Failed accepting initial GDB connection");
1947 
1948 		/* Silently ignore errors post-startup. */
1949 		return;
1950 	}
1951 
1952 	optval = 1;
1953 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1954 	    -1) {
1955 		warn("Failed to disable SIGPIPE for GDB connection");
1956 		close(s);
1957 		return;
1958 	}
1959 
1960 	pthread_mutex_lock(&gdb_lock);
1961 	if (cur_fd != -1) {
1962 		close(s);
1963 		warnx("Ignoring additional GDB connection.");
1964 	}
1965 
1966 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1967 	if (read_event == NULL) {
1968 		if (arg != NULL)
1969 			err(1, "Failed to setup initial GDB connection");
1970 		pthread_mutex_unlock(&gdb_lock);
1971 		return;
1972 	}
1973 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1974 	if (write_event == NULL) {
1975 		if (arg != NULL)
1976 			err(1, "Failed to setup initial GDB connection");
1977 		mevent_delete_close(read_event);
1978 		read_event = NULL;
1979 	}
1980 
1981 	cur_fd = s;
1982 	cur_vcpu = 0;
1983 	stopped_vcpu = -1;
1984 
1985 	/* Break on attach. */
1986 	first_stop = true;
1987 	report_next_stop = false;
1988 	gdb_suspend_vcpus();
1989 	pthread_mutex_unlock(&gdb_lock);
1990 }
1991 
1992 #ifndef WITHOUT_CAPSICUM
1993 static void
1994 limit_gdb_socket(int s)
1995 {
1996 	cap_rights_t rights;
1997 	unsigned long ioctls[] = { FIONREAD };
1998 
1999 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
2000 	    CAP_SETSOCKOPT, CAP_IOCTL);
2001 	if (caph_rights_limit(s, &rights) == -1)
2002 		errx(EX_OSERR, "Unable to apply rights for sandbox");
2003 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
2004 		errx(EX_OSERR, "Unable to apply rights for sandbox");
2005 }
2006 #endif
2007 
/*
 * Set up the GDB stub for the VM described by '_ctx'.
 *
 * Does nothing unless the "gdb.port" config value is set.  Otherwise it
 * creates a non-blocking listening socket on "gdb.address" (default
 * "localhost") and the given port, registers new_connection() for it,
 * allocates the per-vCPU arrays and opens the directory holding the
 * target description XML files.  When "gdb.wait" is true, vCPU 0 is
 * marked suspended so that it does not start running until a debugger
 * attaches.  Any failure during setup terminates the process.
 */
void
init_gdb(struct vmctx *_ctx)
{
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	/* Best effort: failing to set SO_REUSEADDR is not fatal. */
	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	/* Both arrays are indexed by vCPU id, so neither may be missing. */
	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	if (vcpus == NULL || vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	/* Don't feed a failed F_GETFL result (-1) back into F_SETFL. */
	flags = fcntl(s, F_GETFL);
	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	mevent_add(s, EVF_READ, new_connection, NULL);
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);

	xml_dfd = open(_PATH_GDB_XML, O_DIRECTORY);
	if (xml_dfd == -1)
		err(1, "Failed to open gdb xml directory");
#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_FSTAT, CAP_LOOKUP, CAP_MMAP_R, CAP_PREAD);
	if (caph_rights_limit(xml_dfd, &rights) == -1)
		err(1, "cap_rights_init");
#endif
}
2104