xref: /freebsd/usr.sbin/bhyve/gdb.c (revision b891f61ef538a4e9b4658b4b756635c8036a5788)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 #ifndef WITHOUT_CAPSICUM
30 #include <sys/capsicum.h>
31 #endif
32 #include <sys/endian.h>
33 #include <sys/ioctl.h>
34 #include <sys/mman.h>
35 #include <sys/queue.h>
36 #include <sys/socket.h>
37 #include <machine/atomic.h>
38 #include <machine/specialreg.h>
39 #include <machine/vmm.h>
40 #include <netinet/in.h>
41 #include <assert.h>
42 #ifndef WITHOUT_CAPSICUM
43 #include <capsicum_helpers.h>
44 #endif
45 #include <err.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <netdb.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <stdbool.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <sysexits.h>
56 #include <unistd.h>
57 #include <vmmapi.h>
58 
59 #include "bhyverun.h"
60 #include "config.h"
61 #include "debug.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65 
66 /*
67  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68  * use SIGTRAP.
69  */
70 #define	GDB_SIGNAL_TRAP		5
71 
/* Forward declarations for functions referenced before they are defined. */
static void gdb_resume_vcpus(void);
static void check_command(int fd);

/* mevent registrations for the debugger socket; NULLed by close_connection(). */
static struct mevent *read_event, *write_event;

/*
 * vcpus_active:    vCPUs registered via gdb_cpu_add().
 * vcpus_suspended: vCPUs the debug server is holding stopped.
 * vcpus_waiting:   vCPUs currently parked in _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Held around debug-server state changes; paired with idle_vcpus below. */
static pthread_mutex_t gdb_lock;
/* vCPU threads sleep on this while their bit is set in vcpus_suspended. */
static pthread_cond_t idle_vcpus;
/*
 * first_stop:       when set, the next completed suspend is not reported.
 * report_next_stop: when set, the next completed suspend sends a stop reply.
 * swbreak_enabled:  when set, stop replies for breakpoints add "swbreak:;".
 */
static bool first_stop, report_next_stop, swbreak_enabled;
81 
/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
	uint8_t *data;		/* backing storage, resized with realloc() */
	size_t capacity;	/* bytes allocated at 'data' */
	size_t start;		/* index of the first valid byte */
	size_t len;		/* number of valid bytes from 'start' */
};
95 
/* One software breakpoint planted by the debug server in guest RAM. */
struct breakpoint {
	uint64_t gpa;			/* guest physical address patched */
	uint8_t shadow_inst;		/* original byte replaced by INT 3 */
	TAILQ_ENTRY(breakpoint) link;	/* linkage on 'breakpoints' */
};
101 
/*
 * When a vCPU stops due to an event that should be reported to the
 * debugger, information about the event is stored in this structure.
 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
 * and stops other vCPUs so the event can be reported.  The
 * report_stop() function reports the event for the 'stopped_vcpu'
 * vCPU.  When the debugger resumes execution via continue or step,
 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
 * event handlers until the associated event is reported or disabled.
 *
 * An idle vCPU will have all of the boolean fields set to false.
 *
 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
 * released to execute the stepped instruction.  When the vCPU reports
 * the stepping trap, 'stepped' is set.
 *
 * When a vCPU hits a breakpoint set by the debug server,
 * 'hit_swbreak' is set to true.
 */
struct vcpu_state {
	bool stepping;		/* released to execute one instruction */
	bool stepped;		/* step trap taken, not yet acknowledged */
	bool hit_swbreak;	/* debug-server breakpoint hit, not yet acked */
};
126 
/*
 * cur_resp holds bytes queued for the debugger.  cur_comm is reset together
 * with it in close_connection(); presumably it holds incoming command bytes
 * (its consumers are not all visible here -- TODO confirm).
 */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet payload currently being built. */
static uint8_t cur_csum;
/* VM context handed to the vm_*() and paddr_guest2host() calls. */
static struct vmctx *ctx;
/* Debugger socket descriptor, or -1 when no debugger is connected. */
static int cur_fd = -1;
/* All software breakpoints currently planted in guest memory. */
static TAILQ_HEAD(, breakpoint) breakpoints;
/* Per-vCPU event state, indexed by vCPU id (guest_ncpus entries). */
static struct vcpu_state *vcpu_state;
/* vCPU handles indexed by vCPU id; filled in by gdb_cpu_add(). */
static struct vcpu **vcpus;
/*
 * cur_vcpu is the vCPU that register/memory commands operate on;
 * stopped_vcpu is the vCPU whose event is being reported, or -1 if none.
 */
static int cur_vcpu, stopped_vcpu;
/* The vCPU hooks return immediately while this is false. */
static bool gdb_active = false;
136 
/*
 * Guest registers fetched by gdb_read_regs(), in the order they are
 * emitted in the reply.
 */
static const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
163 
/*
 * Number of bytes emitted for each register in gdb_regset, index for
 * index: the first 17 entries (RAX..RIP) are 8 bytes wide, the remaining
 * 7 (RFLAGS and the segment registers) are 4 bytes wide.
 */
static const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
190 
191 #ifdef GDB_LOG
192 #include <stdarg.h>
193 #include <stdio.h>
194 
195 static void __printflike(1, 2)
196 debug(const char *fmt, ...)
197 {
198 	static FILE *logfile;
199 	va_list ap;
200 
201 	if (logfile == NULL) {
202 		logfile = fopen("/tmp/bhyve_gdb.log", "w");
203 		if (logfile == NULL)
204 			return;
205 #ifndef WITHOUT_CAPSICUM
206 		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
207 			fclose(logfile);
208 			logfile = NULL;
209 			return;
210 		}
211 #endif
212 		setlinebuf(logfile);
213 	}
214 	va_start(ap, fmt);
215 	vfprintf(logfile, fmt, ap);
216 	va_end(ap);
217 }
218 #else
219 #define debug(...)
220 #endif
221 
222 static void	remove_all_sw_breakpoints(void);
223 
224 static int
225 guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
226 {
227 	uint64_t regs[4];
228 	const int regset[4] = {
229 		VM_REG_GUEST_CR0,
230 		VM_REG_GUEST_CR3,
231 		VM_REG_GUEST_CR4,
232 		VM_REG_GUEST_EFER
233 	};
234 
235 	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
236 		return (-1);
237 
238 	/*
239 	 * For the debugger, always pretend to be the kernel (CPL 0),
240 	 * and if long-mode is enabled, always parse addresses as if
241 	 * in 64-bit mode.
242 	 */
243 	paging->cr3 = regs[1];
244 	paging->cpl = 0;
245 	if (regs[3] & EFER_LMA)
246 		paging->cpu_mode = CPU_MODE_64BIT;
247 	else if (regs[0] & CR0_PE)
248 		paging->cpu_mode = CPU_MODE_PROTECTED;
249 	else
250 		paging->cpu_mode = CPU_MODE_REAL;
251 	if (!(regs[0] & CR0_PG))
252 		paging->paging_mode = PAGING_MODE_FLAT;
253 	else if (!(regs[2] & CR4_PAE))
254 		paging->paging_mode = PAGING_MODE_32;
255 	else if (regs[3] & EFER_LME)
256 		paging->paging_mode = (regs[2] & CR4_LA57) ?
257 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
258 	else
259 		paging->paging_mode = PAGING_MODE_PAE;
260 	return (0);
261 }
262 
263 /*
264  * Map a guest virtual address to a physical address (for a given vcpu).
265  * If a guest virtual address is valid, return 1.  If the address is
266  * not valid, return 0.  If an error occurs obtaining the mapping,
267  * return -1.
268  */
269 static int
270 guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
271 {
272 	struct vm_guest_paging paging;
273 	int fault;
274 
275 	if (guest_paging_info(vcpu, &paging) == -1)
276 		return (-1);
277 
278 	/*
279 	 * Always use PROT_READ.  We really care if the VA is
280 	 * accessible, not if the current vCPU can write.
281 	 */
282 	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
283 	    &fault) == -1)
284 		return (-1);
285 	if (fault)
286 		return (0);
287 	return (1);
288 }
289 
290 static void
291 io_buffer_reset(struct io_buffer *io)
292 {
293 
294 	io->start = 0;
295 	io->len = 0;
296 }
297 
298 /* Available room for adding data. */
299 static size_t
300 io_buffer_avail(struct io_buffer *io)
301 {
302 
303 	return (io->capacity - (io->start + io->len));
304 }
305 
306 static uint8_t *
307 io_buffer_head(struct io_buffer *io)
308 {
309 
310 	return (io->data + io->start);
311 }
312 
313 static uint8_t *
314 io_buffer_tail(struct io_buffer *io)
315 {
316 
317 	return (io->data + io->start + io->len);
318 }
319 
320 static void
321 io_buffer_advance(struct io_buffer *io, size_t amount)
322 {
323 
324 	assert(amount <= io->len);
325 	io->start += amount;
326 	io->len -= amount;
327 }
328 
329 static void
330 io_buffer_consume(struct io_buffer *io, size_t amount)
331 {
332 
333 	io_buffer_advance(io, amount);
334 	if (io->len == 0) {
335 		io->start = 0;
336 		return;
337 	}
338 
339 	/*
340 	 * XXX: Consider making this move optional and compacting on a
341 	 * future read() before realloc().
342 	 */
343 	memmove(io->data, io_buffer_head(io), io->len);
344 	io->start = 0;
345 }
346 
347 static void
348 io_buffer_grow(struct io_buffer *io, size_t newsize)
349 {
350 	uint8_t *new_data;
351 	size_t avail, new_cap;
352 
353 	avail = io_buffer_avail(io);
354 	if (newsize <= avail)
355 		return;
356 
357 	new_cap = io->capacity + (newsize - avail);
358 	new_data = realloc(io->data, new_cap);
359 	if (new_data == NULL)
360 		err(1, "Failed to grow GDB I/O buffer");
361 	io->data = new_data;
362 	io->capacity = new_cap;
363 }
364 
365 static bool
366 response_pending(void)
367 {
368 
369 	if (cur_resp.start == 0 && cur_resp.len == 0)
370 		return (false);
371 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
372 		return (false);
373 	return (true);
374 }
375 
376 static void
377 close_connection(void)
378 {
379 
380 	/*
381 	 * XXX: This triggers a warning because mevent does the close
382 	 * before the EV_DELETE.
383 	 */
384 	pthread_mutex_lock(&gdb_lock);
385 	mevent_delete(write_event);
386 	mevent_delete_close(read_event);
387 	write_event = NULL;
388 	read_event = NULL;
389 	io_buffer_reset(&cur_comm);
390 	io_buffer_reset(&cur_resp);
391 	cur_fd = -1;
392 
393 	remove_all_sw_breakpoints();
394 
395 	/* Clear any pending events. */
396 	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
397 
398 	/* Resume any stopped vCPUs. */
399 	gdb_resume_vcpus();
400 	pthread_mutex_unlock(&gdb_lock);
401 }
402 
/* Convert a value in the range 0..15 to its lowercase hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble < 10 ? nibble + '0' : nibble - 10 + 'a');
}
412 
/*
 * Convert one hex character (either case) to its value.  Any other
 * character yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t lower;

	if (v >= '0' && v <= '9')
		return (v - '0');

	/* Setting bit 0x20 folds 'A'..'F' onto 'a'..'f'. */
	lower = v | 0x20;
	if (lower >= 'a' && lower <= 'f')
		return (lower - 'a' + 10);

	return (0xF);
}
425 
/*
 * Parse 'len' hex characters at 'p', most significant digit first,
 * and return the accumulated value.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t result;
	size_t i;

	result = 0;
	for (i = 0; i < len; i++)
		result = (result << 4) | parse_digit(p[i]);
	return (result);
}
441 
/* Decode the two hex characters at 'p' into a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
448 
449 static void
450 send_pending_data(int fd)
451 {
452 	ssize_t nwritten;
453 
454 	if (cur_resp.len == 0) {
455 		mevent_disable(write_event);
456 		return;
457 	}
458 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
459 	if (nwritten == -1) {
460 		warn("Write to GDB socket failed");
461 		close_connection();
462 	} else {
463 		io_buffer_advance(&cur_resp, nwritten);
464 		if (cur_resp.len == 0)
465 			mevent_disable(write_event);
466 		else
467 			mevent_enable(write_event);
468 	}
469 }
470 
471 /* Append a single character to the output buffer. */
472 static void
473 send_char(uint8_t data)
474 {
475 	io_buffer_grow(&cur_resp, 1);
476 	*io_buffer_tail(&cur_resp) = data;
477 	cur_resp.len++;
478 }
479 
480 /* Append an array of bytes to the output buffer. */
481 static void
482 send_data(const uint8_t *data, size_t len)
483 {
484 
485 	io_buffer_grow(&cur_resp, len);
486 	memcpy(io_buffer_tail(&cur_resp), data, len);
487 	cur_resp.len += len;
488 }
489 
/* Write the two lowercase hex characters for 'v' into buf[0..1]. */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t hi, lo;

	hi = v >> 4;
	lo = v & 0xf;
	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
497 
/*
 * Queue one byte as two hex characters.  The packet checksum is not
 * updated.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
510 
511 static void
512 start_packet(void)
513 {
514 
515 	send_char('$');
516 	cur_csum = 0;
517 }
518 
/*
 * Terminate the packet being built: queue '#' followed by the running
 * checksum as two hex characters, then log the queued response bytes.
 */
static void
finish_packet(void)
{

	send_char('#');
	/* The trailer uses send_*() so it is not folded into cur_csum. */
	send_byte(cur_csum);
	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
}
527 
528 /*
529  * Append a single character (for the packet payload) and update the
530  * checksum.
531  */
532 static void
533 append_char(uint8_t v)
534 {
535 
536 	send_char(v);
537 	cur_csum += v;
538 }
539 
540 /*
541  * Append an array of bytes (for the packet payload) and update the
542  * checksum.
543  */
544 static void
545 append_packet_data(const uint8_t *data, size_t len)
546 {
547 
548 	send_data(data, len);
549 	while (len > 0) {
550 		cur_csum += *data;
551 		data++;
552 		len--;
553 	}
554 }
555 
/* Add a NUL-terminated string (without the NUL) to the packet payload. */
static void
append_string(const char *str)
{
	size_t n;

	n = strlen(str);
	append_packet_data((const uint8_t *)str, n);
}
562 
/* Add one byte to the packet payload as two hex characters. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
571 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

	while (len > 0) {
		append_byte((uint8_t)(value & 0xff));
		value >>= 8;
		len--;
	}
}
582 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.
 *
 * Bytes are emitted directly rather than staged in a variable-length
 * array: a VLA sized from 'len' has undefined behaviour when len is 0
 * and puts an unbounded amount on the stack.  Positions beyond the
 * width of uintmax_t are emitted as zero, matching what repeated
 * right-shifting of 'value' would produce.  A 'len' of 0 emits nothing.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	size_t idx;
	uint8_t byte;

	for (idx = len; idx > 0; idx--) {
		/* Shifting by the full width or more is undefined; emit 0. */
		if (idx > sizeof(value))
			byte = 0;
		else
			byte = (uint8_t)(value >> ((idx - 1) * 8));
		append_byte(byte);
	}
}
595 
/*
 * Add 'value' to the packet payload as big-endian hex using the
 * fewest whole bytes that hold it; zero is sent as a single '0'.
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value == 0) {
		append_char('0');
		return;
	}

	/* fls() gives the index of the highest set bit; round up to bytes. */
	nbytes = (fls(value) + 7) / 8;
	append_unsigned_be(value, nbytes);
}
605 
/* Add each character of 'str' to the packet payload as two hex digits. */
static void
append_asciihex(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		append_byte(*p);
}
615 
/* Queue a packet with an empty payload ("$#00"). */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
623 
/*
 * Queue an error packet: 'E' followed by the low byte of 'error' as
 * two hex characters.
 */
static void
send_error(int error)
{
	uint8_t code;

	code = error;
	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
633 
/* Queue an "OK" packet. */
static void
send_ok(void)
{

	start_packet();
	append_char('O');
	append_char('K');
	finish_packet();
}
642 
/*
 * Parse a thread id field.  "0" yields 0, "-1" yields -1, an empty
 * field yields -2, and anything else is parsed as hex.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (data[0] == '-' && data[1] == '1')
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
655 
656 /*
657  * Report the current stop event to the debugger.  If the stop is due
658  * to an event triggered on a specific vCPU such as a breakpoint or
659  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
660  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
661  * the reporting vCPU for vCPU events.
662  */
663 static void
664 report_stop(bool set_cur_vcpu)
665 {
666 	struct vcpu_state *vs;
667 
668 	start_packet();
669 	if (stopped_vcpu == -1) {
670 		append_char('S');
671 		append_byte(GDB_SIGNAL_TRAP);
672 	} else {
673 		vs = &vcpu_state[stopped_vcpu];
674 		if (set_cur_vcpu)
675 			cur_vcpu = stopped_vcpu;
676 		append_char('T');
677 		append_byte(GDB_SIGNAL_TRAP);
678 		append_string("thread:");
679 		append_integer(stopped_vcpu + 1);
680 		append_char(';');
681 		if (vs->hit_swbreak) {
682 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
683 			if (swbreak_enabled)
684 				append_string("swbreak:;");
685 		} else if (vs->stepped)
686 			debug("$vCPU %d reporting step\n", stopped_vcpu);
687 		else
688 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
689 	}
690 	finish_packet();
691 	report_next_stop = false;
692 }
693 
694 /*
695  * If this stop is due to a vCPU event, clear that event to mark it as
696  * acknowledged.
697  */
698 static void
699 discard_stop(void)
700 {
701 	struct vcpu_state *vs;
702 
703 	if (stopped_vcpu != -1) {
704 		vs = &vcpu_state[stopped_vcpu];
705 		vs->hit_swbreak = false;
706 		vs->stepped = false;
707 		stopped_vcpu = -1;
708 	}
709 	report_next_stop = true;
710 }
711 
712 static void
713 gdb_finish_suspend_vcpus(void)
714 {
715 
716 	if (first_stop) {
717 		first_stop = false;
718 		stopped_vcpu = -1;
719 	} else if (report_next_stop) {
720 		assert(!response_pending());
721 		report_stop(true);
722 		send_pending_data(cur_fd);
723 	}
724 }
725 
/*
 * vCPU threads invoke this function whenever the vCPU enters the
 * debug server to pause or report an event.  vCPU threads wait here
 * as long as the debug server keeps them suspended.
 *
 * The visible callers all hold gdb_lock, which pthread_cond_wait()
 * releases while sleeping.  If 'report_stop' is true and this vCPU is
 * the last one to arrive (the waiting set now equals the suspended
 * set), the stop is finalized via gdb_finish_suspend_vcpus().
 */
static void
_gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
{
	int vcpuid = vcpu_id(vcpu);

	debug("$vCPU %d suspending\n", vcpuid);
	/* Announce that this vCPU has reached the debug server. */
	CPU_SET(vcpuid, &vcpus_waiting);
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	/* Sleep until the debug server clears our bit and broadcasts. */
	while (CPU_ISSET(vcpuid, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpuid, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpuid);
}
745 
746 /*
747  * Requests vCPU single-stepping using a
748  * VMEXIT suitable for the host platform.
749  */
750 static int
751 _gdb_set_step(struct vcpu *vcpu, int val)
752 {
753 	int error;
754 
755 	/*
756 	 * If the MTRAP cap fails, we are running on an AMD host.
757 	 * In that case, we request DB exits caused by RFLAGS.TF.
758 	 */
759 	error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, val);
760 	if (error != 0)
761 		error = vm_set_capability(vcpu, VM_CAP_RFLAGS_TF, val);
762 	if (error == 0)
763 		(void)vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, val);
764 
765 	return (error);
766 }
767 
768 /*
769  * Checks whether single-stepping is enabled for a given vCPU.
770  */
771 static int
772 _gdb_check_step(struct vcpu *vcpu)
773 {
774 	int val;
775 
776 	if (vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val) != 0) {
777 		if (vm_get_capability(vcpu, VM_CAP_RFLAGS_TF, &val) != 0)
778 			return -1;
779 	}
780 	return 0;
781 }
782 
783 /*
784  * Invoked at the start of a vCPU thread's execution to inform the
785  * debug server about the new thread.
786  */
787 void
788 gdb_cpu_add(struct vcpu *vcpu)
789 {
790 	int vcpuid;
791 
792 	if (!gdb_active)
793 		return;
794 	vcpuid = vcpu_id(vcpu);
795 	debug("$vCPU %d starting\n", vcpuid);
796 	pthread_mutex_lock(&gdb_lock);
797 	assert(vcpuid < guest_ncpus);
798 	assert(vcpus[vcpuid] == NULL);
799 	vcpus[vcpuid] = vcpu;
800 	CPU_SET(vcpuid, &vcpus_active);
801 	if (!TAILQ_EMPTY(&breakpoints)) {
802 		vm_set_capability(vcpu, VM_CAP_BPT_EXIT, 1);
803 		debug("$vCPU %d enabled breakpoint exits\n", vcpuid);
804 	}
805 
806 	/*
807 	 * If a vcpu is added while vcpus are stopped, suspend the new
808 	 * vcpu so that it will pop back out with a debug exit before
809 	 * executing the first instruction.
810 	 */
811 	if (!CPU_EMPTY(&vcpus_suspended)) {
812 		CPU_SET(vcpuid, &vcpus_suspended);
813 		_gdb_cpu_suspend(vcpu, false);
814 	}
815 	pthread_mutex_unlock(&gdb_lock);
816 }
817 
818 /*
819  * Invoked by vCPU before resuming execution.  This enables stepping
820  * if the vCPU is marked as stepping.
821  */
822 static void
823 gdb_cpu_resume(struct vcpu *vcpu)
824 {
825 	struct vcpu_state *vs;
826 	int error;
827 
828 	vs = &vcpu_state[vcpu_id(vcpu)];
829 
830 	/*
831 	 * Any pending event should already be reported before
832 	 * resuming.
833 	 */
834 	assert(vs->hit_swbreak == false);
835 	assert(vs->stepped == false);
836 	if (vs->stepping) {
837 		error = _gdb_set_step(vcpu, 1);
838 		assert(error == 0);
839 	}
840 }
841 
842 /*
843  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
844  * has been suspended due to an event on different vCPU or in response
845  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
846  */
847 void
848 gdb_cpu_suspend(struct vcpu *vcpu)
849 {
850 
851 	if (!gdb_active)
852 		return;
853 	pthread_mutex_lock(&gdb_lock);
854 	_gdb_cpu_suspend(vcpu, true);
855 	gdb_cpu_resume(vcpu);
856 	pthread_mutex_unlock(&gdb_lock);
857 }
858 
859 static void
860 gdb_suspend_vcpus(void)
861 {
862 
863 	assert(pthread_mutex_isowned_np(&gdb_lock));
864 	debug("suspending all CPUs\n");
865 	vcpus_suspended = vcpus_active;
866 	vm_suspend_all_cpus(ctx);
867 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
868 		gdb_finish_suspend_vcpus();
869 }
870 
/*
 * Invoked each time a vmexit handler needs to step a vCPU.
 * Handles MTRAP and RFLAGS.TF vmexits.
 *
 * If the vCPU was not marked as stepping, the exit is ignored.
 * Otherwise the step is recorded, step exits are turned off again,
 * and the vCPU waits until the debugger has acknowledged the event
 * (discard_stop() clears 'stepped').
 */
static void
gdb_cpu_step(struct vcpu *vcpu)
{
	struct vcpu_state *vs;
	int vcpuid = vcpu_id(vcpu);
	int error;

	debug("$vCPU %d stepped\n", vcpuid);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpuid];
	if (vs->stepping) {
		vs->stepping = false;
		vs->stepped = true;
		error = _gdb_set_step(vcpu, 0);
		assert(error == 0);

		while (vs->stepped) {
			/*
			 * Claim the stop if no other vCPU currently
			 * owns it; otherwise park and retry once that
			 * vCPU's event has been handled.
			 */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpuid);
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}
903 
904 /*
905  * A general handler for VM_EXITCODE_DB.
906  * Handles RFLAGS.TF exits on AMD SVM.
907  */
908 void
909 gdb_cpu_debug(struct vcpu *vcpu, struct vm_exit *vmexit)
910 {
911 	if (!gdb_active)
912 		return;
913 
914 	/* RFLAGS.TF exit? */
915 	if (vmexit->u.dbg.trace_trap) {
916 		gdb_cpu_step(vcpu);
917 	}
918 }
919 
920 /*
921  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
922  * the VT-x-specific MTRAP exit.
923  */
924 void
925 gdb_cpu_mtrap(struct vcpu *vcpu)
926 {
927 	if (!gdb_active)
928 		return;
929 	gdb_cpu_step(vcpu);
930 }
931 
932 static struct breakpoint *
933 find_breakpoint(uint64_t gpa)
934 {
935 	struct breakpoint *bp;
936 
937 	TAILQ_FOREACH(bp, &breakpoints, link) {
938 		if (bp->gpa == gpa)
939 			return (bp);
940 	}
941 	return (NULL);
942 }
943 
/*
 * Handler for a breakpoint exit on 'vcpu'.
 *
 * If the faulting RIP maps to a breakpoint planted by the debug
 * server, the vCPU records the hit and waits until the debugger has
 * either acknowledged it or removed the breakpoint.  Otherwise the
 * breakpoint belongs to the guest and a #BP exception is injected
 * back into it.
 *
 * Exits the process if called while the debug server is inactive.
 */
void
gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error, vcpuid;

	if (!gdb_active) {
		EPRINTLN("vm_loop: unexpected VMEXIT_DEBUG");
		exit(4);
	}
	vcpuid = vcpu_id(vcpu);
	pthread_mutex_lock(&gdb_lock);
	/* Breakpoints are keyed by physical address, so translate RIP. */
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpuid];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/* Set RIP to the value reported with the exit. */
		vm_set_register(vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		for (;;) {
			/*
			 * Claim the stop if no other vCPU owns it;
			 * otherwise park and re-check afterwards.
			 */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
				    vcpuid, vmexit->rip);
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		/* Not one of ours: hand the #BP back to the guest. */
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
		    vmexit->rip);
		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
		    vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
998 }
999 
1000 static bool
1001 gdb_step_vcpu(struct vcpu *vcpu)
1002 {
1003 	int error, vcpuid;
1004 
1005 	vcpuid = vcpu_id(vcpu);
1006 	debug("$vCPU %d step\n", vcpuid);
1007 	error = _gdb_check_step(vcpu);
1008 	if (error < 0)
1009 		return (false);
1010 
1011 	discard_stop();
1012 	vcpu_state[vcpuid].stepping = true;
1013 	vm_resume_cpu(vcpu);
1014 	CPU_CLR(vcpuid, &vcpus_suspended);
1015 	pthread_cond_broadcast(&idle_vcpus);
1016 	return (true);
1017 }
1018 
/*
 * Let every vCPU run again: resume them in the hypervisor, clear the
 * suspended set and wake all threads sleeping on idle_vcpus.  The
 * caller must hold gdb_lock.
 */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_all_cpus(ctx);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
1029 
1030 static void
1031 gdb_read_regs(void)
1032 {
1033 	uint64_t regvals[nitems(gdb_regset)];
1034 
1035 	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
1036 	    gdb_regset, regvals) == -1) {
1037 		send_error(errno);
1038 		return;
1039 	}
1040 	start_packet();
1041 	for (size_t i = 0; i < nitems(regvals); i++)
1042 		append_unsigned_native(regvals[i], gdb_regsize[i]);
1043 	finish_packet();
1044 }
1045 
1046 static void
1047 gdb_read_mem(const uint8_t *data, size_t len)
1048 {
1049 	uint64_t gpa, gva, val;
1050 	uint8_t *cp;
1051 	size_t resid, todo, bytes;
1052 	bool started;
1053 	int error;
1054 
1055 	/* Skip 'm' */
1056 	data += 1;
1057 	len -= 1;
1058 
1059 	/* Parse and consume address. */
1060 	cp = memchr(data, ',', len);
1061 	if (cp == NULL || cp == data) {
1062 		send_error(EINVAL);
1063 		return;
1064 	}
1065 	gva = parse_integer(data, cp - data);
1066 	len -= (cp - data) + 1;
1067 	data += (cp - data) + 1;
1068 
1069 	/* Parse length. */
1070 	resid = parse_integer(data, len);
1071 
1072 	started = false;
1073 	while (resid > 0) {
1074 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1075 		if (error == -1) {
1076 			if (started)
1077 				finish_packet();
1078 			else
1079 				send_error(errno);
1080 			return;
1081 		}
1082 		if (error == 0) {
1083 			if (started)
1084 				finish_packet();
1085 			else
1086 				send_error(EFAULT);
1087 			return;
1088 		}
1089 
1090 		/* Read bytes from current page. */
1091 		todo = getpagesize() - gpa % getpagesize();
1092 		if (todo > resid)
1093 			todo = resid;
1094 
1095 		cp = paddr_guest2host(ctx, gpa, todo);
1096 		if (cp != NULL) {
1097 			/*
1098 			 * If this page is guest RAM, read it a byte
1099 			 * at a time.
1100 			 */
1101 			if (!started) {
1102 				start_packet();
1103 				started = true;
1104 			}
1105 			while (todo > 0) {
1106 				append_byte(*cp);
1107 				cp++;
1108 				gpa++;
1109 				gva++;
1110 				resid--;
1111 				todo--;
1112 			}
1113 		} else {
1114 			/*
1115 			 * If this page isn't guest RAM, try to handle
1116 			 * it via MMIO.  For MMIO requests, use
1117 			 * aligned reads of words when possible.
1118 			 */
1119 			while (todo > 0) {
1120 				if (gpa & 1 || todo == 1)
1121 					bytes = 1;
1122 				else if (gpa & 2 || todo == 2)
1123 					bytes = 2;
1124 				else
1125 					bytes = 4;
1126 				error = read_mem(vcpus[cur_vcpu], gpa, &val,
1127 				    bytes);
1128 				if (error == 0) {
1129 					if (!started) {
1130 						start_packet();
1131 						started = true;
1132 					}
1133 					gpa += bytes;
1134 					gva += bytes;
1135 					resid -= bytes;
1136 					todo -= bytes;
1137 					while (bytes > 0) {
1138 						append_byte(val);
1139 						val >>= 8;
1140 						bytes--;
1141 					}
1142 				} else {
1143 					if (started)
1144 						finish_packet();
1145 					else
1146 						send_error(EFAULT);
1147 					return;
1148 				}
1149 			}
1150 		}
1151 		assert(resid == 0 || gpa % getpagesize() == 0);
1152 	}
1153 	if (!started)
1154 		start_packet();
1155 	finish_packet();
1156 }
1157 
1158 static void
1159 gdb_write_mem(const uint8_t *data, size_t len)
1160 {
1161 	uint64_t gpa, gva, val;
1162 	uint8_t *cp;
1163 	size_t resid, todo, bytes;
1164 	int error;
1165 
1166 	/* Skip 'M' */
1167 	data += 1;
1168 	len -= 1;
1169 
1170 	/* Parse and consume address. */
1171 	cp = memchr(data, ',', len);
1172 	if (cp == NULL || cp == data) {
1173 		send_error(EINVAL);
1174 		return;
1175 	}
1176 	gva = parse_integer(data, cp - data);
1177 	len -= (cp - data) + 1;
1178 	data += (cp - data) + 1;
1179 
1180 	/* Parse and consume length. */
1181 	cp = memchr(data, ':', len);
1182 	if (cp == NULL || cp == data) {
1183 		send_error(EINVAL);
1184 		return;
1185 	}
1186 	resid = parse_integer(data, cp - data);
1187 	len -= (cp - data) + 1;
1188 	data += (cp - data) + 1;
1189 
1190 	/* Verify the available bytes match the length. */
1191 	if (len != resid * 2) {
1192 		send_error(EINVAL);
1193 		return;
1194 	}
1195 
1196 	while (resid > 0) {
1197 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1198 		if (error == -1) {
1199 			send_error(errno);
1200 			return;
1201 		}
1202 		if (error == 0) {
1203 			send_error(EFAULT);
1204 			return;
1205 		}
1206 
1207 		/* Write bytes to current page. */
1208 		todo = getpagesize() - gpa % getpagesize();
1209 		if (todo > resid)
1210 			todo = resid;
1211 
1212 		cp = paddr_guest2host(ctx, gpa, todo);
1213 		if (cp != NULL) {
1214 			/*
1215 			 * If this page is guest RAM, write it a byte
1216 			 * at a time.
1217 			 */
1218 			while (todo > 0) {
1219 				assert(len >= 2);
1220 				*cp = parse_byte(data);
1221 				data += 2;
1222 				len -= 2;
1223 				cp++;
1224 				gpa++;
1225 				gva++;
1226 				resid--;
1227 				todo--;
1228 			}
1229 		} else {
1230 			/*
1231 			 * If this page isn't guest RAM, try to handle
1232 			 * it via MMIO.  For MMIO requests, use
1233 			 * aligned writes of words when possible.
1234 			 */
1235 			while (todo > 0) {
1236 				if (gpa & 1 || todo == 1) {
1237 					bytes = 1;
1238 					val = parse_byte(data);
1239 				} else if (gpa & 2 || todo == 2) {
1240 					bytes = 2;
1241 					val = be16toh(parse_integer(data, 4));
1242 				} else {
1243 					bytes = 4;
1244 					val = be32toh(parse_integer(data, 8));
1245 				}
1246 				error = write_mem(vcpus[cur_vcpu], gpa, val,
1247 				    bytes);
1248 				if (error == 0) {
1249 					gpa += bytes;
1250 					gva += bytes;
1251 					resid -= bytes;
1252 					todo -= bytes;
1253 					data += 2 * bytes;
1254 					len -= 2 * bytes;
1255 				} else {
1256 					send_error(EFAULT);
1257 					return;
1258 				}
1259 			}
1260 		}
1261 		assert(resid == 0 || gpa % getpagesize() == 0);
1262 	}
1263 	assert(len == 0);
1264 	send_ok();
1265 }
1266 
1267 static bool
1268 set_breakpoint_caps(bool enable)
1269 {
1270 	cpuset_t mask;
1271 	int vcpu;
1272 
1273 	mask = vcpus_active;
1274 	while (!CPU_EMPTY(&mask)) {
1275 		vcpu = CPU_FFS(&mask) - 1;
1276 		CPU_CLR(vcpu, &mask);
1277 		if (vm_set_capability(vcpus[vcpu], VM_CAP_BPT_EXIT,
1278 		    enable ? 1 : 0) < 0)
1279 			return (false);
1280 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1281 		    enable ? "en" : "dis");
1282 	}
1283 	return (true);
1284 }
1285 
1286 static void
1287 remove_all_sw_breakpoints(void)
1288 {
1289 	struct breakpoint *bp, *nbp;
1290 	uint8_t *cp;
1291 
1292 	if (TAILQ_EMPTY(&breakpoints))
1293 		return;
1294 
1295 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1296 		debug("remove breakpoint at %#lx\n", bp->gpa);
1297 		cp = paddr_guest2host(ctx, bp->gpa, 1);
1298 		*cp = bp->shadow_inst;
1299 		TAILQ_REMOVE(&breakpoints, bp, link);
1300 		free(bp);
1301 	}
1302 	TAILQ_INIT(&breakpoints);
1303 	set_breakpoint_caps(false);
1304 }
1305 
1306 static void
1307 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1308 {
1309 	struct breakpoint *bp;
1310 	uint64_t gpa;
1311 	uint8_t *cp;
1312 	int error;
1313 
1314 	if (kind != 1) {
1315 		send_error(EINVAL);
1316 		return;
1317 	}
1318 
1319 	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1320 	if (error == -1) {
1321 		send_error(errno);
1322 		return;
1323 	}
1324 	if (error == 0) {
1325 		send_error(EFAULT);
1326 		return;
1327 	}
1328 
1329 	cp = paddr_guest2host(ctx, gpa, 1);
1330 
1331 	/* Only permit breakpoints in guest RAM. */
1332 	if (cp == NULL) {
1333 		send_error(EFAULT);
1334 		return;
1335 	}
1336 
1337 	/* Find any existing breakpoint. */
1338 	bp = find_breakpoint(gpa);
1339 
1340 	/*
1341 	 * Silently ignore duplicate commands since the protocol
1342 	 * requires these packets to be idempotent.
1343 	 */
1344 	if (insert) {
1345 		if (bp == NULL) {
1346 			if (TAILQ_EMPTY(&breakpoints) &&
1347 			    !set_breakpoint_caps(true)) {
1348 				send_empty_response();
1349 				return;
1350 			}
1351 			bp = malloc(sizeof(*bp));
1352 			bp->gpa = gpa;
1353 			bp->shadow_inst = *cp;
1354 			*cp = 0xcc;	/* INT 3 */
1355 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1356 			debug("new breakpoint at %#lx\n", gpa);
1357 		}
1358 	} else {
1359 		if (bp != NULL) {
1360 			debug("remove breakpoint at %#lx\n", gpa);
1361 			*cp = bp->shadow_inst;
1362 			TAILQ_REMOVE(&breakpoints, bp, link);
1363 			free(bp);
1364 			if (TAILQ_EMPTY(&breakpoints))
1365 				set_breakpoint_caps(false);
1366 		}
1367 	}
1368 	send_ok();
1369 }
1370 
1371 static void
1372 parse_breakpoint(const uint8_t *data, size_t len)
1373 {
1374 	uint64_t gva;
1375 	uint8_t *cp;
1376 	bool insert;
1377 	int kind, type;
1378 
1379 	insert = data[0] == 'Z';
1380 
1381 	/* Skip 'Z/z' */
1382 	data += 1;
1383 	len -= 1;
1384 
1385 	/* Parse and consume type. */
1386 	cp = memchr(data, ',', len);
1387 	if (cp == NULL || cp == data) {
1388 		send_error(EINVAL);
1389 		return;
1390 	}
1391 	type = parse_integer(data, cp - data);
1392 	len -= (cp - data) + 1;
1393 	data += (cp - data) + 1;
1394 
1395 	/* Parse and consume address. */
1396 	cp = memchr(data, ',', len);
1397 	if (cp == NULL || cp == data) {
1398 		send_error(EINVAL);
1399 		return;
1400 	}
1401 	gva = parse_integer(data, cp - data);
1402 	len -= (cp - data) + 1;
1403 	data += (cp - data) + 1;
1404 
1405 	/* Parse and consume kind. */
1406 	cp = memchr(data, ';', len);
1407 	if (cp == data) {
1408 		send_error(EINVAL);
1409 		return;
1410 	}
1411 	if (cp != NULL) {
1412 		/*
1413 		 * We do not advertise support for either the
1414 		 * ConditionalBreakpoints or BreakpointCommands
1415 		 * features, so we should not be getting conditions or
1416 		 * commands from the remote end.
1417 		 */
1418 		send_empty_response();
1419 		return;
1420 	}
1421 	kind = parse_integer(data, len);
1422 	data += len;
1423 	len = 0;
1424 
1425 	switch (type) {
1426 	case 0:
1427 		update_sw_breakpoint(gva, kind, insert);
1428 		break;
1429 	default:
1430 		send_empty_response();
1431 		break;
1432 	}
1433 }
1434 
/*
 * Report whether the first 'len' bytes at 'data' begin with the
 * NUL-terminated string 'cmd'.  This is a prefix match: bytes after
 * the command name are not examined.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (cmdlen <= len && memcmp(data, cmd, cmdlen) == 0);
}
1443 
1444 static void
1445 check_features(const uint8_t *data, size_t len)
1446 {
1447 	char *feature, *next_feature, *str, *value;
1448 	bool supported;
1449 
1450 	str = malloc(len + 1);
1451 	memcpy(str, data, len);
1452 	str[len] = '\0';
1453 	next_feature = str;
1454 
1455 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1456 		/*
1457 		 * Null features shouldn't exist, but skip if they
1458 		 * do.
1459 		 */
1460 		if (strcmp(feature, "") == 0)
1461 			continue;
1462 
1463 		/*
1464 		 * Look for the value or supported / not supported
1465 		 * flag.
1466 		 */
1467 		value = strchr(feature, '=');
1468 		if (value != NULL) {
1469 			*value = '\0';
1470 			value++;
1471 			supported = true;
1472 		} else {
1473 			value = feature + strlen(feature) - 1;
1474 			switch (*value) {
1475 			case '+':
1476 				supported = true;
1477 				break;
1478 			case '-':
1479 				supported = false;
1480 				break;
1481 			default:
1482 				/*
1483 				 * This is really a protocol error,
1484 				 * but we just ignore malformed
1485 				 * features for ease of
1486 				 * implementation.
1487 				 */
1488 				continue;
1489 			}
1490 			value = NULL;
1491 		}
1492 
1493 		if (strcmp(feature, "swbreak") == 0)
1494 			swbreak_enabled = supported;
1495 	}
1496 	free(str);
1497 
1498 	start_packet();
1499 
1500 	/* This is an arbitrary limit. */
1501 	append_string("PacketSize=4096");
1502 	append_string(";swbreak+");
1503 	finish_packet();
1504 }
1505 
1506 static void
1507 gdb_query(const uint8_t *data, size_t len)
1508 {
1509 
1510 	/*
1511 	 * TODO:
1512 	 * - qSearch
1513 	 */
1514 	if (command_equals(data, len, "qAttached")) {
1515 		start_packet();
1516 		append_char('1');
1517 		finish_packet();
1518 	} else if (command_equals(data, len, "qC")) {
1519 		start_packet();
1520 		append_string("QC");
1521 		append_integer(cur_vcpu + 1);
1522 		finish_packet();
1523 	} else if (command_equals(data, len, "qfThreadInfo")) {
1524 		cpuset_t mask;
1525 		bool first;
1526 		int vcpu;
1527 
1528 		if (CPU_EMPTY(&vcpus_active)) {
1529 			send_error(EINVAL);
1530 			return;
1531 		}
1532 		mask = vcpus_active;
1533 		start_packet();
1534 		append_char('m');
1535 		first = true;
1536 		while (!CPU_EMPTY(&mask)) {
1537 			vcpu = CPU_FFS(&mask) - 1;
1538 			CPU_CLR(vcpu, &mask);
1539 			if (first)
1540 				first = false;
1541 			else
1542 				append_char(',');
1543 			append_integer(vcpu + 1);
1544 		}
1545 		finish_packet();
1546 	} else if (command_equals(data, len, "qsThreadInfo")) {
1547 		start_packet();
1548 		append_char('l');
1549 		finish_packet();
1550 	} else if (command_equals(data, len, "qSupported")) {
1551 		data += strlen("qSupported");
1552 		len -= strlen("qSupported");
1553 		check_features(data, len);
1554 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1555 		char buf[16];
1556 		int tid;
1557 
1558 		data += strlen("qThreadExtraInfo");
1559 		len -= strlen("qThreadExtraInfo");
1560 		if (*data != ',') {
1561 			send_error(EINVAL);
1562 			return;
1563 		}
1564 		tid = parse_threadid(data + 1, len - 1);
1565 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1566 			send_error(EINVAL);
1567 			return;
1568 		}
1569 
1570 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1571 		start_packet();
1572 		append_asciihex(buf);
1573 		finish_packet();
1574 	} else
1575 		send_empty_response();
1576 }
1577 
1578 static void
1579 handle_command(const uint8_t *data, size_t len)
1580 {
1581 
1582 	/* Reject packets with a sequence-id. */
1583 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1584 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1585 		send_empty_response();
1586 		return;
1587 	}
1588 
1589 	switch (*data) {
1590 	case 'c':
1591 		if (len != 1) {
1592 			send_error(EINVAL);
1593 			break;
1594 		}
1595 
1596 		discard_stop();
1597 		gdb_resume_vcpus();
1598 		break;
1599 	case 'D':
1600 		send_ok();
1601 
1602 		/* TODO: Resume any stopped CPUs. */
1603 		break;
1604 	case 'g': {
1605 		gdb_read_regs();
1606 		break;
1607 	}
1608 	case 'H': {
1609 		int tid;
1610 
1611 		if (data[1] != 'g' && data[1] != 'c') {
1612 			send_error(EINVAL);
1613 			break;
1614 		}
1615 		tid = parse_threadid(data + 2, len - 2);
1616 		if (tid == -2) {
1617 			send_error(EINVAL);
1618 			break;
1619 		}
1620 
1621 		if (CPU_EMPTY(&vcpus_active)) {
1622 			send_error(EINVAL);
1623 			break;
1624 		}
1625 		if (tid == -1 || tid == 0)
1626 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1627 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1628 			cur_vcpu = tid - 1;
1629 		else {
1630 			send_error(EINVAL);
1631 			break;
1632 		}
1633 		send_ok();
1634 		break;
1635 	}
1636 	case 'm':
1637 		gdb_read_mem(data, len);
1638 		break;
1639 	case 'M':
1640 		gdb_write_mem(data, len);
1641 		break;
1642 	case 'T': {
1643 		int tid;
1644 
1645 		tid = parse_threadid(data + 1, len - 1);
1646 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1647 			send_error(EINVAL);
1648 			return;
1649 		}
1650 		send_ok();
1651 		break;
1652 	}
1653 	case 'q':
1654 		gdb_query(data, len);
1655 		break;
1656 	case 's':
1657 		if (len != 1) {
1658 			send_error(EINVAL);
1659 			break;
1660 		}
1661 
1662 		/* Don't send a reply until a stop occurs. */
1663 		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
1664 			send_error(EOPNOTSUPP);
1665 			break;
1666 		}
1667 		break;
1668 	case 'z':
1669 	case 'Z':
1670 		parse_breakpoint(data, len);
1671 		break;
1672 	case '?':
1673 		report_stop(false);
1674 		break;
1675 	case 'G': /* TODO */
1676 	case 'v':
1677 		/* Handle 'vCont' */
1678 		/* 'vCtrlC' */
1679 	case 'p': /* TODO */
1680 	case 'P': /* TODO */
1681 	case 'Q': /* TODO */
1682 	case 't': /* TODO */
1683 	case 'X': /* TODO */
1684 	default:
1685 		send_empty_response();
1686 	}
1687 }
1688 
1689 /* Check for a valid packet in the command buffer. */
1690 static void
1691 check_command(int fd)
1692 {
1693 	uint8_t *head, *hash, *p, sum;
1694 	size_t avail, plen;
1695 
1696 	for (;;) {
1697 		avail = cur_comm.len;
1698 		if (avail == 0)
1699 			return;
1700 		head = io_buffer_head(&cur_comm);
1701 		switch (*head) {
1702 		case 0x03:
1703 			debug("<- Ctrl-C\n");
1704 			io_buffer_consume(&cur_comm, 1);
1705 
1706 			gdb_suspend_vcpus();
1707 			break;
1708 		case '+':
1709 			/* ACK of previous response. */
1710 			debug("<- +\n");
1711 			if (response_pending())
1712 				io_buffer_reset(&cur_resp);
1713 			io_buffer_consume(&cur_comm, 1);
1714 			if (stopped_vcpu != -1 && report_next_stop) {
1715 				report_stop(true);
1716 				send_pending_data(fd);
1717 			}
1718 			break;
1719 		case '-':
1720 			/* NACK of previous response. */
1721 			debug("<- -\n");
1722 			if (response_pending()) {
1723 				cur_resp.len += cur_resp.start;
1724 				cur_resp.start = 0;
1725 				if (cur_resp.data[0] == '+')
1726 					io_buffer_advance(&cur_resp, 1);
1727 				debug("-> %.*s\n", (int)cur_resp.len,
1728 				    io_buffer_head(&cur_resp));
1729 			}
1730 			io_buffer_consume(&cur_comm, 1);
1731 			send_pending_data(fd);
1732 			break;
1733 		case '$':
1734 			/* Packet. */
1735 
1736 			if (response_pending()) {
1737 				warnx("New GDB command while response in "
1738 				    "progress");
1739 				io_buffer_reset(&cur_resp);
1740 			}
1741 
1742 			/* Is packet complete? */
1743 			hash = memchr(head, '#', avail);
1744 			if (hash == NULL)
1745 				return;
1746 			plen = (hash - head + 1) + 2;
1747 			if (avail < plen)
1748 				return;
1749 			debug("<- %.*s\n", (int)plen, head);
1750 
1751 			/* Verify checksum. */
1752 			for (sum = 0, p = head + 1; p < hash; p++)
1753 				sum += *p;
1754 			if (sum != parse_byte(hash + 1)) {
1755 				io_buffer_consume(&cur_comm, plen);
1756 				debug("-> -\n");
1757 				send_char('-');
1758 				send_pending_data(fd);
1759 				break;
1760 			}
1761 			send_char('+');
1762 
1763 			handle_command(head + 1, hash - (head + 1));
1764 			io_buffer_consume(&cur_comm, plen);
1765 			if (!response_pending())
1766 				debug("-> +\n");
1767 			send_pending_data(fd);
1768 			break;
1769 		default:
1770 			/* XXX: Possibly drop connection instead. */
1771 			debug("-> %02x\n", *head);
1772 			io_buffer_consume(&cur_comm, 1);
1773 			break;
1774 		}
1775 	}
1776 }
1777 
1778 static void
1779 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1780 {
1781 	size_t pending;
1782 	ssize_t nread;
1783 	int n;
1784 
1785 	if (ioctl(fd, FIONREAD, &n) == -1) {
1786 		warn("FIONREAD on GDB socket");
1787 		return;
1788 	}
1789 	assert(n >= 0);
1790 	pending = n;
1791 
1792 	/*
1793 	 * 'pending' might be zero due to EOF.  We need to call read
1794 	 * with a non-zero length to detect EOF.
1795 	 */
1796 	if (pending == 0)
1797 		pending = 1;
1798 
1799 	/* Ensure there is room in the command buffer. */
1800 	io_buffer_grow(&cur_comm, pending);
1801 	assert(io_buffer_avail(&cur_comm) >= pending);
1802 
1803 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1804 	if (nread == 0) {
1805 		close_connection();
1806 	} else if (nread == -1) {
1807 		if (errno == EAGAIN)
1808 			return;
1809 
1810 		warn("Read from GDB socket");
1811 		close_connection();
1812 	} else {
1813 		cur_comm.len += nread;
1814 		pthread_mutex_lock(&gdb_lock);
1815 		check_command(fd);
1816 		pthread_mutex_unlock(&gdb_lock);
1817 	}
1818 }
1819 
1820 static void
1821 gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
1822 {
1823 
1824 	send_pending_data(fd);
1825 }
1826 
1827 static void
1828 new_connection(int fd, enum ev_type event __unused, void *arg)
1829 {
1830 	int optval, s;
1831 
1832 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1833 	if (s == -1) {
1834 		if (arg != NULL)
1835 			err(1, "Failed accepting initial GDB connection");
1836 
1837 		/* Silently ignore errors post-startup. */
1838 		return;
1839 	}
1840 
1841 	optval = 1;
1842 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1843 	    -1) {
1844 		warn("Failed to disable SIGPIPE for GDB connection");
1845 		close(s);
1846 		return;
1847 	}
1848 
1849 	pthread_mutex_lock(&gdb_lock);
1850 	if (cur_fd != -1) {
1851 		close(s);
1852 		warnx("Ignoring additional GDB connection.");
1853 	}
1854 
1855 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1856 	if (read_event == NULL) {
1857 		if (arg != NULL)
1858 			err(1, "Failed to setup initial GDB connection");
1859 		pthread_mutex_unlock(&gdb_lock);
1860 		return;
1861 	}
1862 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1863 	if (write_event == NULL) {
1864 		if (arg != NULL)
1865 			err(1, "Failed to setup initial GDB connection");
1866 		mevent_delete_close(read_event);
1867 		read_event = NULL;
1868 	}
1869 
1870 	cur_fd = s;
1871 	cur_vcpu = 0;
1872 	stopped_vcpu = -1;
1873 
1874 	/* Break on attach. */
1875 	first_stop = true;
1876 	report_next_stop = false;
1877 	gdb_suspend_vcpus();
1878 	pthread_mutex_unlock(&gdb_lock);
1879 }
1880 
1881 #ifndef WITHOUT_CAPSICUM
1882 static void
1883 limit_gdb_socket(int s)
1884 {
1885 	cap_rights_t rights;
1886 	unsigned long ioctls[] = { FIONREAD };
1887 
1888 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1889 	    CAP_SETSOCKOPT, CAP_IOCTL);
1890 	if (caph_rights_limit(s, &rights) == -1)
1891 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1892 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1893 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1894 }
1895 #endif
1896 
/*
 * Set up the GDB stub for the VM described by '_ctx'.
 *
 * Nothing happens unless the "gdb.port" configuration value is set.
 * Otherwise a listening stream socket is bound to "gdb.address"
 * (default "localhost") on that port, the stub's lock, condition
 * variable and per-vCPU arrays are initialised, and new_connection()
 * is registered to accept debuggers.  When "gdb.wait" is true vCPU 0
 * is marked suspended so that it does not run until a debugger
 * attaches.
 *
 * Setup failures (allocation, address resolution, socket setup)
 * terminate the process.
 */
void
init_gdb(struct vmctx *_ctx)
{
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	/* Best effort: failing to set SO_REUSEADDR is not fatal. */
	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);

	/* Both arrays are indexed by vCPU number elsewhere in the stub. */
	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	if (vcpus == NULL || vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	flags = fcntl(s, F_GETFL);
	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	mevent_add(s, EVF_READ, new_connection, NULL);
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);
}
1981