xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 99282790b7d01ec3c4072621d46a0d7302517ad4)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <pthread.h>
52 #include <pthread_np.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <sysexits.h>
58 #include <unistd.h>
59 #include <vmmapi.h>
60 
61 #include "bhyverun.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65 
/*
 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
 * use SIGTRAP.
 */
#define	GDB_SIGNAL_TRAP		5

static void gdb_resume_vcpus(void);
static void check_command(int fd);

/*
 * mevent registrations for the debugger connection.  Both are deleted
 * and reset to NULL by close_connection(); write_event is enabled only
 * while response bytes remain to be written (see send_pending_data()).
 */
static struct mevent *read_event, *write_event;

/*
 * vcpus_active:    vCPUs registered via gdb_cpu_add().
 * vcpus_suspended: vCPUs the debug server wants stopped; set from
 *                  vcpus_active by gdb_suspend_vcpus() and emptied by
 *                  gdb_resume_vcpus().
 * vcpus_waiting:   vCPUs currently parked inside _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Protects the debug server state shared with the vCPU threads. */
static pthread_mutex_t gdb_lock;
/* Suspended vCPU threads sleep on this; broadcast when vCPUs are released. */
static pthread_cond_t idle_vcpus;
/*
 * first_stop:       when set, the next completed suspend is swallowed
 *                   rather than reported (gdb_finish_suspend_vcpus()).
 * report_next_stop: when set, the next completed suspend sends a stop
 *                   reply; cleared by report_stop(), set by discard_stop().
 * swbreak_enabled:  when set, breakpoint stop replies carry "swbreak:;".
 */
static bool first_stop, report_next_stop, swbreak_enabled;
81 
/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
	uint8_t *data;		/* backing storage, resized by io_buffer_grow() */
	size_t capacity;	/* number of bytes allocated at 'data' */
	size_t start;		/* offset of the first valid byte */
	size_t len;		/* count of valid bytes beginning at 'start' */
};
95 
/*
 * A software breakpoint planted by the debug server.  Entries live on
 * the 'breakpoints' list and are looked up by guest physical address.
 */
struct breakpoint {
	uint64_t gpa;			/* guest physical address of the patched byte */
	uint8_t shadow_inst;		/* original byte replaced by INT 3 (0xcc) */
	TAILQ_ENTRY(breakpoint) link;	/* linkage on the 'breakpoints' list */
};
101 
/*
 * When a vCPU stops due to an event that should be reported to the
 * debugger, information about the event is stored in this structure.
 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
 * and stops other vCPUs so the event can be reported.  The
 * report_stop() function reports the event for the 'stopped_vcpu'
 * vCPU.  When the debugger resumes execution via continue or step,
 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
 * event handlers until the associated event is reported or disabled.
 *
 * An idle vCPU will have all of the boolean fields set to false.
 *
 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
 * released to execute the stepped instruction.  When the vCPU reports
 * the stepping trap, 'stepped' is set.
 *
 * When a vCPU hits a breakpoint set by the debug server,
 * 'hit_swbreak' is set to true.
 */
struct vcpu_state {
	bool stepping;		/* released to execute one stepped instruction */
	bool stepped;		/* step trap taken, not yet acknowledged */
	bool hit_swbreak;	/* debug-server breakpoint hit, not yet acknowledged */
};
126 
/*
 * cur_resp holds response bytes queued for the debugger; cur_comm is
 * presumably the buffer for incoming command bytes (its consumer is
 * not in view here -- confirm).  Both are reset when the connection
 * closes.
 */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet payload currently being built. */
static uint8_t cur_csum;
/* VM context handed to every vm_*() call in this file. */
static struct vmctx *ctx;
/* Socket of the connected debugger, or -1 when no debugger is attached. */
static int cur_fd = -1;
/* All software breakpoints currently planted in guest memory. */
static TAILQ_HEAD(, breakpoint) breakpoints;
/* Per-vCPU event state, indexed by vCPU number. */
static struct vcpu_state *vcpu_state;
/*
 * cur_vcpu is the vCPU that register and memory commands operate on;
 * stopped_vcpu is the vCPU whose event caused the current stop, or -1
 * if the stop is not tied to a particular vCPU.
 */
static int cur_vcpu, stopped_vcpu;
134 
/*
 * Guest registers reported by gdb_read_regs(), in the order they are
 * placed in the reply.  gdb_regsize[] is the parallel table giving
 * the number of bytes emitted for each entry, so the two arrays must
 * stay the same length and in the same order.
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
161 
/*
 * Number of bytes emitted for each register in gdb_regset[], in the
 * same order.  The first seventeen entries (RAX through R15, then RIP)
 * are sent as 8 bytes; the remaining seven (RFLAGS and the six
 * segment registers) are sent as 4 bytes.
 */
const int gdb_regsize[] = {
	/* RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP */
	8, 8, 8, 8, 8, 8, 8, 8,
	/* R8 .. R15 */
	8, 8, 8, 8, 8, 8, 8, 8,
	/* RIP */
	8,
	/* RFLAGS, CS, SS, DS, ES, FS, GS */
	4, 4, 4, 4, 4, 4, 4
};
188 
/*
 * Optional protocol tracing.  When GDB_LOG is defined, debug() is a
 * printf-style function that appends to /tmp/bhyve_gdb.log; otherwise
 * it expands to nothing and its arguments are never evaluated.
 */
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	/* Open the log lazily on first use; stay silent if that fails. */
	if (logfile == NULL) {
		logfile = fopen("/tmp/bhyve_gdb.log", "w");
		if (logfile == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		/* Restrict the descriptor to writes; give up if that fails. */
		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
			fclose(logfile);
			logfile = NULL;
			return;
		}
#endif
		/* Line buffering keeps the log current if the process dies. */
		setlinebuf(logfile);
	}
	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
#define debug(...)
#endif
219 
220 static void	remove_all_sw_breakpoints(void);
221 
222 static int
223 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
224 {
225 	uint64_t regs[4];
226 	const int regset[4] = {
227 		VM_REG_GUEST_CR0,
228 		VM_REG_GUEST_CR3,
229 		VM_REG_GUEST_CR4,
230 		VM_REG_GUEST_EFER
231 	};
232 
233 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
234 		return (-1);
235 
236 	/*
237 	 * For the debugger, always pretend to be the kernel (CPL 0),
238 	 * and if long-mode is enabled, always parse addresses as if
239 	 * in 64-bit mode.
240 	 */
241 	paging->cr3 = regs[1];
242 	paging->cpl = 0;
243 	if (regs[3] & EFER_LMA)
244 		paging->cpu_mode = CPU_MODE_64BIT;
245 	else if (regs[0] & CR0_PE)
246 		paging->cpu_mode = CPU_MODE_PROTECTED;
247 	else
248 		paging->cpu_mode = CPU_MODE_REAL;
249 	if (!(regs[0] & CR0_PG))
250 		paging->paging_mode = PAGING_MODE_FLAT;
251 	else if (!(regs[2] & CR4_PAE))
252 		paging->paging_mode = PAGING_MODE_32;
253 	else if (regs[3] & EFER_LME)
254 		paging->paging_mode = PAGING_MODE_64;
255 	else
256 		paging->paging_mode = PAGING_MODE_PAE;
257 	return (0);
258 }
259 
260 /*
261  * Map a guest virtual address to a physical address (for a given vcpu).
262  * If a guest virtual address is valid, return 1.  If the address is
263  * not valid, return 0.  If an error occurs obtaining the mapping,
264  * return -1.
265  */
266 static int
267 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
268 {
269 	struct vm_guest_paging paging;
270 	int fault;
271 
272 	if (guest_paging_info(vcpu, &paging) == -1)
273 		return (-1);
274 
275 	/*
276 	 * Always use PROT_READ.  We really care if the VA is
277 	 * accessible, not if the current vCPU can write.
278 	 */
279 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
280 	    &fault) == -1)
281 		return (-1);
282 	if (fault)
283 		return (0);
284 	return (1);
285 }
286 
287 static void
288 io_buffer_reset(struct io_buffer *io)
289 {
290 
291 	io->start = 0;
292 	io->len = 0;
293 }
294 
295 /* Available room for adding data. */
296 static size_t
297 io_buffer_avail(struct io_buffer *io)
298 {
299 
300 	return (io->capacity - (io->start + io->len));
301 }
302 
303 static uint8_t *
304 io_buffer_head(struct io_buffer *io)
305 {
306 
307 	return (io->data + io->start);
308 }
309 
310 static uint8_t *
311 io_buffer_tail(struct io_buffer *io)
312 {
313 
314 	return (io->data + io->start + io->len);
315 }
316 
317 static void
318 io_buffer_advance(struct io_buffer *io, size_t amount)
319 {
320 
321 	assert(amount <= io->len);
322 	io->start += amount;
323 	io->len -= amount;
324 }
325 
326 static void
327 io_buffer_consume(struct io_buffer *io, size_t amount)
328 {
329 
330 	io_buffer_advance(io, amount);
331 	if (io->len == 0) {
332 		io->start = 0;
333 		return;
334 	}
335 
336 	/*
337 	 * XXX: Consider making this move optional and compacting on a
338 	 * future read() before realloc().
339 	 */
340 	memmove(io->data, io_buffer_head(io), io->len);
341 	io->start = 0;
342 }
343 
344 static void
345 io_buffer_grow(struct io_buffer *io, size_t newsize)
346 {
347 	uint8_t *new_data;
348 	size_t avail, new_cap;
349 
350 	avail = io_buffer_avail(io);
351 	if (newsize <= avail)
352 		return;
353 
354 	new_cap = io->capacity + (newsize - avail);
355 	new_data = realloc(io->data, new_cap);
356 	if (new_data == NULL)
357 		err(1, "Failed to grow GDB I/O buffer");
358 	io->data = new_data;
359 	io->capacity = new_cap;
360 }
361 
362 static bool
363 response_pending(void)
364 {
365 
366 	if (cur_resp.start == 0 && cur_resp.len == 0)
367 		return (false);
368 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
369 		return (false);
370 	return (true);
371 }
372 
/*
 * Tear down the current debugger connection: drop both socket events
 * (closing the socket), empty the I/O buffers, remove every software
 * breakpoint from guest memory, forget pending vCPU events, and let
 * all vCPUs run again.  Everything happens with gdb_lock held.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	/* Restore the guest instructions that breakpoints overwrote. */
	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
399 
/*
 * Convert a value in the range 0-15 to its lowercase ASCII hex digit.
 */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
409 
/*
 * Convert an ASCII hex digit (either case) to its numeric value.
 * Any other character yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t value;

	value = 0xF;
	if ('0' <= v && v <= '9')
		value = v - '0';
	else if ('a' <= v && v <= 'f')
		value = v - 'a' + 10;
	else if ('A' <= v && v <= 'F')
		value = v - 'A' + 10;
	return (value);
}
422 
/*
 * Parse 'len' characters at 'p' as a big-endian hexadecimal number
 * (most significant digit first).  An empty span yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t acc;
	size_t i;

	acc = 0;
	for (i = 0; i < len; i++)
		acc = (acc << 4) | parse_digit(p[i]);
	return (acc);
}
438 
/* Decode the two hex characters at 'p' into a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
445 
446 static void
447 send_pending_data(int fd)
448 {
449 	ssize_t nwritten;
450 
451 	if (cur_resp.len == 0) {
452 		mevent_disable(write_event);
453 		return;
454 	}
455 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
456 	if (nwritten == -1) {
457 		warn("Write to GDB socket failed");
458 		close_connection();
459 	} else {
460 		io_buffer_advance(&cur_resp, nwritten);
461 		if (cur_resp.len == 0)
462 			mevent_disable(write_event);
463 		else
464 			mevent_enable(write_event);
465 	}
466 }
467 
468 /* Append a single character to the output buffer. */
469 static void
470 send_char(uint8_t data)
471 {
472 	io_buffer_grow(&cur_resp, 1);
473 	*io_buffer_tail(&cur_resp) = data;
474 	cur_resp.len++;
475 }
476 
477 /* Append an array of bytes to the output buffer. */
478 static void
479 send_data(const uint8_t *data, size_t len)
480 {
481 
482 	io_buffer_grow(&cur_resp, len);
483 	memcpy(io_buffer_tail(&cur_resp), data, len);
484 	cur_resp.len += len;
485 }
486 
/*
 * Write the two lowercase hex characters for 'v' to buf[0] (high
 * nibble) and buf[1] (low nibble).  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	const uint8_t high = v >> 4;
	const uint8_t low = v & 0xf;

	buf[0] = hex_digit(high);
	buf[1] = hex_digit(low);
}
494 
/*
 * Queue a byte as two hex characters in the response buffer without
 * adding it to the packet checksum.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
507 
508 static void
509 start_packet(void)
510 {
511 
512 	send_char('$');
513 	cur_csum = 0;
514 }
515 
516 static void
517 finish_packet(void)
518 {
519 
520 	send_char('#');
521 	send_byte(cur_csum);
522 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
523 }
524 
525 /*
526  * Append a single character (for the packet payload) and update the
527  * checksum.
528  */
529 static void
530 append_char(uint8_t v)
531 {
532 
533 	send_char(v);
534 	cur_csum += v;
535 }
536 
537 /*
538  * Append an array of bytes (for the packet payload) and update the
539  * checksum.
540  */
541 static void
542 append_packet_data(const uint8_t *data, size_t len)
543 {
544 
545 	send_data(data, len);
546 	while (len > 0) {
547 		cur_csum += *data;
548 		data++;
549 		len--;
550 	}
551 }
552 
/*
 * Add a NUL-terminated string (without its terminator) to the packet
 * payload verbatim.
 */
static void
append_string(const char *str)
{
	const size_t n = strlen(str);

	append_packet_data((const uint8_t *)str, n);
}
559 
/* Add a byte to the packet payload as two checksummed hex characters. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
568 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

	while (len > 0) {
		append_byte(value);
		value >>= 8;
		len--;
	}
}
579 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.
 *
 * Bytes are emitted directly rather than staged in a variable-length
 * array, so a 'len' of zero is harmless (nothing is emitted) and stack
 * use does not depend on 'len'.  If 'len' exceeds the width of
 * uintmax_t the extra leading bytes are emitted as zero.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	size_t left;

	for (left = len; left > 0; left--) {
		/* Byte index (left - 1) counts from the least significant. */
		if (left - 1 < sizeof(value))
			append_byte((uint8_t)(value >> ((left - 1) * 8)));
		else
			append_byte(0);
	}
}
592 
/*
 * Add 'value' to the packet payload as big-endian hex using the fewest
 * whole bytes that hold it.  Zero is sent as the single character '0'.
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value == 0) {
		append_char('0');
		return;
	}
	nbytes = (fls(value) + 7) / 8;
	append_unsigned_be(value, nbytes);
}
602 
/*
 * Add a NUL-terminated string to the packet payload with every
 * character encoded as two hex digits.
 */
static void
append_asciihex(const char *str)
{

	for (; *str != '\0'; str++)
		append_byte(*str);
}
612 
/*
 * Queue a reply packet with an empty payload.  In this file it is the
 * answer given to requests the server does not support.
 */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
620 
/*
 * Queue an error reply of the form "E" followed by 'error' as two hex
 * digits.  Callers pass errno-style values; only the low eight bits
 * are transmitted.
 */
static void
send_error(int error)
{

	start_packet();
	append_char('E');
	append_byte(error);
	finish_packet();
}
630 
/* Queue the "OK" success reply. */
static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}
639 
/*
 * Decode a thread-id field.  Returns 0 for "0", -1 for "-1", -2 for
 * an empty field, and otherwise the field parsed as a hex number.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (data[0] == '-' && data[1] == '1')
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
652 
653 /*
654  * Report the current stop event to the debugger.  If the stop is due
655  * to an event triggered on a specific vCPU such as a breakpoint or
656  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
657  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
658  * the reporting vCPU for vCPU events.
659  */
660 static void
661 report_stop(bool set_cur_vcpu)
662 {
663 	struct vcpu_state *vs;
664 
665 	start_packet();
666 	if (stopped_vcpu == -1) {
667 		append_char('S');
668 		append_byte(GDB_SIGNAL_TRAP);
669 	} else {
670 		vs = &vcpu_state[stopped_vcpu];
671 		if (set_cur_vcpu)
672 			cur_vcpu = stopped_vcpu;
673 		append_char('T');
674 		append_byte(GDB_SIGNAL_TRAP);
675 		append_string("thread:");
676 		append_integer(stopped_vcpu + 1);
677 		append_char(';');
678 		if (vs->hit_swbreak) {
679 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
680 			if (swbreak_enabled)
681 				append_string("swbreak:;");
682 		} else if (vs->stepped)
683 			debug("$vCPU %d reporting step\n", stopped_vcpu);
684 		else
685 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
686 	}
687 	finish_packet();
688 	report_next_stop = false;
689 }
690 
691 /*
692  * If this stop is due to a vCPU event, clear that event to mark it as
693  * acknowledged.
694  */
695 static void
696 discard_stop(void)
697 {
698 	struct vcpu_state *vs;
699 
700 	if (stopped_vcpu != -1) {
701 		vs = &vcpu_state[stopped_vcpu];
702 		vs->hit_swbreak = false;
703 		vs->stepped = false;
704 		stopped_vcpu = -1;
705 	}
706 	report_next_stop = true;
707 }
708 
709 static void
710 gdb_finish_suspend_vcpus(void)
711 {
712 
713 	if (first_stop) {
714 		first_stop = false;
715 		stopped_vcpu = -1;
716 	} else if (report_next_stop) {
717 		assert(!response_pending());
718 		report_stop(true);
719 		send_pending_data(cur_fd);
720 	}
721 }
722 
/*
 * vCPU threads invoke this function whenever the vCPU enters the
 * debug server to pause or report an event.  vCPU threads wait here
 * as long as the debug server keeps them suspended.
 *
 * The caller must hold gdb_lock; it is released while sleeping on
 * 'idle_vcpus' and re-acquired before returning.  When 'report_stop'
 * is true and this vCPU is the last suspended one to arrive, the
 * suspend is completed via gdb_finish_suspend_vcpus().
 */
static void
_gdb_cpu_suspend(int vcpu, bool report_stop)
{

	debug("$vCPU %d suspending\n", vcpu);
	CPU_SET(vcpu, &vcpus_waiting);
	/* Every suspended vCPU is now waiting: the stop is complete. */
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	/* Sleep until the debug server drops this vCPU from the set. */
	while (CPU_ISSET(vcpu, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpu, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpu);
}
741 
/*
 * Invoked at the start of a vCPU thread's execution to inform the
 * debug server about the new thread.  The vCPU is added to the active
 * set and, if any breakpoints already exist, breakpoint exits are
 * enabled on it.
 */
void
gdb_cpu_add(int vcpu)
{

	debug("$vCPU %d starting\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	assert(vcpu < guest_ncpus);
	CPU_SET(vcpu, &vcpus_active);
	/* Breakpoints planted earlier must trap on this vCPU too. */
	if (!TAILQ_EMPTY(&breakpoints)) {
		vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
	}

	/*
	 * If a vcpu is added while vcpus are stopped, suspend the new
	 * vcpu so that it will pop back out with a debug exit before
	 * executing the first instruction.
	 */
	if (!CPU_EMPTY(&vcpus_suspended)) {
		CPU_SET(vcpu, &vcpus_suspended);
		_gdb_cpu_suspend(vcpu, false);
	}
	pthread_mutex_unlock(&gdb_lock);
}
770 
771 /*
772  * Invoked by vCPU before resuming execution.  This enables stepping
773  * if the vCPU is marked as stepping.
774  */
775 static void
776 gdb_cpu_resume(int vcpu)
777 {
778 	struct vcpu_state *vs;
779 	int error;
780 
781 	vs = &vcpu_state[vcpu];
782 
783 	/*
784 	 * Any pending event should already be reported before
785 	 * resuming.
786 	 */
787 	assert(vs->hit_swbreak == false);
788 	assert(vs->stepped == false);
789 	if (vs->stepping) {
790 		error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
791 		assert(error == 0);
792 	}
793 }
794 
/*
 * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
 * has been suspended due to an event on different vCPU or in response
 * to a guest-wide suspend such as Ctrl-C or the stop on attach.
 *
 * The vCPU parks until released, then prepares to resume (arming
 * single-step if requested) before dropping the lock.
 */
void
gdb_cpu_suspend(int vcpu)
{

	pthread_mutex_lock(&gdb_lock);
	_gdb_cpu_suspend(vcpu, true);
	gdb_cpu_resume(vcpu);
	pthread_mutex_unlock(&gdb_lock);
}
809 
/*
 * Ask every active vCPU to stop.  The caller must hold gdb_lock.  If
 * all of the vCPUs to be suspended are already waiting, the suspend is
 * completed immediately; otherwise the last vCPU to arrive in
 * _gdb_cpu_suspend() completes it.
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	vm_suspend_cpu(ctx, -1);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
821 
/*
 * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
 * the VT-x-specific MTRAP exit.
 *
 * If the vCPU was stepping, the step is recorded as completed, MTRAP
 * exits are switched off again, and the vCPU waits until the debugger
 * has acknowledged the step.  An MTRAP exit on a vCPU that was not
 * stepping is ignored.
 */
void
gdb_cpu_mtrap(int vcpu)
{
	struct vcpu_state *vs;

	debug("$vCPU %d MTRAP\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpu];
	if (vs->stepping) {
		vs->stepping = false;
		vs->stepped = true;
		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
		/* Loop until discard_stop() clears 'stepped'. */
		while (vs->stepped) {
			/*
			 * Claim the stop only if no other vCPU's event
			 * is currently being reported.
			 */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpu);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}
850 
851 static struct breakpoint *
852 find_breakpoint(uint64_t gpa)
853 {
854 	struct breakpoint *bp;
855 
856 	TAILQ_FOREACH(bp, &breakpoints, link) {
857 		if (bp->gpa == gpa)
858 			return (bp);
859 	}
860 	return (NULL);
861 }
862 
/*
 * Handler for a breakpoint exit on 'vcpu'.
 *
 * If the faulting RIP corresponds to a breakpoint planted by the
 * debug server, the event is recorded and the vCPU waits until the
 * debugger has acknowledged it or the breakpoint has been removed.
 * Otherwise the breakpoint belongs to the guest itself, and a #BP
 * exception is injected so the guest can handle it.
 */
void
gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error;

	pthread_mutex_lock(&gdb_lock);
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpu];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/* Leave RIP pointing at the breakpoint instruction. */
		vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		for (;;) {
			/*
			 * Claim the stop only if no other vCPU's event
			 * is currently being reported.
			 */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
				    vmexit->rip);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
		    vmexit->rip);
		/* Hand the guest its own INT 3 with the original length. */
		error = vm_set_register(ctx, vcpu,
		    VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
913 
914 static bool
915 gdb_step_vcpu(int vcpu)
916 {
917 	int error, val;
918 
919 	debug("$vCPU %d step\n", vcpu);
920 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
921 	if (error < 0)
922 		return (false);
923 
924 	discard_stop();
925 	vcpu_state[vcpu].stepping = true;
926 	vm_resume_cpu(ctx, vcpu);
927 	CPU_CLR(vcpu, &vcpus_suspended);
928 	pthread_cond_broadcast(&idle_vcpus);
929 	return (true);
930 }
931 
/*
 * Let every vCPU run again: resume them in the hypervisor, empty the
 * suspended set, and wake all threads parked in _gdb_cpu_suspend().
 * The caller must hold gdb_lock.
 */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_cpu(ctx, -1);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
942 
943 static void
944 gdb_read_regs(void)
945 {
946 	uint64_t regvals[nitems(gdb_regset)];
947 	int i;
948 
949 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
950 	    gdb_regset, regvals) == -1) {
951 		send_error(errno);
952 		return;
953 	}
954 	start_packet();
955 	for (i = 0; i < nitems(regvals); i++)
956 		append_unsigned_native(regvals[i], gdb_regsize[i]);
957 	finish_packet();
958 }
959 
/*
 * Handle a memory read request of the form "m<addr>,<length>" (both
 * fields in hex) against the address space of 'cur_vcpu'.
 *
 * The range is processed one guest page at a time.  Pages backed by
 * guest RAM are read directly; other pages are read through the MMIO
 * emulation path.  If a failure occurs before any byte has been
 * produced an error reply is sent; if it occurs later, the bytes
 * gathered so far are sent as a short reply.
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	/* 'started' records whether the reply packet has been opened. */
	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(ctx, cur_vcpu, gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit the value low byte first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		/* Each pass ends at a page boundary unless it is the last. */
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	/* A zero-length read still gets an (empty) reply packet. */
	if (!started)
		start_packet();
	finish_packet();
}
1071 
1072 static void
1073 gdb_write_mem(const uint8_t *data, size_t len)
1074 {
1075 	uint64_t gpa, gva, val;
1076 	uint8_t *cp;
1077 	size_t resid, todo, bytes;
1078 	int error;
1079 
1080 	/* Skip 'M' */
1081 	data += 1;
1082 	len -= 1;
1083 
1084 	/* Parse and consume address. */
1085 	cp = memchr(data, ',', len);
1086 	if (cp == NULL || cp == data) {
1087 		send_error(EINVAL);
1088 		return;
1089 	}
1090 	gva = parse_integer(data, cp - data);
1091 	len -= (cp - data) + 1;
1092 	data += (cp - data) + 1;
1093 
1094 	/* Parse and consume length. */
1095 	cp = memchr(data, ':', len);
1096 	if (cp == NULL || cp == data) {
1097 		send_error(EINVAL);
1098 		return;
1099 	}
1100 	resid = parse_integer(data, cp - data);
1101 	len -= (cp - data) + 1;
1102 	data += (cp - data) + 1;
1103 
1104 	/* Verify the available bytes match the length. */
1105 	if (len != resid * 2) {
1106 		send_error(EINVAL);
1107 		return;
1108 	}
1109 
1110 	while (resid > 0) {
1111 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1112 		if (error == -1) {
1113 			send_error(errno);
1114 			return;
1115 		}
1116 		if (error == 0) {
1117 			send_error(EFAULT);
1118 			return;
1119 		}
1120 
1121 		/* Write bytes to current page. */
1122 		todo = getpagesize() - gpa % getpagesize();
1123 		if (todo > resid)
1124 			todo = resid;
1125 
1126 		cp = paddr_guest2host(ctx, gpa, todo);
1127 		if (cp != NULL) {
1128 			/*
1129 			 * If this page is guest RAM, write it a byte
1130 			 * at a time.
1131 			 */
1132 			while (todo > 0) {
1133 				assert(len >= 2);
1134 				*cp = parse_byte(data);
1135 				data += 2;
1136 				len -= 2;
1137 				cp++;
1138 				gpa++;
1139 				gva++;
1140 				resid--;
1141 				todo--;
1142 			}
1143 		} else {
1144 			/*
1145 			 * If this page isn't guest RAM, try to handle
1146 			 * it via MMIO.  For MMIO requests, use
1147 			 * aligned writes of words when possible.
1148 			 */
1149 			while (todo > 0) {
1150 				if (gpa & 1 || todo == 1) {
1151 					bytes = 1;
1152 					val = parse_byte(data);
1153 				} else if (gpa & 2 || todo == 2) {
1154 					bytes = 2;
1155 					val = be16toh(parse_integer(data, 4));
1156 				} else {
1157 					bytes = 4;
1158 					val = be32toh(parse_integer(data, 8));
1159 				}
1160 				error = write_mem(ctx, cur_vcpu, gpa, val,
1161 				    bytes);
1162 				if (error == 0) {
1163 					gpa += bytes;
1164 					gva += bytes;
1165 					resid -= bytes;
1166 					todo -= bytes;
1167 					data += 2 * bytes;
1168 					len -= 2 * bytes;
1169 				} else {
1170 					send_error(EFAULT);
1171 					return;
1172 				}
1173 			}
1174 		}
1175 		assert(resid == 0 || gpa % getpagesize() == 0);
1176 	}
1177 	assert(len == 0);
1178 	send_ok();
1179 }
1180 
1181 static bool
1182 set_breakpoint_caps(bool enable)
1183 {
1184 	cpuset_t mask;
1185 	int vcpu;
1186 
1187 	mask = vcpus_active;
1188 	while (!CPU_EMPTY(&mask)) {
1189 		vcpu = CPU_FFS(&mask) - 1;
1190 		CPU_CLR(vcpu, &mask);
1191 		if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
1192 		    enable ? 1 : 0) < 0)
1193 			return (false);
1194 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1195 		    enable ? "en" : "dis");
1196 	}
1197 	return (true);
1198 }
1199 
1200 static void
1201 remove_all_sw_breakpoints(void)
1202 {
1203 	struct breakpoint *bp, *nbp;
1204 	uint8_t *cp;
1205 
1206 	if (TAILQ_EMPTY(&breakpoints))
1207 		return;
1208 
1209 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1210 		debug("remove breakpoint at %#lx\n", bp->gpa);
1211 		cp = paddr_guest2host(ctx, bp->gpa, 1);
1212 		*cp = bp->shadow_inst;
1213 		TAILQ_REMOVE(&breakpoints, bp, link);
1214 		free(bp);
1215 	}
1216 	TAILQ_INIT(&breakpoints);
1217 	set_breakpoint_caps(false);
1218 }
1219 
1220 static void
1221 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1222 {
1223 	struct breakpoint *bp;
1224 	uint64_t gpa;
1225 	uint8_t *cp;
1226 	int error;
1227 
1228 	if (kind != 1) {
1229 		send_error(EINVAL);
1230 		return;
1231 	}
1232 
1233 	error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1234 	if (error == -1) {
1235 		send_error(errno);
1236 		return;
1237 	}
1238 	if (error == 0) {
1239 		send_error(EFAULT);
1240 		return;
1241 	}
1242 
1243 	cp = paddr_guest2host(ctx, gpa, 1);
1244 
1245 	/* Only permit breakpoints in guest RAM. */
1246 	if (cp == NULL) {
1247 		send_error(EFAULT);
1248 		return;
1249 	}
1250 
1251 	/* Find any existing breakpoint. */
1252 	bp = find_breakpoint(gpa);
1253 
1254 	/*
1255 	 * Silently ignore duplicate commands since the protocol
1256 	 * requires these packets to be idempotent.
1257 	 */
1258 	if (insert) {
1259 		if (bp == NULL) {
1260 			if (TAILQ_EMPTY(&breakpoints) &&
1261 			    !set_breakpoint_caps(true)) {
1262 				send_empty_response();
1263 				return;
1264 			}
1265 			bp = malloc(sizeof(*bp));
1266 			bp->gpa = gpa;
1267 			bp->shadow_inst = *cp;
1268 			*cp = 0xcc;	/* INT 3 */
1269 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1270 			debug("new breakpoint at %#lx\n", gpa);
1271 		}
1272 	} else {
1273 		if (bp != NULL) {
1274 			debug("remove breakpoint at %#lx\n", gpa);
1275 			*cp = bp->shadow_inst;
1276 			TAILQ_REMOVE(&breakpoints, bp, link);
1277 			free(bp);
1278 			if (TAILQ_EMPTY(&breakpoints))
1279 				set_breakpoint_caps(false);
1280 		}
1281 	}
1282 	send_ok();
1283 }
1284 
/*
 * Handle a "Z<type>,<addr>,<kind>" (insert) or "z<type>,<addr>,<kind>"
 * (remove) request.  Only type 0, a software breakpoint, is
 * implemented; other types, and requests carrying a ';'-separated
 * condition or command list, get an empty reply.  Malformed requests
 * get an EINVAL error reply.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	const uint8_t *sep;
	uint64_t gva;
	size_t used;
	int kind, type;
	bool insert;

	/* The leading character selects insert ('Z') or remove ('z'). */
	insert = (data[0] == 'Z');
	data++;
	len--;

	/* Breakpoint type, terminated by ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	used = sep - data;
	type = parse_integer(data, used);
	data += used + 1;
	len -= used + 1;

	/* Address, terminated by ','. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	used = sep - data;
	gva = parse_integer(data, used);
	data += used + 1;
	len -= used + 1;

	/* Kind runs to the end of the packet unless a ';' follows it. */
	sep = memchr(data, ';', len);
	if (sep == data) {
		send_error(EINVAL);
		return;
	}
	if (sep != NULL) {
		/*
		 * We do not advertise support for either the
		 * ConditionalBreakpoints or BreakpointCommands
		 * features, so we should not be getting conditions or
		 * commands from the remote end.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);

	if (type == 0)
		update_sw_breakpoint(gva, kind, insert);
	else
		send_empty_response();
}
1348 
/*
 * Return true if the first strlen(cmd) bytes of the 'len'-byte buffer
 * 'data' match the NUL-terminated string 'cmd'.  This is a prefix
 * match: bytes in 'data' beyond the command name are not examined.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	const size_t cmdlen = strlen(cmd);

	if (len < cmdlen)
		return (false);
	return (memcmp(data, cmd, cmdlen) == 0);
}
1357 
1358 static void
1359 check_features(const uint8_t *data, size_t len)
1360 {
1361 	char *feature, *next_feature, *str, *value;
1362 	bool supported;
1363 
1364 	str = malloc(len + 1);
1365 	memcpy(str, data, len);
1366 	str[len] = '\0';
1367 	next_feature = str;
1368 
1369 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1370 		/*
1371 		 * Null features shouldn't exist, but skip if they
1372 		 * do.
1373 		 */
1374 		if (strcmp(feature, "") == 0)
1375 			continue;
1376 
1377 		/*
1378 		 * Look for the value or supported / not supported
1379 		 * flag.
1380 		 */
1381 		value = strchr(feature, '=');
1382 		if (value != NULL) {
1383 			*value = '\0';
1384 			value++;
1385 			supported = true;
1386 		} else {
1387 			value = feature + strlen(feature) - 1;
1388 			switch (*value) {
1389 			case '+':
1390 				supported = true;
1391 				break;
1392 			case '-':
1393 				supported = false;
1394 				break;
1395 			default:
1396 				/*
1397 				 * This is really a protocol error,
1398 				 * but we just ignore malformed
1399 				 * features for ease of
1400 				 * implementation.
1401 				 */
1402 				continue;
1403 			}
1404 			value = NULL;
1405 		}
1406 
1407 		if (strcmp(feature, "swbreak") == 0)
1408 			swbreak_enabled = supported;
1409 	}
1410 	free(str);
1411 
1412 	start_packet();
1413 
1414 	/* This is an arbitrary limit. */
1415 	append_string("PacketSize=4096");
1416 	append_string(";swbreak+");
1417 	finish_packet();
1418 }
1419 
1420 static void
1421 gdb_query(const uint8_t *data, size_t len)
1422 {
1423 
1424 	/*
1425 	 * TODO:
1426 	 * - qSearch
1427 	 */
1428 	if (command_equals(data, len, "qAttached")) {
1429 		start_packet();
1430 		append_char('1');
1431 		finish_packet();
1432 	} else if (command_equals(data, len, "qC")) {
1433 		start_packet();
1434 		append_string("QC");
1435 		append_integer(cur_vcpu + 1);
1436 		finish_packet();
1437 	} else if (command_equals(data, len, "qfThreadInfo")) {
1438 		cpuset_t mask;
1439 		bool first;
1440 		int vcpu;
1441 
1442 		if (CPU_EMPTY(&vcpus_active)) {
1443 			send_error(EINVAL);
1444 			return;
1445 		}
1446 		mask = vcpus_active;
1447 		start_packet();
1448 		append_char('m');
1449 		first = true;
1450 		while (!CPU_EMPTY(&mask)) {
1451 			vcpu = CPU_FFS(&mask) - 1;
1452 			CPU_CLR(vcpu, &mask);
1453 			if (first)
1454 				first = false;
1455 			else
1456 				append_char(',');
1457 			append_integer(vcpu + 1);
1458 		}
1459 		finish_packet();
1460 	} else if (command_equals(data, len, "qsThreadInfo")) {
1461 		start_packet();
1462 		append_char('l');
1463 		finish_packet();
1464 	} else if (command_equals(data, len, "qSupported")) {
1465 		data += strlen("qSupported");
1466 		len -= strlen("qSupported");
1467 		check_features(data, len);
1468 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1469 		char buf[16];
1470 		int tid;
1471 
1472 		data += strlen("qThreadExtraInfo");
1473 		len -= strlen("qThreadExtraInfo");
1474 		if (*data != ',') {
1475 			send_error(EINVAL);
1476 			return;
1477 		}
1478 		tid = parse_threadid(data + 1, len - 1);
1479 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1480 			send_error(EINVAL);
1481 			return;
1482 		}
1483 
1484 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1485 		start_packet();
1486 		append_asciihex(buf);
1487 		finish_packet();
1488 	} else
1489 		send_empty_response();
1490 }
1491 
1492 static void
1493 handle_command(const uint8_t *data, size_t len)
1494 {
1495 
1496 	/* Reject packets with a sequence-id. */
1497 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1498 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1499 		send_empty_response();
1500 		return;
1501 	}
1502 
1503 	switch (*data) {
1504 	case 'c':
1505 		if (len != 1) {
1506 			send_error(EINVAL);
1507 			break;
1508 		}
1509 
1510 		discard_stop();
1511 		gdb_resume_vcpus();
1512 		break;
1513 	case 'D':
1514 		send_ok();
1515 
1516 		/* TODO: Resume any stopped CPUs. */
1517 		break;
1518 	case 'g': {
1519 		gdb_read_regs();
1520 		break;
1521 	}
1522 	case 'H': {
1523 		int tid;
1524 
1525 		if (data[1] != 'g' && data[1] != 'c') {
1526 			send_error(EINVAL);
1527 			break;
1528 		}
1529 		tid = parse_threadid(data + 2, len - 2);
1530 		if (tid == -2) {
1531 			send_error(EINVAL);
1532 			break;
1533 		}
1534 
1535 		if (CPU_EMPTY(&vcpus_active)) {
1536 			send_error(EINVAL);
1537 			break;
1538 		}
1539 		if (tid == -1 || tid == 0)
1540 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1541 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1542 			cur_vcpu = tid - 1;
1543 		else {
1544 			send_error(EINVAL);
1545 			break;
1546 		}
1547 		send_ok();
1548 		break;
1549 	}
1550 	case 'm':
1551 		gdb_read_mem(data, len);
1552 		break;
1553 	case 'M':
1554 		gdb_write_mem(data, len);
1555 		break;
1556 	case 'T': {
1557 		int tid;
1558 
1559 		tid = parse_threadid(data + 1, len - 1);
1560 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1561 			send_error(EINVAL);
1562 			return;
1563 		}
1564 		send_ok();
1565 		break;
1566 	}
1567 	case 'q':
1568 		gdb_query(data, len);
1569 		break;
1570 	case 's':
1571 		if (len != 1) {
1572 			send_error(EINVAL);
1573 			break;
1574 		}
1575 
1576 		/* Don't send a reply until a stop occurs. */
1577 		if (!gdb_step_vcpu(cur_vcpu)) {
1578 			send_error(EOPNOTSUPP);
1579 			break;
1580 		}
1581 		break;
1582 	case 'z':
1583 	case 'Z':
1584 		parse_breakpoint(data, len);
1585 		break;
1586 	case '?':
1587 		report_stop(false);
1588 		break;
1589 	case 'G': /* TODO */
1590 	case 'v':
1591 		/* Handle 'vCont' */
1592 		/* 'vCtrlC' */
1593 	case 'p': /* TODO */
1594 	case 'P': /* TODO */
1595 	case 'Q': /* TODO */
1596 	case 't': /* TODO */
1597 	case 'X': /* TODO */
1598 	default:
1599 		send_empty_response();
1600 	}
1601 }
1602 
1603 /* Check for a valid packet in the command buffer. */
1604 static void
1605 check_command(int fd)
1606 {
1607 	uint8_t *head, *hash, *p, sum;
1608 	size_t avail, plen;
1609 
1610 	for (;;) {
1611 		avail = cur_comm.len;
1612 		if (avail == 0)
1613 			return;
1614 		head = io_buffer_head(&cur_comm);
1615 		switch (*head) {
1616 		case 0x03:
1617 			debug("<- Ctrl-C\n");
1618 			io_buffer_consume(&cur_comm, 1);
1619 
1620 			gdb_suspend_vcpus();
1621 			break;
1622 		case '+':
1623 			/* ACK of previous response. */
1624 			debug("<- +\n");
1625 			if (response_pending())
1626 				io_buffer_reset(&cur_resp);
1627 			io_buffer_consume(&cur_comm, 1);
1628 			if (stopped_vcpu != -1 && report_next_stop) {
1629 				report_stop(true);
1630 				send_pending_data(fd);
1631 			}
1632 			break;
1633 		case '-':
1634 			/* NACK of previous response. */
1635 			debug("<- -\n");
1636 			if (response_pending()) {
1637 				cur_resp.len += cur_resp.start;
1638 				cur_resp.start = 0;
1639 				if (cur_resp.data[0] == '+')
1640 					io_buffer_advance(&cur_resp, 1);
1641 				debug("-> %.*s\n", (int)cur_resp.len,
1642 				    io_buffer_head(&cur_resp));
1643 			}
1644 			io_buffer_consume(&cur_comm, 1);
1645 			send_pending_data(fd);
1646 			break;
1647 		case '$':
1648 			/* Packet. */
1649 
1650 			if (response_pending()) {
1651 				warnx("New GDB command while response in "
1652 				    "progress");
1653 				io_buffer_reset(&cur_resp);
1654 			}
1655 
1656 			/* Is packet complete? */
1657 			hash = memchr(head, '#', avail);
1658 			if (hash == NULL)
1659 				return;
1660 			plen = (hash - head + 1) + 2;
1661 			if (avail < plen)
1662 				return;
1663 			debug("<- %.*s\n", (int)plen, head);
1664 
1665 			/* Verify checksum. */
1666 			for (sum = 0, p = head + 1; p < hash; p++)
1667 				sum += *p;
1668 			if (sum != parse_byte(hash + 1)) {
1669 				io_buffer_consume(&cur_comm, plen);
1670 				debug("-> -\n");
1671 				send_char('-');
1672 				send_pending_data(fd);
1673 				break;
1674 			}
1675 			send_char('+');
1676 
1677 			handle_command(head + 1, hash - (head + 1));
1678 			io_buffer_consume(&cur_comm, plen);
1679 			if (!response_pending())
1680 				debug("-> +\n");
1681 			send_pending_data(fd);
1682 			break;
1683 		default:
1684 			/* XXX: Possibly drop connection instead. */
1685 			debug("-> %02x\n", *head);
1686 			io_buffer_consume(&cur_comm, 1);
1687 			break;
1688 		}
1689 	}
1690 }
1691 
1692 static void
1693 gdb_readable(int fd, enum ev_type event, void *arg)
1694 {
1695 	ssize_t nread;
1696 	int pending;
1697 
1698 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1699 		warn("FIONREAD on GDB socket");
1700 		return;
1701 	}
1702 
1703 	/*
1704 	 * 'pending' might be zero due to EOF.  We need to call read
1705 	 * with a non-zero length to detect EOF.
1706 	 */
1707 	if (pending == 0)
1708 		pending = 1;
1709 
1710 	/* Ensure there is room in the command buffer. */
1711 	io_buffer_grow(&cur_comm, pending);
1712 	assert(io_buffer_avail(&cur_comm) >= pending);
1713 
1714 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1715 	if (nread == 0) {
1716 		close_connection();
1717 	} else if (nread == -1) {
1718 		if (errno == EAGAIN)
1719 			return;
1720 
1721 		warn("Read from GDB socket");
1722 		close_connection();
1723 	} else {
1724 		cur_comm.len += nread;
1725 		pthread_mutex_lock(&gdb_lock);
1726 		check_command(fd);
1727 		pthread_mutex_unlock(&gdb_lock);
1728 	}
1729 }
1730 
1731 static void
1732 gdb_writable(int fd, enum ev_type event, void *arg)
1733 {
1734 
1735 	send_pending_data(fd);
1736 }
1737 
1738 static void
1739 new_connection(int fd, enum ev_type event, void *arg)
1740 {
1741 	int optval, s;
1742 
1743 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1744 	if (s == -1) {
1745 		if (arg != NULL)
1746 			err(1, "Failed accepting initial GDB connection");
1747 
1748 		/* Silently ignore errors post-startup. */
1749 		return;
1750 	}
1751 
1752 	optval = 1;
1753 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1754 	    -1) {
1755 		warn("Failed to disable SIGPIPE for GDB connection");
1756 		close(s);
1757 		return;
1758 	}
1759 
1760 	pthread_mutex_lock(&gdb_lock);
1761 	if (cur_fd != -1) {
1762 		close(s);
1763 		warnx("Ignoring additional GDB connection.");
1764 	}
1765 
1766 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1767 	if (read_event == NULL) {
1768 		if (arg != NULL)
1769 			err(1, "Failed to setup initial GDB connection");
1770 		pthread_mutex_unlock(&gdb_lock);
1771 		return;
1772 	}
1773 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1774 	if (write_event == NULL) {
1775 		if (arg != NULL)
1776 			err(1, "Failed to setup initial GDB connection");
1777 		mevent_delete_close(read_event);
1778 		read_event = NULL;
1779 	}
1780 
1781 	cur_fd = s;
1782 	cur_vcpu = 0;
1783 	stopped_vcpu = -1;
1784 
1785 	/* Break on attach. */
1786 	first_stop = true;
1787 	report_next_stop = false;
1788 	gdb_suspend_vcpus();
1789 	pthread_mutex_unlock(&gdb_lock);
1790 }
1791 
1792 #ifndef WITHOUT_CAPSICUM
1793 void
1794 limit_gdb_socket(int s)
1795 {
1796 	cap_rights_t rights;
1797 	unsigned long ioctls[] = { FIONREAD };
1798 
1799 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1800 	    CAP_SETSOCKOPT, CAP_IOCTL);
1801 	if (caph_rights_limit(s, &rights) == -1)
1802 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1803 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1804 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1805 }
1806 #endif
1807 
1808 void
1809 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1810 {
1811 	struct sockaddr_in sin;
1812 	int error, flags, s;
1813 
1814 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1815 
1816 	error = pthread_mutex_init(&gdb_lock, NULL);
1817 	if (error != 0)
1818 		errc(1, error, "gdb mutex init");
1819 	error = pthread_cond_init(&idle_vcpus, NULL);
1820 	if (error != 0)
1821 		errc(1, error, "gdb cv init");
1822 
1823 	ctx = _ctx;
1824 	s = socket(PF_INET, SOCK_STREAM, 0);
1825 	if (s < 0)
1826 		err(1, "gdb socket create");
1827 
1828 	sin.sin_len = sizeof(sin);
1829 	sin.sin_family = AF_INET;
1830 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1831 	sin.sin_port = htons(sport);
1832 
1833 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1834 		err(1, "gdb socket bind");
1835 
1836 	if (listen(s, 1) < 0)
1837 		err(1, "gdb socket listen");
1838 
1839 	stopped_vcpu = -1;
1840 	TAILQ_INIT(&breakpoints);
1841 	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
1842 	if (wait) {
1843 		/*
1844 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1845 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1846 		 * it starts execution.  The vcpu will remain suspended
1847 		 * until a debugger connects.
1848 		 */
1849 		CPU_SET(0, &vcpus_suspended);
1850 		stopped_vcpu = 0;
1851 	}
1852 
1853 	flags = fcntl(s, F_GETFL);
1854 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1855 		err(1, "Failed to mark gdb socket non-blocking");
1856 
1857 #ifndef WITHOUT_CAPSICUM
1858 	limit_gdb_socket(s);
1859 #endif
1860 	mevent_add(s, EVF_READ, new_connection, NULL);
1861 }
1862