xref: /freebsd/usr.sbin/bhyve/gdb.c (revision dd41de95a84d979615a2ef11df6850622bf6184e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <pthread.h>
52 #include <pthread_np.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <sysexits.h>
58 #include <unistd.h>
59 #include <vmmapi.h>
60 
61 #include "bhyverun.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65 
66 /*
67  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68  * use SIGTRAP.
69  */
70 #define	GDB_SIGNAL_TRAP		5
71 
72 static void gdb_resume_vcpus(void);
73 static void check_command(int fd);
74 
/* mevent handlers for the GDB socket: readable data and writable space. */
static struct mevent *read_event, *write_event;

/*
 * vCPU bookkeeping: vcpus_active are running guest vCPUs,
 * vcpus_suspended are those told to stop, and vcpus_waiting are those
 * actually parked in _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
static pthread_mutex_t gdb_lock;	/* protects all debug-server state */
static pthread_cond_t idle_vcpus;	/* signalled when suspended vCPUs may run */
static bool first_stop, report_next_stop, swbreak_enabled;
81 
82 /*
83  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
84  * read buffer, 'start' is unused and 'len' contains the number of
85  * valid bytes in the buffer.  For a write buffer, 'start' is set to
86  * the index of the next byte in 'data' to send, and 'len' contains
87  * the remaining number of valid bytes to send.
88  */
89 struct io_buffer {
90 	uint8_t *data;
91 	size_t capacity;
92 	size_t start;
93 	size_t len;
94 };
95 
/*
 * A software breakpoint: the guest-physical address patched with an
 * INT3 (0xcc) byte and the original instruction byte it replaced,
 * which is restored when the breakpoint is removed.
 */
struct breakpoint {
	uint64_t gpa;
	uint8_t shadow_inst;
	TAILQ_ENTRY(breakpoint) link;
};
101 
102 /*
103  * When a vCPU stops to due to an event that should be reported to the
104  * debugger, information about the event is stored in this structure.
105  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
106  * and stops other vCPUs so the event can be reported.  The
107  * report_stop() function reports the event for the 'stopped_vcpu'
108  * vCPU.  When the debugger resumes execution via continue or step,
109  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
110  * event handlers until the associated event is reported or disabled.
111  *
112  * An idle vCPU will have all of the boolean fields set to false.
113  *
114  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
115  * released to execute the stepped instruction.  When the vCPU reports
116  * the stepping trap, 'stepped' is set.
117  *
118  * When a vCPU hits a breakpoint set by the debug server,
119  * 'hit_swbreak' is set to true.
120  */
121 struct vcpu_state {
122 	bool stepping;
123 	bool stepped;
124 	bool hit_swbreak;
125 };
126 
static struct io_buffer cur_comm, cur_resp;	/* RX and TX buffers */
static uint8_t cur_csum;	/* running checksum of the packet being built */
static struct vmctx *ctx;
static int cur_fd = -1;		/* connected GDB socket, -1 when no client */
static TAILQ_HEAD(, breakpoint) breakpoints;
static struct vcpu_state *vcpu_state;	/* array of guest_ncpus entries */
static int cur_vcpu, stopped_vcpu;
static bool gdb_active = false;
135 
/*
 * Registers reported for 'g' packets.  Must stay in sync (order and
 * count) with gdb_regsize below; presumably this matches GDB's amd64
 * register ordering -- confirm against the target description.
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
162 
/*
 * Transfer size in bytes of each register in gdb_regset (parallel
 * array): 8 bytes for the 16 GPRs and RIP, then 4 bytes for RFLAGS
 * and the six segment registers.
 */
const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
189 
190 #ifdef GDB_LOG
191 #include <stdarg.h>
192 #include <stdio.h>
193 
/*
 * Log debug-server activity to /tmp/bhyve_gdb.log (compiled in only
 * with GDB_LOG).  The log is opened lazily on first call and made
 * line-buffered; logging is silently disabled if the file cannot be
 * opened or capsicum-limited.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		logfile = fopen("/tmp/bhyve_gdb.log", "w");
		if (logfile == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		/* Restrict the log fd to write-only under capsicum. */
		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
			fclose(logfile);
			logfile = NULL;
			return;
		}
#endif
		setlinebuf(logfile);
	}
	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
217 #else
218 #define debug(...)
219 #endif
220 
221 static void	remove_all_sw_breakpoints(void);
222 
/*
 * Fill in a vm_guest_paging describing the vCPU's current address
 * translation mode, derived from CR0/CR3/CR4/EFER.  Returns 0 on
 * success or -1 if the control registers cannot be read.
 */
static int
guest_paging_info(int vcpu, struct vm_guest_paging *paging)
{
	uint64_t regs[4];
	const int regset[4] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER
	};

	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
		return (-1);

	/*
	 * For the debugger, always pretend to be the kernel (CPL 0),
	 * and if long-mode is enabled, always parse addresses as if
	 * in 64-bit mode.
	 */
	paging->cr3 = regs[1];
	paging->cpl = 0;
	if (regs[3] & EFER_LMA)
		paging->cpu_mode = CPU_MODE_64BIT;
	else if (regs[0] & CR0_PE)
		paging->cpu_mode = CPU_MODE_PROTECTED;
	else
		paging->cpu_mode = CPU_MODE_REAL;
	/* Paging mode: flat -> 32-bit -> PAE -> 4/5-level long mode. */
	if (!(regs[0] & CR0_PG))
		paging->paging_mode = PAGING_MODE_FLAT;
	else if (!(regs[2] & CR4_PAE))
		paging->paging_mode = PAGING_MODE_32;
	else if (regs[3] & EFER_LME)
		paging->paging_mode = (regs[2] & CR4_LA57) ?
		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
	else
		paging->paging_mode = PAGING_MODE_PAE;
	return (0);
}
261 
262 /*
263  * Map a guest virtual address to a physical address (for a given vcpu).
264  * If a guest virtual address is valid, return 1.  If the address is
265  * not valid, return 0.  If an error occurs obtaining the mapping,
266  * return -1.
267  */
268 static int
269 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
270 {
271 	struct vm_guest_paging paging;
272 	int fault;
273 
274 	if (guest_paging_info(vcpu, &paging) == -1)
275 		return (-1);
276 
277 	/*
278 	 * Always use PROT_READ.  We really care if the VA is
279 	 * accessible, not if the current vCPU can write.
280 	 */
281 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
282 	    &fault) == -1)
283 		return (-1);
284 	if (fault)
285 		return (0);
286 	return (1);
287 }
288 
/* Empty the buffer without releasing its storage. */
static void
io_buffer_reset(struct io_buffer *io)
{

	io->start = 0;
	io->len = 0;
}
296 
/* Available room for adding data (tail space only; does not compact). */
static size_t
io_buffer_avail(struct io_buffer *io)
{

	return (io->capacity - (io->start + io->len));
}
304 
/* Pointer to the first valid byte in the buffer. */
static uint8_t *
io_buffer_head(struct io_buffer *io)
{

	return (io->data + io->start);
}
311 
/* Pointer just past the last valid byte (where new data is appended). */
static uint8_t *
io_buffer_tail(struct io_buffer *io)
{

	return (io->data + io->start + io->len);
}
318 
/* Mark 'amount' bytes at the head of the buffer as consumed. */
static void
io_buffer_advance(struct io_buffer *io, size_t amount)
{

	assert(amount <= io->len);
	io->start += amount;
	io->len -= amount;
}
327 
328 static void
329 io_buffer_consume(struct io_buffer *io, size_t amount)
330 {
331 
332 	io_buffer_advance(io, amount);
333 	if (io->len == 0) {
334 		io->start = 0;
335 		return;
336 	}
337 
338 	/*
339 	 * XXX: Consider making this move optional and compacting on a
340 	 * future read() before realloc().
341 	 */
342 	memmove(io->data, io_buffer_head(io), io->len);
343 	io->start = 0;
344 }
345 
346 static void
347 io_buffer_grow(struct io_buffer *io, size_t newsize)
348 {
349 	uint8_t *new_data;
350 	size_t avail, new_cap;
351 
352 	avail = io_buffer_avail(io);
353 	if (newsize <= avail)
354 		return;
355 
356 	new_cap = io->capacity + (newsize - avail);
357 	new_data = realloc(io->data, new_cap);
358 	if (new_data == NULL)
359 		err(1, "Failed to grow GDB I/O buffer");
360 	io->data = new_data;
361 	io->capacity = new_cap;
362 }
363 
364 static bool
365 response_pending(void)
366 {
367 
368 	if (cur_resp.start == 0 && cur_resp.len == 0)
369 		return (false);
370 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
371 		return (false);
372 	return (true);
373 }
374 
/*
 * Tear down the current GDB connection: remove both mevent handlers,
 * reset the I/O buffers, delete all software breakpoints, clear any
 * pending per-vCPU events, and let stopped vCPUs run again.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
401 
/* Convert a nibble (0-15) to its lowercase ASCII hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble < 10 ? nibble + '0' : nibble - 10 + 'a');
}
411 
/*
 * Convert an ASCII hex character to its value; any non-hex input
 * yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{

	if ('0' <= v && v <= '9')
		return (v - '0');
	if ('a' <= v && v <= 'f')
		return (10 + (v - 'a'));
	if ('A' <= v && v <= 'F')
		return (10 + (v - 'A'));
	return (0xF);
}
424 
425 /* Parses big-endian hexadecimal. */
426 static uintmax_t
427 parse_integer(const uint8_t *p, size_t len)
428 {
429 	uintmax_t v;
430 
431 	v = 0;
432 	while (len > 0) {
433 		v <<= 4;
434 		v |= parse_digit(*p);
435 		p++;
436 		len--;
437 	}
438 	return (v);
439 }
440 
/* Parse two hex characters into one byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return ((hi << 4) | lo);
}
447 
/*
 * Write as much of the pending response as the socket accepts.  The
 * write event is enabled only while data remains queued; a failed
 * write() drops the connection entirely.
 */
static void
send_pending_data(int fd)
{
	ssize_t nwritten;

	if (cur_resp.len == 0) {
		mevent_disable(write_event);
		return;
	}
	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
	if (nwritten == -1) {
		warn("Write to GDB socket failed");
		close_connection();
	} else {
		/* Partial writes leave the remainder queued. */
		io_buffer_advance(&cur_resp, nwritten);
		if (cur_resp.len == 0)
			mevent_disable(write_event);
		else
			mevent_enable(write_event);
	}
}
469 
/*
 * Append a single character to the output buffer.  (Does not update
 * the packet checksum; see append_char for that.)
 */
static void
send_char(uint8_t data)
{
	io_buffer_grow(&cur_resp, 1);
	*io_buffer_tail(&cur_resp) = data;
	cur_resp.len++;
}
478 
/*
 * Append an array of bytes to the output buffer.  (Does not update
 * the packet checksum; see append_packet_data for that.)
 */
static void
send_data(const uint8_t *data, size_t len)
{

	io_buffer_grow(&cur_resp, len);
	memcpy(io_buffer_tail(&cur_resp), data, len);
	cur_resp.len += len;
}
488 
/* Format a byte as two lowercase hex characters at buf[0..1]. */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	buf[0] = hex_digit((v >> 4) & 0xf);
	buf[1] = hex_digit(v & 0xf);
}
496 
497 /*
498  * Append a single byte (formatted as two hex characters) to the
499  * output buffer.
500  */
501 static void
502 send_byte(uint8_t v)
503 {
504 	uint8_t buf[2];
505 
506 	format_byte(v, buf);
507 	send_data(buf, sizeof(buf));
508 }
509 
/* Begin a response packet: emit '$' and reset the running checksum. */
static void
start_packet(void)
{

	send_char('$');
	cur_csum = 0;
}
517 
/* Terminate the packet: '#' followed by the two-digit checksum. */
static void
finish_packet(void)
{

	send_char('#');
	send_byte(cur_csum);
	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
}
526 
527 /*
528  * Append a single character (for the packet payload) and update the
529  * checksum.
530  */
531 static void
532 append_char(uint8_t v)
533 {
534 
535 	send_char(v);
536 	cur_csum += v;
537 }
538 
539 /*
540  * Append an array of bytes (for the packet payload) and update the
541  * checksum.
542  */
543 static void
544 append_packet_data(const uint8_t *data, size_t len)
545 {
546 
547 	send_data(data, len);
548 	while (len > 0) {
549 		cur_csum += *data;
550 		data++;
551 		len--;
552 	}
553 }
554 
/*
 * Append a NUL-terminated string to the packet payload, updating the
 * checksum.  The explicit cast avoids an implicit 'const char *' to
 * 'const uint8_t *' conversion, which standard C does not permit
 * without a cast.
 */
static void
append_string(const char *str)
{

	append_packet_data((const uint8_t *)str, strlen(str));
}
561 
/* Append one payload byte formatted as two hex characters. */
static void
append_byte(uint8_t v)
{
	uint8_t buf[2];

	format_byte(v, buf);
	append_packet_data(buf, sizeof(buf));
}
570 
/*
 * Append the low 'len' bytes of 'value' as hex, least significant
 * byte first (the byte order GDB expects for register values).
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++) {
		append_byte(value);
		value >>= 8;
	}
}
581 
582 static void
583 append_unsigned_be(uintmax_t value, size_t len)
584 {
585 	char buf[len * 2];
586 	size_t i;
587 
588 	for (i = 0; i < len; i++) {
589 		format_byte(value, buf + (len - i - 1) * 2);
590 		value >>= 8;
591 	}
592 	append_packet_data(buf, sizeof(buf));
593 }
594 
/*
 * Append a big-endian hex integer with no leading zero bytes; a
 * value of zero is sent as a single '0'.  fls() finds the highest
 * set bit to size the output in whole bytes.
 */
static void
append_integer(unsigned int value)
{

	if (value == 0)
		append_char('0');
	else
		append_unsigned_be(value, (fls(value) + 7) / 8);
}
604 
/* Append a string encoded as hex pairs, one per character. */
static void
append_asciihex(const char *str)
{

	for (; *str != '\0'; str++)
		append_byte(*str);
}
614 
/*
 * Send an empty packet ("$#00"); presumably the stock reply for
 * unsupported requests -- see the GDB remote protocol spec.
 */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
622 
/*
 * Send an "Exx" error reply.  Only the low 8 bits of 'error' are
 * reported, since append_byte takes a uint8_t.
 */
static void
send_error(int error)
{

	start_packet();
	append_char('E');
	append_byte(error);
	finish_packet();
}
632 
/* Send the standard "OK" success reply. */
static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}
641 
642 static int
643 parse_threadid(const uint8_t *data, size_t len)
644 {
645 
646 	if (len == 1 && *data == '0')
647 		return (0);
648 	if (len == 2 && memcmp(data, "-1", 2) == 0)
649 		return (-1);
650 	if (len == 0)
651 		return (-2);
652 	return (parse_integer(data, len));
653 }
654 
655 /*
656  * Report the current stop event to the debugger.  If the stop is due
657  * to an event triggered on a specific vCPU such as a breakpoint or
658  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
659  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
660  * the reporting vCPU for vCPU events.
661  */
662 static void
663 report_stop(bool set_cur_vcpu)
664 {
665 	struct vcpu_state *vs;
666 
667 	start_packet();
668 	if (stopped_vcpu == -1) {
669 		append_char('S');
670 		append_byte(GDB_SIGNAL_TRAP);
671 	} else {
672 		vs = &vcpu_state[stopped_vcpu];
673 		if (set_cur_vcpu)
674 			cur_vcpu = stopped_vcpu;
675 		append_char('T');
676 		append_byte(GDB_SIGNAL_TRAP);
677 		append_string("thread:");
678 		append_integer(stopped_vcpu + 1);
679 		append_char(';');
680 		if (vs->hit_swbreak) {
681 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
682 			if (swbreak_enabled)
683 				append_string("swbreak:;");
684 		} else if (vs->stepped)
685 			debug("$vCPU %d reporting step\n", stopped_vcpu);
686 		else
687 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
688 	}
689 	finish_packet();
690 	report_next_stop = false;
691 }
692 
693 /*
694  * If this stop is due to a vCPU event, clear that event to mark it as
695  * acknowledged.
696  */
697 static void
698 discard_stop(void)
699 {
700 	struct vcpu_state *vs;
701 
702 	if (stopped_vcpu != -1) {
703 		vs = &vcpu_state[stopped_vcpu];
704 		vs->hit_swbreak = false;
705 		vs->stepped = false;
706 		stopped_vcpu = -1;
707 	}
708 	report_next_stop = true;
709 }
710 
/*
 * Invoked once the last suspended vCPU has parked.  The very first
 * stop after a connection is not reported as a packet; otherwise,
 * if a report is due, send it and flush the socket.
 */
static void
gdb_finish_suspend_vcpus(void)
{

	if (first_stop) {
		first_stop = false;
		stopped_vcpu = -1;
	} else if (report_next_stop) {
		assert(!response_pending());
		report_stop(true);
		send_pending_data(cur_fd);
	}
}
724 
725 /*
726  * vCPU threads invoke this function whenever the vCPU enters the
727  * debug server to pause or report an event.  vCPU threads wait here
728  * as long as the debug server keeps them suspended.
729  */
730 static void
731 _gdb_cpu_suspend(int vcpu, bool report_stop)
732 {
733 
734 	debug("$vCPU %d suspending\n", vcpu);
735 	CPU_SET(vcpu, &vcpus_waiting);
736 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
737 		gdb_finish_suspend_vcpus();
738 	while (CPU_ISSET(vcpu, &vcpus_suspended))
739 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
740 	CPU_CLR(vcpu, &vcpus_waiting);
741 	debug("$vCPU %d resuming\n", vcpu);
742 }
743 
744 /*
745  * Invoked at the start of a vCPU thread's execution to inform the
746  * debug server about the new thread.
747  */
748 void
749 gdb_cpu_add(int vcpu)
750 {
751 
752 	if (!gdb_active)
753 		return;
754 	debug("$vCPU %d starting\n", vcpu);
755 	pthread_mutex_lock(&gdb_lock);
756 	assert(vcpu < guest_ncpus);
757 	CPU_SET(vcpu, &vcpus_active);
758 	if (!TAILQ_EMPTY(&breakpoints)) {
759 		vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
760 		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
761 	}
762 
763 	/*
764 	 * If a vcpu is added while vcpus are stopped, suspend the new
765 	 * vcpu so that it will pop back out with a debug exit before
766 	 * executing the first instruction.
767 	 */
768 	if (!CPU_EMPTY(&vcpus_suspended)) {
769 		CPU_SET(vcpu, &vcpus_suspended);
770 		_gdb_cpu_suspend(vcpu, false);
771 	}
772 	pthread_mutex_unlock(&gdb_lock);
773 }
774 
775 /*
776  * Invoked by vCPU before resuming execution.  This enables stepping
777  * if the vCPU is marked as stepping.
778  */
779 static void
780 gdb_cpu_resume(int vcpu)
781 {
782 	struct vcpu_state *vs;
783 	int error;
784 
785 	vs = &vcpu_state[vcpu];
786 
787 	/*
788 	 * Any pending event should already be reported before
789 	 * resuming.
790 	 */
791 	assert(vs->hit_swbreak == false);
792 	assert(vs->stepped == false);
793 	if (vs->stepping) {
794 		error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
795 		assert(error == 0);
796 	}
797 }
798 
799 /*
800  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
801  * has been suspended due to an event on different vCPU or in response
802  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
803  */
804 void
805 gdb_cpu_suspend(int vcpu)
806 {
807 
808 	if (!gdb_active)
809 		return;
810 	pthread_mutex_lock(&gdb_lock);
811 	_gdb_cpu_suspend(vcpu, true);
812 	gdb_cpu_resume(vcpu);
813 	pthread_mutex_unlock(&gdb_lock);
814 }
815 
/*
 * Ask all active vCPUs to stop.  Caller must hold gdb_lock.  If
 * every vCPU is already parked, the suspend completes immediately.
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	vm_suspend_cpu(ctx, -1);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
827 
828 /*
829  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
830  * the VT-x-specific MTRAP exit.
831  */
832 void
833 gdb_cpu_mtrap(int vcpu)
834 {
835 	struct vcpu_state *vs;
836 
837 	if (!gdb_active)
838 		return;
839 	debug("$vCPU %d MTRAP\n", vcpu);
840 	pthread_mutex_lock(&gdb_lock);
841 	vs = &vcpu_state[vcpu];
842 	if (vs->stepping) {
843 		vs->stepping = false;
844 		vs->stepped = true;
845 		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
846 		while (vs->stepped) {
847 			if (stopped_vcpu == -1) {
848 				debug("$vCPU %d reporting step\n", vcpu);
849 				stopped_vcpu = vcpu;
850 				gdb_suspend_vcpus();
851 			}
852 			_gdb_cpu_suspend(vcpu, true);
853 		}
854 		gdb_cpu_resume(vcpu);
855 	}
856 	pthread_mutex_unlock(&gdb_lock);
857 }
858 
859 static struct breakpoint *
860 find_breakpoint(uint64_t gpa)
861 {
862 	struct breakpoint *bp;
863 
864 	TAILQ_FOREACH(bp, &breakpoints, link) {
865 		if (bp->gpa == gpa)
866 			return (bp);
867 	}
868 	return (NULL);
869 }
870 
/*
 * Handler for VM_EXITCODE_BPT: the guest executed an INT3.  If the
 * instruction matches a debug-server breakpoint, leave RIP at the
 * trapping instruction and report a stop; otherwise re-inject #BP
 * into the guest, which presumably owns the breakpoint.
 */
void
gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error;

	if (!gdb_active) {
		fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
		exit(4);
	}
	pthread_mutex_lock(&gdb_lock);
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpu];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		/* Loop until the event is reported or the bp is removed. */
		for (;;) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
				    vmexit->rip);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
		    vmexit->rip);
		error = vm_set_register(ctx, vcpu,
		    VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
925 
/*
 * Resume a single vCPU in single-step mode.  Returns false if the
 * MTRAP capability cannot be queried (single-stepping unsupported
 * on this vCPU).
 */
static bool
gdb_step_vcpu(int vcpu)
{
	int error, val;

	debug("$vCPU %d step\n", vcpu);
	/* Probe for MTRAP support; 'val' itself is unused. */
	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
	if (error < 0)
		return (false);

	discard_stop();
	vcpu_state[vcpu].stepping = true;
	vm_resume_cpu(ctx, vcpu);
	CPU_CLR(vcpu, &vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
	return (true);
}
943 
/* Release all suspended vCPUs.  Caller must hold gdb_lock. */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_cpu(ctx, -1);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
954 
/*
 * Handle a 'g' packet: send all registers in gdb_regset for the
 * current vCPU, each formatted at the width given by gdb_regsize.
 */
static void
gdb_read_regs(void)
{
	uint64_t regvals[nitems(gdb_regset)];
	int i;

	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
	    gdb_regset, regvals) == -1) {
		send_error(errno);
		return;
	}
	start_packet();
	for (i = 0; i < nitems(regvals); i++)
		append_unsigned_native(regvals[i], gdb_regsize[i]);
	finish_packet();
}
971 
/*
 * Handle an 'm' packet ("m<addr>,<len>"): read guest memory at a
 * guest virtual address and reply with the data in hex.  If a
 * translation or read fails after some data has been emitted, the
 * partial packet is finished, so the debugger sees a short read
 * rather than an error.
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	/* Process the range one guest page at a time. */
	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(ctx, cur_vcpu, gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit the word LSB first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	if (!started)
		start_packet();
	finish_packet();
}
1083 
/*
 * Handle an 'M' packet ("M<addr>,<len>:<hex data>"): write guest
 * memory at a guest virtual address.  Unlike reads, any failure is
 * reported as an error since a partial write has no useful partial
 * reply.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length. */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	/* Process the range one guest page at a time. */
	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(ctx, cur_vcpu, gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
1192 
/*
 * Toggle VM_CAP_BPT_EXIT on every active vCPU.  Returns false on the
 * first failure; note that vCPUs toggled before the failure are left
 * in the new state.
 */
static bool
set_breakpoint_caps(bool enable)
{
	cpuset_t mask;
	int vcpu;

	mask = vcpus_active;
	while (!CPU_EMPTY(&mask)) {
		vcpu = CPU_FFS(&mask) - 1;
		CPU_CLR(vcpu, &mask);
		if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
		    enable ? 1 : 0) < 0)
			return (false);
		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
		    enable ? "en" : "dis");
	}
	return (true);
}
1211 
/*
 * Remove every software breakpoint: restore each patched instruction
 * byte, free the entries, and disable breakpoint exits on all vCPUs.
 */
static void
remove_all_sw_breakpoints(void)
{
	struct breakpoint *bp, *nbp;
	uint8_t *cp;

	if (TAILQ_EMPTY(&breakpoints))
		return;

	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
		debug("remove breakpoint at %#lx\n", bp->gpa);
		cp = paddr_guest2host(ctx, bp->gpa, 1);
		*cp = bp->shadow_inst;
		TAILQ_REMOVE(&breakpoints, bp, link);
		free(bp);
	}
	TAILQ_INIT(&breakpoints);
	set_breakpoint_caps(false);
}
1231 
1232 static void
1233 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1234 {
1235 	struct breakpoint *bp;
1236 	uint64_t gpa;
1237 	uint8_t *cp;
1238 	int error;
1239 
1240 	if (kind != 1) {
1241 		send_error(EINVAL);
1242 		return;
1243 	}
1244 
1245 	error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1246 	if (error == -1) {
1247 		send_error(errno);
1248 		return;
1249 	}
1250 	if (error == 0) {
1251 		send_error(EFAULT);
1252 		return;
1253 	}
1254 
1255 	cp = paddr_guest2host(ctx, gpa, 1);
1256 
1257 	/* Only permit breakpoints in guest RAM. */
1258 	if (cp == NULL) {
1259 		send_error(EFAULT);
1260 		return;
1261 	}
1262 
1263 	/* Find any existing breakpoint. */
1264 	bp = find_breakpoint(gpa);
1265 
1266 	/*
1267 	 * Silently ignore duplicate commands since the protocol
1268 	 * requires these packets to be idempotent.
1269 	 */
1270 	if (insert) {
1271 		if (bp == NULL) {
1272 			if (TAILQ_EMPTY(&breakpoints) &&
1273 			    !set_breakpoint_caps(true)) {
1274 				send_empty_response();
1275 				return;
1276 			}
1277 			bp = malloc(sizeof(*bp));
1278 			bp->gpa = gpa;
1279 			bp->shadow_inst = *cp;
1280 			*cp = 0xcc;	/* INT 3 */
1281 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1282 			debug("new breakpoint at %#lx\n", gpa);
1283 		}
1284 	} else {
1285 		if (bp != NULL) {
1286 			debug("remove breakpoint at %#lx\n", gpa);
1287 			*cp = bp->shadow_inst;
1288 			TAILQ_REMOVE(&breakpoints, bp, link);
1289 			free(bp);
1290 			if (TAILQ_EMPTY(&breakpoints))
1291 				set_breakpoint_caps(false);
1292 		}
1293 	}
1294 	send_ok();
1295 }
1296 
/*
 * Handle 'Z'/'z' packets ("[Zz]<type>,<addr>,<kind>[;...]"): insert
 * ('Z') or remove ('z') a breakpoint.  Only type 0 (software
 * breakpoint) is supported; other types get an empty response.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	uint64_t gva;
	uint8_t *cp;
	bool insert;
	int kind, type;

	insert = data[0] == 'Z';

	/* Skip 'Z/z' */
	data += 1;
	len -= 1;

	/* Parse and consume type. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	type = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume kind. */
	cp = memchr(data, ';', len);
	if (cp == data) {
		send_error(EINVAL);
		return;
	}
	if (cp != NULL) {
		/*
		 * We do not advertise support for either the
		 * ConditionalBreakpoints or BreakpointCommands
		 * features, so we should not be getting conditions or
		 * commands from the remote end.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);
	data += len;
	len = 0;

	switch (type) {
	case 0:
		update_sw_breakpoint(gva, kind, insert);
		break;
	default:
		/* Hardware break/watchpoints are not supported. */
		send_empty_response();
		break;
	}
}
1360 
/*
 * Returns true if the packet in 'data' (of length 'len') begins with
 * the command string 'cmd'.  Note this is a prefix match, not an exact
 * match, so callers must order comparisons carefully.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	/* Compute strlen(cmd) once instead of twice. */
	cmdlen = strlen(cmd);
	if (cmdlen > len)
		return (false);
	return (memcmp(data, cmd, cmdlen) == 0);
}
1369 
1370 static void
1371 check_features(const uint8_t *data, size_t len)
1372 {
1373 	char *feature, *next_feature, *str, *value;
1374 	bool supported;
1375 
1376 	str = malloc(len + 1);
1377 	memcpy(str, data, len);
1378 	str[len] = '\0';
1379 	next_feature = str;
1380 
1381 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1382 		/*
1383 		 * Null features shouldn't exist, but skip if they
1384 		 * do.
1385 		 */
1386 		if (strcmp(feature, "") == 0)
1387 			continue;
1388 
1389 		/*
1390 		 * Look for the value or supported / not supported
1391 		 * flag.
1392 		 */
1393 		value = strchr(feature, '=');
1394 		if (value != NULL) {
1395 			*value = '\0';
1396 			value++;
1397 			supported = true;
1398 		} else {
1399 			value = feature + strlen(feature) - 1;
1400 			switch (*value) {
1401 			case '+':
1402 				supported = true;
1403 				break;
1404 			case '-':
1405 				supported = false;
1406 				break;
1407 			default:
1408 				/*
1409 				 * This is really a protocol error,
1410 				 * but we just ignore malformed
1411 				 * features for ease of
1412 				 * implementation.
1413 				 */
1414 				continue;
1415 			}
1416 			value = NULL;
1417 		}
1418 
1419 		if (strcmp(feature, "swbreak") == 0)
1420 			swbreak_enabled = supported;
1421 	}
1422 	free(str);
1423 
1424 	start_packet();
1425 
1426 	/* This is an arbitrary limit. */
1427 	append_string("PacketSize=4096");
1428 	append_string(";swbreak+");
1429 	finish_packet();
1430 }
1431 
/*
 * Dispatch a 'q' (general query) packet.  Note that command_equals()
 * is a prefix match, so the order of comparisons matters (e.g. "qC"
 * is tested after the longer "qAttached"; a packet such as "qCRC"
 * would also match the "qC" case — presumably acceptable since we do
 * not support those queries anyway, but worth confirming).
 * Unrecognized queries get an empty response per the protocol.
 */
static void
gdb_query(const uint8_t *data, size_t len)
{

	/*
	 * TODO:
	 * - qSearch
	 */
	if (command_equals(data, len, "qAttached")) {
		/* '1': debugging an existing process (the guest). */
		start_packet();
		append_char('1');
		finish_packet();
	} else if (command_equals(data, len, "qC")) {
		/* Report the current thread; thread ids are vcpu + 1. */
		start_packet();
		append_string("QC");
		append_integer(cur_vcpu + 1);
		finish_packet();
	} else if (command_equals(data, len, "qfThreadInfo")) {
		cpuset_t mask;
		bool first;
		int vcpu;

		if (CPU_EMPTY(&vcpus_active)) {
			send_error(EINVAL);
			return;
		}
		/* List every active vcpu as a thread in one 'm' packet. */
		mask = vcpus_active;
		start_packet();
		append_char('m');
		first = true;
		while (!CPU_EMPTY(&mask)) {
			vcpu = CPU_FFS(&mask) - 1;
			CPU_CLR(vcpu, &mask);
			if (first)
				first = false;
			else
				append_char(',');
			append_integer(vcpu + 1);
		}
		finish_packet();
	} else if (command_equals(data, len, "qsThreadInfo")) {
		/* 'l': end of thread list (all sent in qfThreadInfo). */
		start_packet();
		append_char('l');
		finish_packet();
	} else if (command_equals(data, len, "qSupported")) {
		data += strlen("qSupported");
		len -= strlen("qSupported");
		check_features(data, len);
	} else if (command_equals(data, len, "qThreadExtraInfo")) {
		char buf[16];
		int tid;

		data += strlen("qThreadExtraInfo");
		len -= strlen("qThreadExtraInfo");
		if (*data != ',') {
			send_error(EINVAL);
			return;
		}
		tid = parse_threadid(data + 1, len - 1);
		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
			send_error(EINVAL);
			return;
		}

		/* Human-readable thread description, hex-encoded. */
		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
		start_packet();
		append_asciihex(buf);
		finish_packet();
	} else
		send_empty_response();
}
1503 
1504 static void
1505 handle_command(const uint8_t *data, size_t len)
1506 {
1507 
1508 	/* Reject packets with a sequence-id. */
1509 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1510 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1511 		send_empty_response();
1512 		return;
1513 	}
1514 
1515 	switch (*data) {
1516 	case 'c':
1517 		if (len != 1) {
1518 			send_error(EINVAL);
1519 			break;
1520 		}
1521 
1522 		discard_stop();
1523 		gdb_resume_vcpus();
1524 		break;
1525 	case 'D':
1526 		send_ok();
1527 
1528 		/* TODO: Resume any stopped CPUs. */
1529 		break;
1530 	case 'g': {
1531 		gdb_read_regs();
1532 		break;
1533 	}
1534 	case 'H': {
1535 		int tid;
1536 
1537 		if (data[1] != 'g' && data[1] != 'c') {
1538 			send_error(EINVAL);
1539 			break;
1540 		}
1541 		tid = parse_threadid(data + 2, len - 2);
1542 		if (tid == -2) {
1543 			send_error(EINVAL);
1544 			break;
1545 		}
1546 
1547 		if (CPU_EMPTY(&vcpus_active)) {
1548 			send_error(EINVAL);
1549 			break;
1550 		}
1551 		if (tid == -1 || tid == 0)
1552 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1553 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1554 			cur_vcpu = tid - 1;
1555 		else {
1556 			send_error(EINVAL);
1557 			break;
1558 		}
1559 		send_ok();
1560 		break;
1561 	}
1562 	case 'm':
1563 		gdb_read_mem(data, len);
1564 		break;
1565 	case 'M':
1566 		gdb_write_mem(data, len);
1567 		break;
1568 	case 'T': {
1569 		int tid;
1570 
1571 		tid = parse_threadid(data + 1, len - 1);
1572 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1573 			send_error(EINVAL);
1574 			return;
1575 		}
1576 		send_ok();
1577 		break;
1578 	}
1579 	case 'q':
1580 		gdb_query(data, len);
1581 		break;
1582 	case 's':
1583 		if (len != 1) {
1584 			send_error(EINVAL);
1585 			break;
1586 		}
1587 
1588 		/* Don't send a reply until a stop occurs. */
1589 		if (!gdb_step_vcpu(cur_vcpu)) {
1590 			send_error(EOPNOTSUPP);
1591 			break;
1592 		}
1593 		break;
1594 	case 'z':
1595 	case 'Z':
1596 		parse_breakpoint(data, len);
1597 		break;
1598 	case '?':
1599 		report_stop(false);
1600 		break;
1601 	case 'G': /* TODO */
1602 	case 'v':
1603 		/* Handle 'vCont' */
1604 		/* 'vCtrlC' */
1605 	case 'p': /* TODO */
1606 	case 'P': /* TODO */
1607 	case 'Q': /* TODO */
1608 	case 't': /* TODO */
1609 	case 'X': /* TODO */
1610 	default:
1611 		send_empty_response();
1612 	}
1613 }
1614 
/* Check for a valid packet in the command buffer. */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	/*
	 * Consume as much of the receive buffer as possible.  Each
	 * iteration handles one of: an interrupt byte (0x03), an ack
	 * ('+'), a nack ('-'), or a full '$...#CC' packet.  Return
	 * when the buffer is empty or holds only a partial packet.
	 */
	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			/* Ctrl-C from the debugger: stop the guest. */
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			/*
			 * If a stop happened while the previous reply
			 * was in flight, report it now that the reply
			 * has been acknowledged.
			 */
			if (stopped_vcpu != -1 && report_next_stop) {
				report_stop(true);
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/*
				 * Rewind the response buffer to the
				 * start so the reply is retransmitted,
				 * skipping any leading ack byte.
				 */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* '$' through '#' plus two checksum digits. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				/* Bad checksum: discard and nack. */
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			send_char('+');

			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1703 
1704 static void
1705 gdb_readable(int fd, enum ev_type event, void *arg)
1706 {
1707 	ssize_t nread;
1708 	int pending;
1709 
1710 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1711 		warn("FIONREAD on GDB socket");
1712 		return;
1713 	}
1714 
1715 	/*
1716 	 * 'pending' might be zero due to EOF.  We need to call read
1717 	 * with a non-zero length to detect EOF.
1718 	 */
1719 	if (pending == 0)
1720 		pending = 1;
1721 
1722 	/* Ensure there is room in the command buffer. */
1723 	io_buffer_grow(&cur_comm, pending);
1724 	assert(io_buffer_avail(&cur_comm) >= pending);
1725 
1726 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1727 	if (nread == 0) {
1728 		close_connection();
1729 	} else if (nread == -1) {
1730 		if (errno == EAGAIN)
1731 			return;
1732 
1733 		warn("Read from GDB socket");
1734 		close_connection();
1735 	} else {
1736 		cur_comm.len += nread;
1737 		pthread_mutex_lock(&gdb_lock);
1738 		check_command(fd);
1739 		pthread_mutex_unlock(&gdb_lock);
1740 	}
1741 }
1742 
/*
 * mevent write callback: the socket became writable again, so flush
 * any queued response bytes.
 */
static void
gdb_writable(int fd, enum ev_type event, void *arg)
{

	send_pending_data(fd);
}
1749 
1750 static void
1751 new_connection(int fd, enum ev_type event, void *arg)
1752 {
1753 	int optval, s;
1754 
1755 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1756 	if (s == -1) {
1757 		if (arg != NULL)
1758 			err(1, "Failed accepting initial GDB connection");
1759 
1760 		/* Silently ignore errors post-startup. */
1761 		return;
1762 	}
1763 
1764 	optval = 1;
1765 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1766 	    -1) {
1767 		warn("Failed to disable SIGPIPE for GDB connection");
1768 		close(s);
1769 		return;
1770 	}
1771 
1772 	pthread_mutex_lock(&gdb_lock);
1773 	if (cur_fd != -1) {
1774 		close(s);
1775 		warnx("Ignoring additional GDB connection.");
1776 	}
1777 
1778 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1779 	if (read_event == NULL) {
1780 		if (arg != NULL)
1781 			err(1, "Failed to setup initial GDB connection");
1782 		pthread_mutex_unlock(&gdb_lock);
1783 		return;
1784 	}
1785 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1786 	if (write_event == NULL) {
1787 		if (arg != NULL)
1788 			err(1, "Failed to setup initial GDB connection");
1789 		mevent_delete_close(read_event);
1790 		read_event = NULL;
1791 	}
1792 
1793 	cur_fd = s;
1794 	cur_vcpu = 0;
1795 	stopped_vcpu = -1;
1796 
1797 	/* Break on attach. */
1798 	first_stop = true;
1799 	report_next_stop = false;
1800 	gdb_suspend_vcpus();
1801 	pthread_mutex_unlock(&gdb_lock);
1802 }
1803 
1804 #ifndef WITHOUT_CAPSICUM
/*
 * Restrict the GDB listen socket to the minimal set of Capsicum
 * rights and ioctls (FIONREAD is used by gdb_readable()) needed for
 * accepting and servicing debugger connections in capability mode.
 */
void
limit_gdb_socket(int s)
{
	cap_rights_t rights;
	unsigned long ioctls[] = { FIONREAD };

	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
	    CAP_SETSOCKOPT, CAP_IOCTL);
	if (caph_rights_limit(s, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
}
1818 #endif
1819 
1820 void
1821 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1822 {
1823 	struct sockaddr_in sin;
1824 	int error, flags, optval, s;
1825 
1826 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1827 
1828 	error = pthread_mutex_init(&gdb_lock, NULL);
1829 	if (error != 0)
1830 		errc(1, error, "gdb mutex init");
1831 	error = pthread_cond_init(&idle_vcpus, NULL);
1832 	if (error != 0)
1833 		errc(1, error, "gdb cv init");
1834 
1835 	ctx = _ctx;
1836 	s = socket(PF_INET, SOCK_STREAM, 0);
1837 	if (s < 0)
1838 		err(1, "gdb socket create");
1839 
1840 	optval = 1;
1841 	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
1842 
1843 	sin.sin_len = sizeof(sin);
1844 	sin.sin_family = AF_INET;
1845 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1846 	sin.sin_port = htons(sport);
1847 
1848 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1849 		err(1, "gdb socket bind");
1850 
1851 	if (listen(s, 1) < 0)
1852 		err(1, "gdb socket listen");
1853 
1854 	stopped_vcpu = -1;
1855 	TAILQ_INIT(&breakpoints);
1856 	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
1857 	if (wait) {
1858 		/*
1859 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1860 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1861 		 * it starts execution.  The vcpu will remain suspended
1862 		 * until a debugger connects.
1863 		 */
1864 		CPU_SET(0, &vcpus_suspended);
1865 		stopped_vcpu = 0;
1866 	}
1867 
1868 	flags = fcntl(s, F_GETFL);
1869 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1870 		err(1, "Failed to mark gdb socket non-blocking");
1871 
1872 #ifndef WITHOUT_CAPSICUM
1873 	limit_gdb_socket(s);
1874 #endif
1875 	mevent_add(s, EVF_READ, new_connection, NULL);
1876 	gdb_active = true;
1877 }
1878