xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 3733d82c4deb49035a39e18744085d1e3e9b8dc5)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 #ifndef WITHOUT_CAPSICUM
30 #include <sys/capsicum.h>
31 #endif
32 #include <sys/endian.h>
33 #include <sys/ioctl.h>
34 #include <sys/mman.h>
35 #include <sys/queue.h>
36 #include <sys/socket.h>
37 #include <machine/atomic.h>
38 #include <machine/specialreg.h>
39 #include <machine/vmm.h>
40 #include <netinet/in.h>
41 #include <assert.h>
42 #ifndef WITHOUT_CAPSICUM
43 #include <capsicum_helpers.h>
44 #endif
45 #include <err.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <netdb.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <stdbool.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <sysexits.h>
56 #include <unistd.h>
57 #include <vmmapi.h>
58 
59 #include "bhyverun.h"
60 #include "config.h"
61 #include "debug.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65 
66 /*
67  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68  * use SIGTRAP.
69  */
70 #define	GDB_SIGNAL_TRAP		5
71 
/*
 * Software breakpoint encoding.  GDB_BP_INSTR is the byte sequence
 * used as the breakpoint instruction (0xcc is the x86 INT3 opcode),
 * GDB_BP_SIZE is its length in bytes, and GDB_PC_REGNAME names the
 * register holding the guest program counter.  The static assertion
 * keeps the declared size and the instruction bytes in sync.
 */
#define	GDB_BP_SIZE		1
#define	GDB_BP_INSTR		(uint8_t []){0xcc}
#define	GDB_PC_REGNAME		VM_REG_GUEST_RIP

_Static_assert(sizeof(GDB_BP_INSTR) == GDB_BP_SIZE,
    "GDB_BP_INSTR has wrong size");
78 
79 static void gdb_resume_vcpus(void);
80 static void check_command(int fd);
81 
/*
 * mevent handles for the debugger connection.  'write_event' is
 * enabled while response data remains to be sent and disabled once
 * the response buffer drains; both are deleted by close_connection().
 */
static struct mevent *read_event, *write_event;

/*
 * vCPU sets: 'vcpus_active' holds vCPUs registered via gdb_cpu_add(),
 * 'vcpus_suspended' holds vCPUs the debug server wants stopped, and
 * 'vcpus_waiting' holds vCPUs currently parked in _gdb_cpu_suspend().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Held while debug-server state is examined or modified. */
static pthread_mutex_t gdb_lock;
/* Suspended vCPU threads sleep on this; broadcast when vCPUs resume. */
static pthread_cond_t idle_vcpus;
/*
 * 'first_stop': the next completed suspend is not reported to the
 * debugger.  'report_next_stop': the next completed suspend should
 * send a stop reply.  'swbreak_enabled': stop replies for breakpoint
 * hits include "swbreak:;".
 */
static bool first_stop, report_next_stop, swbreak_enabled;
88 
89 /*
90  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
91  * read buffer, 'start' is unused and 'len' contains the number of
92  * valid bytes in the buffer.  For a write buffer, 'start' is set to
93  * the index of the next byte in 'data' to send, and 'len' contains
94  * the remaining number of valid bytes to send.
95  */
struct io_buffer {
	uint8_t *data;		/* backing storage */
	size_t capacity;	/* bytes of room at 'data' */
	size_t start;		/* write buffers: index of next byte to send */
	size_t len;		/* valid bytes (bytes left to send for writes) */
};
102 
/*
 * A software breakpoint tracked by the debug server.  find_breakpoint()
 * looks entries up by 'gpa'; remove_all_sw_breakpoints() copies
 * 'shadow_inst' back into guest memory at 'gpa' before freeing the
 * entry.
 */
struct breakpoint {
	uint64_t gpa;				/* guest physical address */
	uint8_t shadow_inst[GDB_BP_SIZE];	/* bytes restored on removal */
	TAILQ_ENTRY(breakpoint) link;		/* entry in 'breakpoints' */
};
108 
109 /*
 * When a vCPU stops due to an event that should be reported to the
111  * debugger, information about the event is stored in this structure.
112  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
113  * and stops other vCPUs so the event can be reported.  The
114  * report_stop() function reports the event for the 'stopped_vcpu'
115  * vCPU.  When the debugger resumes execution via continue or step,
116  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
117  * event handlers until the associated event is reported or disabled.
118  *
119  * An idle vCPU will have all of the boolean fields set to false.
120  *
121  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
122  * released to execute the stepped instruction.  When the vCPU reports
123  * the stepping trap, 'stepped' is set.
124  *
125  * When a vCPU hits a breakpoint set by the debug server,
126  * 'hit_swbreak' is set to true.
127  */
struct vcpu_state {
	bool stepping;		/* released to execute one stepped instruction */
	bool stepped;		/* step trap taken, not yet acknowledged */
	bool hit_swbreak;	/* hit a debug-server breakpoint, not yet acked */
};
133 
/*
 * I/O buffers for the connection.  'cur_resp' accumulates response
 * bytes that send_pending_data() writes to the socket; 'cur_comm' is
 * presumably the buffer for incoming command bytes (its users are not
 * all visible here -- confirm).
 */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet currently being built. */
static uint8_t cur_csum;
static struct vmctx *ctx;
/* Descriptor of the debugger connection; reset to -1 on close. */
static int cur_fd = -1;
static TAILQ_HEAD(, breakpoint) breakpoints;
/* Per-vCPU event state, indexed by vCPU id. */
static struct vcpu_state *vcpu_state;
/* vCPU handles indexed by vCPU id; filled in by gdb_cpu_add(). */
static struct vcpu **vcpus;
/*
 * 'cur_vcpu' is the vCPU used for register and memory requests;
 * 'stopped_vcpu' is the vCPU whose event caused the current stop, or
 * -1 if the stop is not tied to a specific vCPU.
 */
static int cur_vcpu, stopped_vcpu;
/* The vCPU hooks return early while this is false. */
static bool gdb_active = false;
143 
/*
 * Registers exposed to the debugger.  The position in this table is
 * the register number used by gdb_read_one_reg(), and gdb_read_regs()
 * reports the registers in table order.  'size' is the number of
 * bytes of the register value sent to the debugger.
 */
static const struct gdb_reg {
	enum vm_reg_name id;
	int size;
} gdb_regset[] = {
	{ .id = VM_REG_GUEST_RAX, .size = 8 },
	{ .id = VM_REG_GUEST_RBX, .size = 8 },
	{ .id = VM_REG_GUEST_RCX, .size = 8 },
	{ .id = VM_REG_GUEST_RDX, .size = 8 },
	{ .id = VM_REG_GUEST_RSI, .size = 8 },
	{ .id = VM_REG_GUEST_RDI, .size = 8 },
	{ .id = VM_REG_GUEST_RBP, .size = 8 },
	{ .id = VM_REG_GUEST_RSP, .size = 8 },
	{ .id = VM_REG_GUEST_R8, .size = 8 },
	{ .id = VM_REG_GUEST_R9, .size = 8 },
	{ .id = VM_REG_GUEST_R10, .size = 8 },
	{ .id = VM_REG_GUEST_R11, .size = 8 },
	{ .id = VM_REG_GUEST_R12, .size = 8 },
	{ .id = VM_REG_GUEST_R13, .size = 8 },
	{ .id = VM_REG_GUEST_R14, .size = 8 },
	{ .id = VM_REG_GUEST_R15, .size = 8 },
	{ .id = VM_REG_GUEST_RIP, .size = 8 },
	/* The remaining registers are reported as 4 bytes each. */
	{ .id = VM_REG_GUEST_RFLAGS, .size = 4 },
	{ .id = VM_REG_GUEST_CS, .size = 4 },
	{ .id = VM_REG_GUEST_SS, .size = 4 },
	{ .id = VM_REG_GUEST_DS, .size = 4 },
	{ .id = VM_REG_GUEST_ES, .size = 4 },
	{ .id = VM_REG_GUEST_FS, .size = 4 },
	{ .id = VM_REG_GUEST_GS, .size = 4 },
};
173 
174 #ifdef GDB_LOG
175 #include <stdarg.h>
176 #include <stdio.h>
177 
178 static void __printflike(1, 2)
179 debug(const char *fmt, ...)
180 {
181 	static FILE *logfile;
182 	va_list ap;
183 
184 	if (logfile == NULL) {
185 		logfile = fopen("/tmp/bhyve_gdb.log", "w");
186 		if (logfile == NULL)
187 			return;
188 #ifndef WITHOUT_CAPSICUM
189 		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
190 			fclose(logfile);
191 			logfile = NULL;
192 			return;
193 		}
194 #endif
195 		setlinebuf(logfile);
196 	}
197 	va_start(ap, fmt);
198 	vfprintf(logfile, fmt, ap);
199 	va_end(ap);
200 }
201 #else
202 #define debug(...)
203 #endif
204 
205 static void	remove_all_sw_breakpoints(void);
206 
207 static int
208 guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
209 {
210 	uint64_t regs[4];
211 	const int regset[4] = {
212 		VM_REG_GUEST_CR0,
213 		VM_REG_GUEST_CR3,
214 		VM_REG_GUEST_CR4,
215 		VM_REG_GUEST_EFER
216 	};
217 
218 	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
219 		return (-1);
220 
221 	/*
222 	 * For the debugger, always pretend to be the kernel (CPL 0),
223 	 * and if long-mode is enabled, always parse addresses as if
224 	 * in 64-bit mode.
225 	 */
226 	paging->cr3 = regs[1];
227 	paging->cpl = 0;
228 	if (regs[3] & EFER_LMA)
229 		paging->cpu_mode = CPU_MODE_64BIT;
230 	else if (regs[0] & CR0_PE)
231 		paging->cpu_mode = CPU_MODE_PROTECTED;
232 	else
233 		paging->cpu_mode = CPU_MODE_REAL;
234 	if (!(regs[0] & CR0_PG))
235 		paging->paging_mode = PAGING_MODE_FLAT;
236 	else if (!(regs[2] & CR4_PAE))
237 		paging->paging_mode = PAGING_MODE_32;
238 	else if (regs[3] & EFER_LME)
239 		paging->paging_mode = (regs[2] & CR4_LA57) ?
240 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
241 	else
242 		paging->paging_mode = PAGING_MODE_PAE;
243 	return (0);
244 }
245 
246 /*
247  * Map a guest virtual address to a physical address (for a given vcpu).
248  * If a guest virtual address is valid, return 1.  If the address is
249  * not valid, return 0.  If an error occurs obtaining the mapping,
250  * return -1.
251  */
252 static int
253 guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
254 {
255 	struct vm_guest_paging paging;
256 	int fault;
257 
258 	if (guest_paging_info(vcpu, &paging) == -1)
259 		return (-1);
260 
261 	/*
262 	 * Always use PROT_READ.  We really care if the VA is
263 	 * accessible, not if the current vCPU can write.
264 	 */
265 	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
266 	    &fault) == -1)
267 		return (-1);
268 	if (fault)
269 		return (0);
270 	return (1);
271 }
272 
273 static uint64_t
274 guest_pc(struct vm_exit *vme)
275 {
276 	return (vme->rip);
277 }
278 
279 static void
280 io_buffer_reset(struct io_buffer *io)
281 {
282 
283 	io->start = 0;
284 	io->len = 0;
285 }
286 
287 /* Available room for adding data. */
288 static size_t
289 io_buffer_avail(struct io_buffer *io)
290 {
291 
292 	return (io->capacity - (io->start + io->len));
293 }
294 
295 static uint8_t *
296 io_buffer_head(struct io_buffer *io)
297 {
298 
299 	return (io->data + io->start);
300 }
301 
302 static uint8_t *
303 io_buffer_tail(struct io_buffer *io)
304 {
305 
306 	return (io->data + io->start + io->len);
307 }
308 
309 static void
310 io_buffer_advance(struct io_buffer *io, size_t amount)
311 {
312 
313 	assert(amount <= io->len);
314 	io->start += amount;
315 	io->len -= amount;
316 }
317 
318 static void
319 io_buffer_consume(struct io_buffer *io, size_t amount)
320 {
321 
322 	io_buffer_advance(io, amount);
323 	if (io->len == 0) {
324 		io->start = 0;
325 		return;
326 	}
327 
328 	/*
329 	 * XXX: Consider making this move optional and compacting on a
330 	 * future read() before realloc().
331 	 */
332 	memmove(io->data, io_buffer_head(io), io->len);
333 	io->start = 0;
334 }
335 
336 static void
337 io_buffer_grow(struct io_buffer *io, size_t newsize)
338 {
339 	uint8_t *new_data;
340 	size_t avail, new_cap;
341 
342 	avail = io_buffer_avail(io);
343 	if (newsize <= avail)
344 		return;
345 
346 	new_cap = io->capacity + (newsize - avail);
347 	new_data = realloc(io->data, new_cap);
348 	if (new_data == NULL)
349 		err(1, "Failed to grow GDB I/O buffer");
350 	io->data = new_data;
351 	io->capacity = new_cap;
352 }
353 
354 static bool
355 response_pending(void)
356 {
357 
358 	if (cur_resp.start == 0 && cur_resp.len == 0)
359 		return (false);
360 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
361 		return (false);
362 	return (true);
363 }
364 
/*
 * Tear down the current debugger connection: remove the connection's
 * events, empty both I/O buffers, remove every software breakpoint,
 * clear per-vCPU event state and resume any stopped vCPUs.  All of
 * this is done with gdb_lock held.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	/* Drop any buffered command bytes and unsent response bytes. */
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	/* Breakpoints do not outlive the connection. */
	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
391 
/* Convert a value in the range 0-15 to its lowercase hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{
	uint8_t base;

	base = (nibble > 9) ? 'a' - 10 : '0';
	return (nibble + base);
}
401 
/*
 * Convert one hex character (either case) to its value.  Characters
 * that are not hex digits yield 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t folded;

	if (v >= '0' && v <= '9')
		return (v - '0');

	/* Setting bit 0x20 maps 'A'-'F' onto 'a'-'f'. */
	folded = v | 0x20;
	if (folded >= 'a' && folded <= 'f')
		return (folded - 'a' + 10);
	return (0xF);
}
414 
/*
 * Parses big-endian hexadecimal: 'len' characters starting at 'p',
 * most significant digit first.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t result;
	size_t i;

	result = 0;
	for (i = 0; i < len; i++)
		result = (result << 4) | parse_digit(p[i]);
	return (result);
}
430 
/* Parse two hex characters at 'p' into a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
437 
438 static void
439 send_pending_data(int fd)
440 {
441 	ssize_t nwritten;
442 
443 	if (cur_resp.len == 0) {
444 		mevent_disable(write_event);
445 		return;
446 	}
447 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
448 	if (nwritten == -1) {
449 		warn("Write to GDB socket failed");
450 		close_connection();
451 	} else {
452 		io_buffer_advance(&cur_resp, nwritten);
453 		if (cur_resp.len == 0)
454 			mevent_disable(write_event);
455 		else
456 			mevent_enable(write_event);
457 	}
458 }
459 
460 /* Append a single character to the output buffer. */
461 static void
462 send_char(uint8_t data)
463 {
464 	io_buffer_grow(&cur_resp, 1);
465 	*io_buffer_tail(&cur_resp) = data;
466 	cur_resp.len++;
467 }
468 
469 /* Append an array of bytes to the output buffer. */
470 static void
471 send_data(const uint8_t *data, size_t len)
472 {
473 
474 	io_buffer_grow(&cur_resp, len);
475 	memcpy(io_buffer_tail(&cur_resp), data, len);
476 	cur_resp.len += len;
477 }
478 
/* Store the two hex characters for 'v' at buf[0] (high) and buf[1] (low). */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t hi, lo;

	hi = v >> 4;
	lo = v & 0xf;
	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
486 
/*
 * Append a single byte (formatted as two hex characters) to the
 * output buffer.  The packet checksum is not updated.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
499 
500 static void
501 start_packet(void)
502 {
503 
504 	send_char('$');
505 	cur_csum = 0;
506 }
507 
508 static void
509 finish_packet(void)
510 {
511 
512 	send_char('#');
513 	send_byte(cur_csum);
514 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
515 }
516 
517 /*
518  * Append a single character (for the packet payload) and update the
519  * checksum.
520  */
521 static void
522 append_char(uint8_t v)
523 {
524 
525 	send_char(v);
526 	cur_csum += v;
527 }
528 
529 /*
530  * Append an array of bytes (for the packet payload) and update the
531  * checksum.
532  */
533 static void
534 append_packet_data(const uint8_t *data, size_t len)
535 {
536 
537 	send_data(data, len);
538 	while (len > 0) {
539 		cur_csum += *data;
540 		data++;
541 		len--;
542 	}
543 }
544 
/* Append a NUL-terminated string (without the NUL) to the payload. */
static void
append_string(const char *str)
{
	size_t n;

	n = strlen(str);
	append_packet_data(str, n);
}
551 
/* Append a byte to the payload as two hex characters. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
560 
/*
 * Append the low 'len' bytes of 'value' to the payload as hex,
 * least significant byte first.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		append_byte(value);
		value >>= 8;
	}
}
571 
/*
 * Append the low 'len' bytes of 'value' to the payload as hex,
 * most significant byte first.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	char buf[len * 2];
	char *slot;
	size_t i;

	/* Fill the buffer from the end so the low byte lands last. */
	slot = buf + sizeof(buf);
	for (i = 0; i < len; i++) {
		slot -= 2;
		format_byte(value, slot);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
584 
/*
 * Append 'value' to the payload as big-endian hex using the fewest
 * whole bytes that hold it; zero is sent as the single character '0'.
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value != 0) {
		nbytes = (fls(value) + 7) / 8;
		append_unsigned_be(value, nbytes);
	} else {
		append_char('0');
	}
}
594 
/* Append each character of 'str' to the payload as two hex characters. */
static void
append_asciihex(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		append_byte(*p);
}
604 
/* Queue a packet with an empty payload. */
static void
send_empty_response(void)
{
	start_packet();
	finish_packet();
}
612 
/* Queue an "E<xx>" packet, where <xx> is the low byte of 'error' in hex. */
static void
send_error(int error)
{
	const uint8_t code = error;

	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
622 
/* Queue an "OK" packet. */
static void
send_ok(void)
{
	static const char ok[] = "OK";

	start_packet();
	append_string(ok);
	finish_packet();
}
631 
/*
 * Parse a thread id.  "0" yields 0, "-1" yields -1, an empty string
 * yields -2, and anything else is parsed as big-endian hex.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (data[0] == '-' && data[1] == '1')
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
644 
645 /*
646  * Report the current stop event to the debugger.  If the stop is due
647  * to an event triggered on a specific vCPU such as a breakpoint or
648  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
649  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
650  * the reporting vCPU for vCPU events.
651  */
652 static void
653 report_stop(bool set_cur_vcpu)
654 {
655 	struct vcpu_state *vs;
656 
657 	start_packet();
658 	if (stopped_vcpu == -1) {
659 		append_char('S');
660 		append_byte(GDB_SIGNAL_TRAP);
661 	} else {
662 		vs = &vcpu_state[stopped_vcpu];
663 		if (set_cur_vcpu)
664 			cur_vcpu = stopped_vcpu;
665 		append_char('T');
666 		append_byte(GDB_SIGNAL_TRAP);
667 		append_string("thread:");
668 		append_integer(stopped_vcpu + 1);
669 		append_char(';');
670 		if (vs->hit_swbreak) {
671 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
672 			if (swbreak_enabled)
673 				append_string("swbreak:;");
674 		} else if (vs->stepped)
675 			debug("$vCPU %d reporting step\n", stopped_vcpu);
676 		else
677 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
678 	}
679 	finish_packet();
680 	report_next_stop = false;
681 }
682 
683 /*
684  * If this stop is due to a vCPU event, clear that event to mark it as
685  * acknowledged.
686  */
687 static void
688 discard_stop(void)
689 {
690 	struct vcpu_state *vs;
691 
692 	if (stopped_vcpu != -1) {
693 		vs = &vcpu_state[stopped_vcpu];
694 		vs->hit_swbreak = false;
695 		vs->stepped = false;
696 		stopped_vcpu = -1;
697 	}
698 	report_next_stop = true;
699 }
700 
701 static void
702 gdb_finish_suspend_vcpus(void)
703 {
704 
705 	if (first_stop) {
706 		first_stop = false;
707 		stopped_vcpu = -1;
708 	} else if (report_next_stop) {
709 		assert(!response_pending());
710 		report_stop(true);
711 		send_pending_data(cur_fd);
712 	}
713 }
714 
/*
 * vCPU threads invoke this function whenever the vCPU enters the
 * debug server to pause or report an event.  vCPU threads wait here
 * as long as the debug server keeps them suspended.
 *
 * The wait uses gdb_lock with the idle_vcpus condition variable, so
 * the caller must hold gdb_lock.  If 'report_stop' is true and this
 * vCPU's arrival makes the waiting set equal to the suspended set,
 * gdb_finish_suspend_vcpus() is called before waiting.
 */
static void
_gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
{
	int vcpuid = vcpu_id(vcpu);

	debug("$vCPU %d suspending\n", vcpuid);
	CPU_SET(vcpuid, &vcpus_waiting);
	/* CPU_CMP() is zero when the two sets are identical. */
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	/* Sleep until this vCPU is removed from the suspended set. */
	while (CPU_ISSET(vcpuid, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpuid, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpuid);
}
734 
735 /*
736  * Requests vCPU single-stepping using a
737  * VMEXIT suitable for the host platform.
738  */
739 static int
740 _gdb_set_step(struct vcpu *vcpu, int val)
741 {
742 	int error;
743 
744 	/*
745 	 * If the MTRAP cap fails, we are running on an AMD host.
746 	 * In that case, we request DB exits caused by RFLAGS.TF.
747 	 */
748 	error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, val);
749 	if (error != 0)
750 		error = vm_set_capability(vcpu, VM_CAP_RFLAGS_TF, val);
751 	if (error == 0)
752 		(void)vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, val);
753 
754 	return (error);
755 }
756 
757 /*
758  * Checks whether single-stepping is enabled for a given vCPU.
759  */
760 static int
761 _gdb_check_step(struct vcpu *vcpu)
762 {
763 	int val;
764 
765 	if (vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val) != 0) {
766 		if (vm_get_capability(vcpu, VM_CAP_RFLAGS_TF, &val) != 0)
767 			return -1;
768 	}
769 	return 0;
770 }
771 
/*
 * Invoked at the start of a vCPU thread's execution to inform the
 * debug server about the new thread.
 *
 * Does nothing when the debug server is not active.  Otherwise the
 * vCPU is recorded in 'vcpus' and 'vcpus_active' under gdb_lock.
 */
void
gdb_cpu_add(struct vcpu *vcpu)
{
	int vcpuid;

	if (!gdb_active)
		return;
	vcpuid = vcpu_id(vcpu);
	debug("$vCPU %d starting\n", vcpuid);
	pthread_mutex_lock(&gdb_lock);
	assert(vcpuid < guest_ncpus);
	assert(vcpus[vcpuid] == NULL);
	vcpus[vcpuid] = vcpu;
	CPU_SET(vcpuid, &vcpus_active);
	/* Breakpoints already exist: this vCPU must exit on them too. */
	if (!TAILQ_EMPTY(&breakpoints)) {
		vm_set_capability(vcpu, VM_CAP_BPT_EXIT, 1);
		debug("$vCPU %d enabled breakpoint exits\n", vcpuid);
	}

	/*
	 * If a vcpu is added while vcpus are stopped, suspend the new
	 * vcpu so that it will pop back out with a debug exit before
	 * executing the first instruction.
	 */
	if (!CPU_EMPTY(&vcpus_suspended)) {
		CPU_SET(vcpuid, &vcpus_suspended);
		_gdb_cpu_suspend(vcpu, false);
	}
	pthread_mutex_unlock(&gdb_lock);
}
806 
807 /*
808  * Invoked by vCPU before resuming execution.  This enables stepping
809  * if the vCPU is marked as stepping.
810  */
811 static void
812 gdb_cpu_resume(struct vcpu *vcpu)
813 {
814 	struct vcpu_state *vs;
815 	int error;
816 
817 	vs = &vcpu_state[vcpu_id(vcpu)];
818 
819 	/*
820 	 * Any pending event should already be reported before
821 	 * resuming.
822 	 */
823 	assert(vs->hit_swbreak == false);
824 	assert(vs->stepped == false);
825 	if (vs->stepping) {
826 		error = _gdb_set_step(vcpu, 1);
827 		assert(error == 0);
828 	}
829 }
830 
831 /*
832  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
833  * has been suspended due to an event on different vCPU or in response
834  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
835  */
836 void
837 gdb_cpu_suspend(struct vcpu *vcpu)
838 {
839 
840 	if (!gdb_active)
841 		return;
842 	pthread_mutex_lock(&gdb_lock);
843 	_gdb_cpu_suspend(vcpu, true);
844 	gdb_cpu_resume(vcpu);
845 	pthread_mutex_unlock(&gdb_lock);
846 }
847 
/*
 * Ask every active vCPU to stop.  The caller must hold gdb_lock.  If
 * all of the vCPUs being suspended are already waiting, the suspend
 * is completed immediately via gdb_finish_suspend_vcpus().
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	/* Every active vCPU becomes a vCPU that must stop. */
	vcpus_suspended = vcpus_active;
	vm_suspend_all_cpus(ctx);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
859 
/*
 * Invoked each time a vmexit handler needs to step a vCPU.
 * Handles MTRAP and RFLAGS.TF vmexits.
 *
 * If the vCPU was marked as stepping, the step is recorded, stepping
 * is switched off again, and the vCPU waits in the debug server until
 * the step event has been acknowledged ('stepped' cleared).  A vCPU
 * that was not stepping is left alone.
 */
static void
gdb_cpu_step(struct vcpu *vcpu)
{
	struct vcpu_state *vs;
	int vcpuid = vcpu_id(vcpu);
	int error;

	debug("$vCPU %d stepped\n", vcpuid);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpuid];
	if (vs->stepping) {
		vs->stepping = false;
		vs->stepped = true;
		error = _gdb_set_step(vcpu, 0);
		assert(error == 0);

		while (vs->stepped) {
			/*
			 * No other event is being reported, so claim
			 * the stop for this vCPU and stop the others.
			 */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpuid);
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}
891 }
892 
893 /*
894  * A general handler for VM_EXITCODE_DB.
895  * Handles RFLAGS.TF exits on AMD SVM.
896  */
897 void
898 gdb_cpu_debug(struct vcpu *vcpu, struct vm_exit *vmexit)
899 {
900 	if (!gdb_active)
901 		return;
902 
903 	/* RFLAGS.TF exit? */
904 	if (vmexit->u.dbg.trace_trap) {
905 		gdb_cpu_step(vcpu);
906 	}
907 }
908 
909 /*
910  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
911  * the VT-x-specific MTRAP exit.
912  */
913 void
914 gdb_cpu_mtrap(struct vcpu *vcpu)
915 {
916 	if (!gdb_active)
917 		return;
918 	gdb_cpu_step(vcpu);
919 }
920 
921 static struct breakpoint *
922 find_breakpoint(uint64_t gpa)
923 {
924 	struct breakpoint *bp;
925 
926 	TAILQ_FOREACH(bp, &breakpoints, link) {
927 		if (bp->gpa == gpa)
928 			return (bp);
929 	}
930 	return (NULL);
931 }
932 
/*
 * Handler for a breakpoint exit on 'vcpu'.
 *
 * If the exit address matches a breakpoint set by the debug server,
 * the vCPU records the hit and waits in the debug server until the
 * event has been acknowledged or the breakpoint has been removed.
 * Otherwise the breakpoint belongs to the guest and a breakpoint
 * exception is injected back into it.
 *
 * Receiving this exit while the debug server is inactive is treated
 * as fatal.
 */
void
gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error, vcpuid;

	if (!gdb_active) {
		EPRINTLN("vm_loop: unexpected VMEXIT_DEBUG");
		exit(4);
	}
	vcpuid = vcpu_id(vcpu);
	pthread_mutex_lock(&gdb_lock);
	/* Breakpoints are keyed by guest physical address. */
	error = guest_vaddr2paddr(vcpu, guest_pc(vmexit), &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpuid];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/* Set the PC to the address recorded in the exit. */
		vm_set_register(vcpu, GDB_PC_REGNAME, guest_pc(vmexit));
		for (;;) {
			/*
			 * No other event is being reported, so claim
			 * the stop for this vCPU and stop the others.
			 */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
				    vcpuid, guest_pc(vmexit));
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		/* Not one of ours: hand the exception back to the guest. */
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
		    guest_pc(vmexit));
		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
		    vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
988 
989 static bool
990 gdb_step_vcpu(struct vcpu *vcpu)
991 {
992 	int error, vcpuid;
993 
994 	vcpuid = vcpu_id(vcpu);
995 	debug("$vCPU %d step\n", vcpuid);
996 	error = _gdb_check_step(vcpu);
997 	if (error < 0)
998 		return (false);
999 
1000 	discard_stop();
1001 	vcpu_state[vcpuid].stepping = true;
1002 	vm_resume_cpu(vcpu);
1003 	CPU_CLR(vcpuid, &vcpus_suspended);
1004 	pthread_cond_broadcast(&idle_vcpus);
1005 	return (true);
1006 }
1007 
/*
 * Let every vCPU run again: resume them in the VM, empty the
 * suspended set and wake all threads sleeping on idle_vcpus.  The
 * caller must hold gdb_lock.
 */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_all_cpus(ctx);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
1018 
1019 static void
1020 gdb_read_regs(void)
1021 {
1022 	uint64_t regvals[nitems(gdb_regset)];
1023 	int regnums[nitems(gdb_regset)];
1024 
1025 	for (size_t i = 0; i < nitems(gdb_regset); i++)
1026 		regnums[i] = gdb_regset[i].id;
1027 	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
1028 	    regnums, regvals) == -1) {
1029 		send_error(errno);
1030 		return;
1031 	}
1032 	start_packet();
1033 	for (size_t i = 0; i < nitems(gdb_regset); i++)
1034 		append_unsigned_native(regvals[i], gdb_regset[i].size);
1035 	finish_packet();
1036 }
1037 
1038 static void
1039 gdb_read_one_reg(const uint8_t *data, size_t len)
1040 {
1041 	uint64_t regval;
1042 	uintmax_t reg;
1043 
1044 	reg = parse_integer(data, len);
1045 	if (reg >= nitems(gdb_regset)) {
1046 		send_error(EINVAL);
1047 		return;
1048 	}
1049 
1050 	if (vm_get_register(vcpus[cur_vcpu], gdb_regset[reg].id, &regval) ==
1051 	    -1) {
1052 		send_error(errno);
1053 		return;
1054 	}
1055 
1056 	start_packet();
1057 	append_unsigned_native(regval, gdb_regset[reg].size);
1058 	finish_packet();
1059 }
1060 
1061 static void
1062 gdb_read_mem(const uint8_t *data, size_t len)
1063 {
1064 	uint64_t gpa, gva, val;
1065 	uint8_t *cp;
1066 	size_t resid, todo, bytes;
1067 	bool started;
1068 	int error;
1069 
1070 	assert(len >= 1);
1071 
1072 	/* Skip 'm' */
1073 	data += 1;
1074 	len -= 1;
1075 
1076 	/* Parse and consume address. */
1077 	cp = memchr(data, ',', len);
1078 	if (cp == NULL || cp == data) {
1079 		send_error(EINVAL);
1080 		return;
1081 	}
1082 	gva = parse_integer(data, cp - data);
1083 	len -= (cp - data) + 1;
1084 	data += (cp - data) + 1;
1085 
1086 	/* Parse length. */
1087 	resid = parse_integer(data, len);
1088 
1089 	started = false;
1090 	while (resid > 0) {
1091 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1092 		if (error == -1) {
1093 			if (started)
1094 				finish_packet();
1095 			else
1096 				send_error(errno);
1097 			return;
1098 		}
1099 		if (error == 0) {
1100 			if (started)
1101 				finish_packet();
1102 			else
1103 				send_error(EFAULT);
1104 			return;
1105 		}
1106 
1107 		/* Read bytes from current page. */
1108 		todo = getpagesize() - gpa % getpagesize();
1109 		if (todo > resid)
1110 			todo = resid;
1111 
1112 		cp = paddr_guest2host(ctx, gpa, todo);
1113 		if (cp != NULL) {
1114 			/*
1115 			 * If this page is guest RAM, read it a byte
1116 			 * at a time.
1117 			 */
1118 			if (!started) {
1119 				start_packet();
1120 				started = true;
1121 			}
1122 			while (todo > 0) {
1123 				append_byte(*cp);
1124 				cp++;
1125 				gpa++;
1126 				gva++;
1127 				resid--;
1128 				todo--;
1129 			}
1130 		} else {
1131 			/*
1132 			 * If this page isn't guest RAM, try to handle
1133 			 * it via MMIO.  For MMIO requests, use
1134 			 * aligned reads of words when possible.
1135 			 */
1136 			while (todo > 0) {
1137 				if (gpa & 1 || todo == 1)
1138 					bytes = 1;
1139 				else if (gpa & 2 || todo == 2)
1140 					bytes = 2;
1141 				else
1142 					bytes = 4;
1143 				error = read_mem(vcpus[cur_vcpu], gpa, &val,
1144 				    bytes);
1145 				if (error == 0) {
1146 					if (!started) {
1147 						start_packet();
1148 						started = true;
1149 					}
1150 					gpa += bytes;
1151 					gva += bytes;
1152 					resid -= bytes;
1153 					todo -= bytes;
1154 					while (bytes > 0) {
1155 						append_byte(val);
1156 						val >>= 8;
1157 						bytes--;
1158 					}
1159 				} else {
1160 					if (started)
1161 						finish_packet();
1162 					else
1163 						send_error(EFAULT);
1164 					return;
1165 				}
1166 			}
1167 		}
1168 		assert(resid == 0 || gpa % getpagesize() == 0);
1169 	}
1170 	if (!started)
1171 		start_packet();
1172 	finish_packet();
1173 }
1174 
1175 static void
1176 gdb_write_mem(const uint8_t *data, size_t len)
1177 {
1178 	uint64_t gpa, gva, val;
1179 	uint8_t *cp;
1180 	size_t resid, todo, bytes;
1181 	int error;
1182 
1183 	assert(len >= 1);
1184 
1185 	/* Skip 'M' */
1186 	data += 1;
1187 	len -= 1;
1188 
1189 	/* Parse and consume address. */
1190 	cp = memchr(data, ',', len);
1191 	if (cp == NULL || cp == data) {
1192 		send_error(EINVAL);
1193 		return;
1194 	}
1195 	gva = parse_integer(data, cp - data);
1196 	len -= (cp - data) + 1;
1197 	data += (cp - data) + 1;
1198 
1199 	/* Parse and consume length. */
1200 	cp = memchr(data, ':', len);
1201 	if (cp == NULL || cp == data) {
1202 		send_error(EINVAL);
1203 		return;
1204 	}
1205 	resid = parse_integer(data, cp - data);
1206 	len -= (cp - data) + 1;
1207 	data += (cp - data) + 1;
1208 
1209 	/* Verify the available bytes match the length. */
1210 	if (len != resid * 2) {
1211 		send_error(EINVAL);
1212 		return;
1213 	}
1214 
1215 	while (resid > 0) {
1216 		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1217 		if (error == -1) {
1218 			send_error(errno);
1219 			return;
1220 		}
1221 		if (error == 0) {
1222 			send_error(EFAULT);
1223 			return;
1224 		}
1225 
1226 		/* Write bytes to current page. */
1227 		todo = getpagesize() - gpa % getpagesize();
1228 		if (todo > resid)
1229 			todo = resid;
1230 
1231 		cp = paddr_guest2host(ctx, gpa, todo);
1232 		if (cp != NULL) {
1233 			/*
1234 			 * If this page is guest RAM, write it a byte
1235 			 * at a time.
1236 			 */
1237 			while (todo > 0) {
1238 				assert(len >= 2);
1239 				*cp = parse_byte(data);
1240 				data += 2;
1241 				len -= 2;
1242 				cp++;
1243 				gpa++;
1244 				gva++;
1245 				resid--;
1246 				todo--;
1247 			}
1248 		} else {
1249 			/*
1250 			 * If this page isn't guest RAM, try to handle
1251 			 * it via MMIO.  For MMIO requests, use
1252 			 * aligned writes of words when possible.
1253 			 */
1254 			while (todo > 0) {
1255 				if (gpa & 1 || todo == 1) {
1256 					bytes = 1;
1257 					val = parse_byte(data);
1258 				} else if (gpa & 2 || todo == 2) {
1259 					bytes = 2;
1260 					val = be16toh(parse_integer(data, 4));
1261 				} else {
1262 					bytes = 4;
1263 					val = be32toh(parse_integer(data, 8));
1264 				}
1265 				error = write_mem(vcpus[cur_vcpu], gpa, val,
1266 				    bytes);
1267 				if (error == 0) {
1268 					gpa += bytes;
1269 					gva += bytes;
1270 					resid -= bytes;
1271 					todo -= bytes;
1272 					data += 2 * bytes;
1273 					len -= 2 * bytes;
1274 				} else {
1275 					send_error(EFAULT);
1276 					return;
1277 				}
1278 			}
1279 		}
1280 		assert(resid == 0 || gpa % getpagesize() == 0);
1281 	}
1282 	assert(len == 0);
1283 	send_ok();
1284 }
1285 
1286 static bool
1287 set_breakpoint_caps(bool enable)
1288 {
1289 	cpuset_t mask;
1290 	int vcpu;
1291 
1292 	mask = vcpus_active;
1293 	while (!CPU_EMPTY(&mask)) {
1294 		vcpu = CPU_FFS(&mask) - 1;
1295 		CPU_CLR(vcpu, &mask);
1296 		if (vm_set_capability(vcpus[vcpu], VM_CAP_BPT_EXIT,
1297 		    enable ? 1 : 0) < 0)
1298 			return (false);
1299 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1300 		    enable ? "en" : "dis");
1301 	}
1302 	return (true);
1303 }
1304 
1305 static void
1306 remove_all_sw_breakpoints(void)
1307 {
1308 	struct breakpoint *bp, *nbp;
1309 	uint8_t *cp;
1310 
1311 	if (TAILQ_EMPTY(&breakpoints))
1312 		return;
1313 
1314 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1315 		debug("remove breakpoint at %#lx\n", bp->gpa);
1316 		cp = paddr_guest2host(ctx, bp->gpa, sizeof(bp->shadow_inst));
1317 		memcpy(cp, bp->shadow_inst, sizeof(bp->shadow_inst));
1318 		TAILQ_REMOVE(&breakpoints, bp, link);
1319 		free(bp);
1320 	}
1321 	TAILQ_INIT(&breakpoints);
1322 	set_breakpoint_caps(false);
1323 }
1324 
1325 static void
1326 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1327 {
1328 	struct breakpoint *bp;
1329 	uint64_t gpa;
1330 	uint8_t *cp;
1331 	int error;
1332 
1333 	if (kind != GDB_BP_SIZE) {
1334 		send_error(EINVAL);
1335 		return;
1336 	}
1337 
1338 	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
1339 	if (error == -1) {
1340 		send_error(errno);
1341 		return;
1342 	}
1343 	if (error == 0) {
1344 		send_error(EFAULT);
1345 		return;
1346 	}
1347 
1348 	cp = paddr_guest2host(ctx, gpa, sizeof(bp->shadow_inst));
1349 
1350 	/* Only permit breakpoints in guest RAM. */
1351 	if (cp == NULL) {
1352 		send_error(EFAULT);
1353 		return;
1354 	}
1355 
1356 	/* Find any existing breakpoint. */
1357 	bp = find_breakpoint(gpa);
1358 
1359 	/*
1360 	 * Silently ignore duplicate commands since the protocol
1361 	 * requires these packets to be idempotent.
1362 	 */
1363 	if (insert) {
1364 		if (bp == NULL) {
1365 			if (TAILQ_EMPTY(&breakpoints) &&
1366 			    !set_breakpoint_caps(true)) {
1367 				send_empty_response();
1368 				return;
1369 			}
1370 			bp = malloc(sizeof(*bp));
1371 			bp->gpa = gpa;
1372 			memcpy(bp->shadow_inst, cp, sizeof(bp->shadow_inst));
1373 			memcpy(cp, GDB_BP_INSTR, sizeof(bp->shadow_inst));
1374 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1375 			debug("new breakpoint at %#lx\n", gpa);
1376 		}
1377 	} else {
1378 		if (bp != NULL) {
1379 			debug("remove breakpoint at %#lx\n", gpa);
1380 			memcpy(cp, bp->shadow_inst, sizeof(bp->shadow_inst));
1381 			TAILQ_REMOVE(&breakpoints, bp, link);
1382 			free(bp);
1383 			if (TAILQ_EMPTY(&breakpoints))
1384 				set_breakpoint_caps(false);
1385 		}
1386 	}
1387 	send_ok();
1388 }
1389 
/*
 * Decode a 'Z' (insert) or 'z' (remove) packet laid out as
 * "<Z|z><type>,<addr>,<kind>" and act on it.
 *
 * Only type 0 is forwarded (to update_sw_breakpoint()); every other
 * type gets an empty response, as does a packet with a ';' after the
 * kind field.  A missing or empty field is answered with EINVAL.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	uint64_t addr;
	uint8_t *sep;
	size_t used;
	int bp_kind, bp_type;
	bool inserting;

	/* The packet letter selects insert vs. remove; step past it. */
	inserting = (*data == 'Z');
	data++;
	len--;

	/* <type> runs up to the first comma. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	bp_type = parse_integer(data, sep - data);
	used = (sep - data) + 1;
	data += used;
	len -= used;

	/* <addr> runs up to the next comma. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	addr = parse_integer(data, sep - data);
	used = (sep - data) + 1;
	data += used;
	len -= used;

	/* <kind> is whatever remains, unless a ';' follows it. */
	sep = memchr(data, ';', len);
	if (sep == data) {
		send_error(EINVAL);
		return;
	}
	if (sep != NULL) {
		/*
		 * Neither ConditionalBreakpoints nor BreakpointCommands
		 * is advertised, so the remote end should never attach
		 * conditions or commands to a breakpoint.
		 */
		send_empty_response();
		return;
	}
	bp_kind = parse_integer(data, len);

	if (bp_type == 0)
		update_sw_breakpoint(addr, bp_kind, inserting);
	else
		send_empty_response();
}
1453 
/*
 * Report whether the packet payload 'data' (of 'len' bytes, not
 * NUL-terminated) begins with the NUL-terminated string 'cmd'.
 * This is a prefix match: bytes after the command name are ignored.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (len >= cmdlen && memcmp(data, cmd, cmdlen) == 0);
}
1462 
1463 static void
1464 check_features(const uint8_t *data, size_t len)
1465 {
1466 	char *feature, *next_feature, *str, *value;
1467 	bool supported;
1468 
1469 	str = malloc(len + 1);
1470 	memcpy(str, data, len);
1471 	str[len] = '\0';
1472 	next_feature = str;
1473 
1474 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1475 		/*
1476 		 * Null features shouldn't exist, but skip if they
1477 		 * do.
1478 		 */
1479 		if (strcmp(feature, "") == 0)
1480 			continue;
1481 
1482 		/*
1483 		 * Look for the value or supported / not supported
1484 		 * flag.
1485 		 */
1486 		value = strchr(feature, '=');
1487 		if (value != NULL) {
1488 			*value = '\0';
1489 			value++;
1490 			supported = true;
1491 		} else {
1492 			value = feature + strlen(feature) - 1;
1493 			switch (*value) {
1494 			case '+':
1495 				supported = true;
1496 				break;
1497 			case '-':
1498 				supported = false;
1499 				break;
1500 			default:
1501 				/*
1502 				 * This is really a protocol error,
1503 				 * but we just ignore malformed
1504 				 * features for ease of
1505 				 * implementation.
1506 				 */
1507 				continue;
1508 			}
1509 			value = NULL;
1510 		}
1511 
1512 		if (strcmp(feature, "swbreak") == 0)
1513 			swbreak_enabled = supported;
1514 	}
1515 	free(str);
1516 
1517 	start_packet();
1518 
1519 	/* This is an arbitrary limit. */
1520 	append_string("PacketSize=4096");
1521 	append_string(";swbreak+");
1522 	finish_packet();
1523 }
1524 
1525 static void
1526 gdb_query(const uint8_t *data, size_t len)
1527 {
1528 
1529 	/*
1530 	 * TODO:
1531 	 * - qSearch
1532 	 */
1533 	if (command_equals(data, len, "qAttached")) {
1534 		start_packet();
1535 		append_char('1');
1536 		finish_packet();
1537 	} else if (command_equals(data, len, "qC")) {
1538 		start_packet();
1539 		append_string("QC");
1540 		append_integer(cur_vcpu + 1);
1541 		finish_packet();
1542 	} else if (command_equals(data, len, "qfThreadInfo")) {
1543 		cpuset_t mask;
1544 		bool first;
1545 		int vcpu;
1546 
1547 		if (CPU_EMPTY(&vcpus_active)) {
1548 			send_error(EINVAL);
1549 			return;
1550 		}
1551 		mask = vcpus_active;
1552 		start_packet();
1553 		append_char('m');
1554 		first = true;
1555 		while (!CPU_EMPTY(&mask)) {
1556 			vcpu = CPU_FFS(&mask) - 1;
1557 			CPU_CLR(vcpu, &mask);
1558 			if (first)
1559 				first = false;
1560 			else
1561 				append_char(',');
1562 			append_integer(vcpu + 1);
1563 		}
1564 		finish_packet();
1565 	} else if (command_equals(data, len, "qsThreadInfo")) {
1566 		start_packet();
1567 		append_char('l');
1568 		finish_packet();
1569 	} else if (command_equals(data, len, "qSupported")) {
1570 		data += strlen("qSupported");
1571 		len -= strlen("qSupported");
1572 		check_features(data, len);
1573 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1574 		char buf[16];
1575 		int tid;
1576 
1577 		data += strlen("qThreadExtraInfo");
1578 		len -= strlen("qThreadExtraInfo");
1579 		if (len == 0 || *data != ',') {
1580 			send_error(EINVAL);
1581 			return;
1582 		}
1583 		tid = parse_threadid(data + 1, len - 1);
1584 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1585 			send_error(EINVAL);
1586 			return;
1587 		}
1588 
1589 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1590 		start_packet();
1591 		append_asciihex(buf);
1592 		finish_packet();
1593 	} else
1594 		send_empty_response();
1595 }
1596 
1597 static void
1598 handle_command(const uint8_t *data, size_t len)
1599 {
1600 
1601 	/* Reject packets with a sequence-id. */
1602 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1603 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1604 		send_empty_response();
1605 		return;
1606 	}
1607 
1608 	switch (*data) {
1609 	case 'c':
1610 		if (len != 1) {
1611 			send_error(EINVAL);
1612 			break;
1613 		}
1614 
1615 		discard_stop();
1616 		gdb_resume_vcpus();
1617 		break;
1618 	case 'D':
1619 		send_ok();
1620 
1621 		/* TODO: Resume any stopped CPUs. */
1622 		break;
1623 	case 'g':
1624 		gdb_read_regs();
1625 		break;
1626 	case 'p':
1627 		gdb_read_one_reg(data + 1, len - 1);
1628 		break;
1629 	case 'H': {
1630 		int tid;
1631 
1632 		if (len < 2 || (data[1] != 'g' && data[1] != 'c')) {
1633 			send_error(EINVAL);
1634 			break;
1635 		}
1636 		tid = parse_threadid(data + 2, len - 2);
1637 		if (tid == -2) {
1638 			send_error(EINVAL);
1639 			break;
1640 		}
1641 
1642 		if (CPU_EMPTY(&vcpus_active)) {
1643 			send_error(EINVAL);
1644 			break;
1645 		}
1646 		if (tid == -1 || tid == 0)
1647 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1648 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1649 			cur_vcpu = tid - 1;
1650 		else {
1651 			send_error(EINVAL);
1652 			break;
1653 		}
1654 		send_ok();
1655 		break;
1656 	}
1657 	case 'm':
1658 		gdb_read_mem(data, len);
1659 		break;
1660 	case 'M':
1661 		gdb_write_mem(data, len);
1662 		break;
1663 	case 'T': {
1664 		int tid;
1665 
1666 		tid = parse_threadid(data + 1, len - 1);
1667 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1668 			send_error(EINVAL);
1669 			return;
1670 		}
1671 		send_ok();
1672 		break;
1673 	}
1674 	case 'q':
1675 		gdb_query(data, len);
1676 		break;
1677 	case 's':
1678 		if (len != 1) {
1679 			send_error(EINVAL);
1680 			break;
1681 		}
1682 
1683 		/* Don't send a reply until a stop occurs. */
1684 		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
1685 			send_error(EOPNOTSUPP);
1686 			break;
1687 		}
1688 		break;
1689 	case 'z':
1690 	case 'Z':
1691 		parse_breakpoint(data, len);
1692 		break;
1693 	case '?':
1694 		report_stop(false);
1695 		break;
1696 	case 'G': /* TODO */
1697 	case 'v':
1698 		/* Handle 'vCont' */
1699 		/* 'vCtrlC' */
1700 	case 'P': /* TODO */
1701 	case 'Q': /* TODO */
1702 	case 't': /* TODO */
1703 	case 'X': /* TODO */
1704 	default:
1705 		send_empty_response();
1706 	}
1707 }
1708 
1709 /* Check for a valid packet in the command buffer. */
1710 static void
1711 check_command(int fd)
1712 {
1713 	uint8_t *head, *hash, *p, sum;
1714 	size_t avail, plen;
1715 
1716 	for (;;) {
1717 		avail = cur_comm.len;
1718 		if (avail == 0)
1719 			return;
1720 		head = io_buffer_head(&cur_comm);
1721 		switch (*head) {
1722 		case 0x03:
1723 			debug("<- Ctrl-C\n");
1724 			io_buffer_consume(&cur_comm, 1);
1725 
1726 			gdb_suspend_vcpus();
1727 			break;
1728 		case '+':
1729 			/* ACK of previous response. */
1730 			debug("<- +\n");
1731 			if (response_pending())
1732 				io_buffer_reset(&cur_resp);
1733 			io_buffer_consume(&cur_comm, 1);
1734 			if (stopped_vcpu != -1 && report_next_stop) {
1735 				report_stop(true);
1736 				send_pending_data(fd);
1737 			}
1738 			break;
1739 		case '-':
1740 			/* NACK of previous response. */
1741 			debug("<- -\n");
1742 			if (response_pending()) {
1743 				cur_resp.len += cur_resp.start;
1744 				cur_resp.start = 0;
1745 				if (cur_resp.data[0] == '+')
1746 					io_buffer_advance(&cur_resp, 1);
1747 				debug("-> %.*s\n", (int)cur_resp.len,
1748 				    io_buffer_head(&cur_resp));
1749 			}
1750 			io_buffer_consume(&cur_comm, 1);
1751 			send_pending_data(fd);
1752 			break;
1753 		case '$':
1754 			/* Packet. */
1755 
1756 			if (response_pending()) {
1757 				warnx("New GDB command while response in "
1758 				    "progress");
1759 				io_buffer_reset(&cur_resp);
1760 			}
1761 
1762 			/* Is packet complete? */
1763 			hash = memchr(head, '#', avail);
1764 			if (hash == NULL)
1765 				return;
1766 			plen = (hash - head + 1) + 2;
1767 			if (avail < plen)
1768 				return;
1769 			debug("<- %.*s\n", (int)plen, head);
1770 
1771 			/* Verify checksum. */
1772 			for (sum = 0, p = head + 1; p < hash; p++)
1773 				sum += *p;
1774 			if (sum != parse_byte(hash + 1)) {
1775 				io_buffer_consume(&cur_comm, plen);
1776 				debug("-> -\n");
1777 				send_char('-');
1778 				send_pending_data(fd);
1779 				break;
1780 			}
1781 			send_char('+');
1782 
1783 			handle_command(head + 1, hash - (head + 1));
1784 			io_buffer_consume(&cur_comm, plen);
1785 			if (!response_pending())
1786 				debug("-> +\n");
1787 			send_pending_data(fd);
1788 			break;
1789 		default:
1790 			/* XXX: Possibly drop connection instead. */
1791 			debug("-> %02x\n", *head);
1792 			io_buffer_consume(&cur_comm, 1);
1793 			break;
1794 		}
1795 	}
1796 }
1797 
1798 static void
1799 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1800 {
1801 	size_t pending;
1802 	ssize_t nread;
1803 	int n;
1804 
1805 	if (ioctl(fd, FIONREAD, &n) == -1) {
1806 		warn("FIONREAD on GDB socket");
1807 		return;
1808 	}
1809 	assert(n >= 0);
1810 	pending = n;
1811 
1812 	/*
1813 	 * 'pending' might be zero due to EOF.  We need to call read
1814 	 * with a non-zero length to detect EOF.
1815 	 */
1816 	if (pending == 0)
1817 		pending = 1;
1818 
1819 	/* Ensure there is room in the command buffer. */
1820 	io_buffer_grow(&cur_comm, pending);
1821 	assert(io_buffer_avail(&cur_comm) >= pending);
1822 
1823 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1824 	if (nread == 0) {
1825 		close_connection();
1826 	} else if (nread == -1) {
1827 		if (errno == EAGAIN)
1828 			return;
1829 
1830 		warn("Read from GDB socket");
1831 		close_connection();
1832 	} else {
1833 		cur_comm.len += nread;
1834 		pthread_mutex_lock(&gdb_lock);
1835 		check_command(fd);
1836 		pthread_mutex_unlock(&gdb_lock);
1837 	}
1838 }
1839 
1840 static void
1841 gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
1842 {
1843 
1844 	send_pending_data(fd);
1845 }
1846 
1847 static void
1848 new_connection(int fd, enum ev_type event __unused, void *arg)
1849 {
1850 	int optval, s;
1851 
1852 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1853 	if (s == -1) {
1854 		if (arg != NULL)
1855 			err(1, "Failed accepting initial GDB connection");
1856 
1857 		/* Silently ignore errors post-startup. */
1858 		return;
1859 	}
1860 
1861 	optval = 1;
1862 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1863 	    -1) {
1864 		warn("Failed to disable SIGPIPE for GDB connection");
1865 		close(s);
1866 		return;
1867 	}
1868 
1869 	pthread_mutex_lock(&gdb_lock);
1870 	if (cur_fd != -1) {
1871 		close(s);
1872 		warnx("Ignoring additional GDB connection.");
1873 	}
1874 
1875 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1876 	if (read_event == NULL) {
1877 		if (arg != NULL)
1878 			err(1, "Failed to setup initial GDB connection");
1879 		pthread_mutex_unlock(&gdb_lock);
1880 		return;
1881 	}
1882 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1883 	if (write_event == NULL) {
1884 		if (arg != NULL)
1885 			err(1, "Failed to setup initial GDB connection");
1886 		mevent_delete_close(read_event);
1887 		read_event = NULL;
1888 	}
1889 
1890 	cur_fd = s;
1891 	cur_vcpu = 0;
1892 	stopped_vcpu = -1;
1893 
1894 	/* Break on attach. */
1895 	first_stop = true;
1896 	report_next_stop = false;
1897 	gdb_suspend_vcpus();
1898 	pthread_mutex_unlock(&gdb_lock);
1899 }
1900 
1901 #ifndef WITHOUT_CAPSICUM
1902 static void
1903 limit_gdb_socket(int s)
1904 {
1905 	cap_rights_t rights;
1906 	unsigned long ioctls[] = { FIONREAD };
1907 
1908 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1909 	    CAP_SETSOCKOPT, CAP_IOCTL);
1910 	if (caph_rights_limit(s, &rights) == -1)
1911 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1912 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1913 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1914 }
1915 #endif
1916 
/*
 * Set up the GDB stub for the VM described by '_ctx'.
 *
 * Does nothing unless the "gdb.port" configuration value is set.
 * Otherwise a non-blocking listening socket is created on
 * "gdb.address" (default "localhost") and that port, and registered
 * so that new_connection() runs for incoming connections.  When
 * "gdb.wait" is true, vCPU 0 is marked suspended so that it does not
 * run until a debugger connects.
 *
 * Any failure during setup terminates the process.
 */
void
init_gdb(struct vmctx *_ctx)
{
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	ctx = _ctx;
	/* Only the first address returned by the resolver is used. */
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	/* Best effort: failing to set SO_REUSEADDR is not fatal. */
	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	if (vcpus == NULL || vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	flags = fcntl(s, F_GETFL);
	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	/*
	 * Without the listener no debugger could ever attach, which
	 * would leave a "gdb.wait" guest suspended forever.
	 */
	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
		errx(1, "Failed to register gdb listen socket");
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);
}
2001