xref: /illumos-gate/usr/src/cmd/bhyve/gdb.c (revision cab7c30c9587a8c7b5dd94af5f688dc5b8e8add7)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/ioctl.h>
36 #include <sys/mman.h>
37 #include <sys/socket.h>
38 #include <machine/atomic.h>
39 #include <machine/specialreg.h>
40 #include <machine/vmm.h>
41 #include <netinet/in.h>
42 #include <assert.h>
43 #ifndef WITHOUT_CAPSICUM
44 #include <capsicum_helpers.h>
45 #endif
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <stdbool.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <sysexits.h>
56 #include <unistd.h>
57 #include <vmmapi.h>
58 
59 #include "bhyverun.h"
60 #include "mem.h"
61 #include "mevent.h"
62 
63 /*
64  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
65  * use SIGTRAP.
66  */
67 #define	GDB_SIGNAL_TRAP		5
68 
69 static void gdb_resume_vcpus(void);
70 static void check_command(int fd);
71 
72 static struct mevent *read_event, *write_event;
73 
74 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
75 static pthread_mutex_t gdb_lock;
76 static pthread_cond_t idle_vcpus;
77 static bool stop_pending, first_stop;
78 static int stepping_vcpu, stopped_vcpu;
79 
/*
 * A growable byte buffer used for both directions of the connection.
 *
 * Read buffer:  'start' is unused; 'len' is the number of valid bytes
 *               held in 'data'.
 * Write buffer: 'start' is the index in 'data' of the next byte to
 *               send; 'len' is the number of valid bytes still to send.
 *
 * In both cases 'data' has room for 'capacity' bytes.
 */
struct io_buffer {
	uint8_t *data;		/* backing storage */
	size_t capacity;	/* bytes of room at 'data' */
	size_t start;		/* index of next byte to send (write only) */
	size_t len;		/* number of valid bytes */
};
93 
94 static struct io_buffer cur_comm, cur_resp;
95 static uint8_t cur_csum;
96 static int cur_vcpu;
97 static struct vmctx *ctx;
98 static int cur_fd = -1;
99 
100 const int gdb_regset[] = {
101 	VM_REG_GUEST_RAX,
102 	VM_REG_GUEST_RBX,
103 	VM_REG_GUEST_RCX,
104 	VM_REG_GUEST_RDX,
105 	VM_REG_GUEST_RSI,
106 	VM_REG_GUEST_RDI,
107 	VM_REG_GUEST_RBP,
108 	VM_REG_GUEST_RSP,
109 	VM_REG_GUEST_R8,
110 	VM_REG_GUEST_R9,
111 	VM_REG_GUEST_R10,
112 	VM_REG_GUEST_R11,
113 	VM_REG_GUEST_R12,
114 	VM_REG_GUEST_R13,
115 	VM_REG_GUEST_R14,
116 	VM_REG_GUEST_R15,
117 	VM_REG_GUEST_RIP,
118 	VM_REG_GUEST_RFLAGS,
119 	VM_REG_GUEST_CS,
120 	VM_REG_GUEST_SS,
121 	VM_REG_GUEST_DS,
122 	VM_REG_GUEST_ES,
123 	VM_REG_GUEST_FS,
124 	VM_REG_GUEST_GS
125 };
126 
/*
 * Width in bytes of each register as emitted in a 'g' reply; entry i
 * describes gdb_regset[i].
 */
const int gdb_regsize[] = {
	/* rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp */
	8, 8, 8, 8, 8, 8, 8, 8,
	/* r8 through r15 */
	8, 8, 8, 8, 8, 8, 8, 8,
	/* rip */
	8,
	/* rflags, cs, ss, ds, es, fs, gs */
	4, 4, 4, 4, 4, 4, 4
};
153 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style tracing to /tmp/bhyve_gdb.log.  The log is opened on the
 * first call.  If it cannot be opened (or, when Capsicum is in use, its
 * descriptor cannot be limited to writes) the message is dropped and the
 * open is attempted again on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		FILE *fp;

		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		logfile = fp;
	}

	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
/* Tracing compiled out: debug() expands to nothing. */
#define debug(...)
#endif
184 
185 static int
186 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
187 {
188 	uint64_t regs[4];
189 	const int regset[4] = {
190 		VM_REG_GUEST_CR0,
191 		VM_REG_GUEST_CR3,
192 		VM_REG_GUEST_CR4,
193 		VM_REG_GUEST_EFER
194 	};
195 
196 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
197 		return (-1);
198 
199 	/*
200 	 * For the debugger, always pretend to be the kernel (CPL 0),
201 	 * and if long-mode is enabled, always parse addresses as if
202 	 * in 64-bit mode.
203 	 */
204 	paging->cr3 = regs[1];
205 	paging->cpl = 0;
206 	if (regs[3] & EFER_LMA)
207 		paging->cpu_mode = CPU_MODE_64BIT;
208 	else if (regs[0] & CR0_PE)
209 		paging->cpu_mode = CPU_MODE_PROTECTED;
210 	else
211 		paging->cpu_mode = CPU_MODE_REAL;
212 	if (!(regs[0] & CR0_PG))
213 		paging->paging_mode = PAGING_MODE_FLAT;
214 	else if (!(regs[2] & CR4_PAE))
215 		paging->paging_mode = PAGING_MODE_32;
216 	else if (regs[3] & EFER_LME)
217 		paging->paging_mode = PAGING_MODE_64;
218 	else
219 		paging->paging_mode = PAGING_MODE_PAE;
220 	return (0);
221 }
222 
223 /*
224  * Map a guest virtual address to a physical address (for a given vcpu).
225  * If a guest virtual address is valid, return 1.  If the address is
226  * not valid, return 0.  If an error occurs obtaining the mapping,
227  * return -1.
228  */
229 static int
230 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
231 {
232 	struct vm_guest_paging paging;
233 	int fault;
234 
235 	if (guest_paging_info(vcpu, &paging) == -1)
236 		return (-1);
237 
238 	/*
239 	 * Always use PROT_READ.  We really care if the VA is
240 	 * accessible, not if the current vCPU can write.
241 	 */
242 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
243 	    &fault) == -1)
244 		return (-1);
245 	if (fault)
246 		return (0);
247 	return (1);
248 }
249 
250 static void
251 io_buffer_reset(struct io_buffer *io)
252 {
253 
254 	io->start = 0;
255 	io->len = 0;
256 }
257 
258 /* Available room for adding data. */
259 static size_t
260 io_buffer_avail(struct io_buffer *io)
261 {
262 
263 	return (io->capacity - (io->start + io->len));
264 }
265 
266 static uint8_t *
267 io_buffer_head(struct io_buffer *io)
268 {
269 
270 	return (io->data + io->start);
271 }
272 
273 static uint8_t *
274 io_buffer_tail(struct io_buffer *io)
275 {
276 
277 	return (io->data + io->start + io->len);
278 }
279 
280 static void
281 io_buffer_advance(struct io_buffer *io, size_t amount)
282 {
283 
284 	assert(amount <= io->len);
285 	io->start += amount;
286 	io->len -= amount;
287 }
288 
289 static void
290 io_buffer_consume(struct io_buffer *io, size_t amount)
291 {
292 
293 	io_buffer_advance(io, amount);
294 	if (io->len == 0) {
295 		io->start = 0;
296 		return;
297 	}
298 
299 	/*
300 	 * XXX: Consider making this move optional and compacting on a
301 	 * future read() before realloc().
302 	 */
303 	memmove(io->data, io_buffer_head(io), io->len);
304 	io->start = 0;
305 }
306 
307 static void
308 io_buffer_grow(struct io_buffer *io, size_t newsize)
309 {
310 	uint8_t *new_data;
311 	size_t avail, new_cap;
312 
313 	avail = io_buffer_avail(io);
314 	if (newsize <= avail)
315 		return;
316 
317 	new_cap = io->capacity + (newsize - avail);
318 	new_data = realloc(io->data, new_cap);
319 	if (new_data == NULL)
320 		err(1, "Failed to grow GDB I/O buffer");
321 	io->data = new_data;
322 	io->capacity = new_cap;
323 }
324 
325 static bool
326 response_pending(void)
327 {
328 
329 	if (cur_resp.start == 0 && cur_resp.len == 0)
330 		return (false);
331 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
332 		return (false);
333 	return (true);
334 }
335 
336 static void
337 close_connection(void)
338 {
339 
340 	/*
341 	 * XXX: This triggers a warning because mevent does the close
342 	 * before the EV_DELETE.
343 	 */
344 	pthread_mutex_lock(&gdb_lock);
345 	mevent_delete(write_event);
346 	mevent_delete_close(read_event);
347 	write_event = NULL;
348 	read_event = NULL;
349 	io_buffer_reset(&cur_comm);
350 	io_buffer_reset(&cur_resp);
351 	cur_fd = -1;
352 
353 	/* Resume any stopped vCPUs. */
354 	gdb_resume_vcpus();
355 	pthread_mutex_unlock(&gdb_lock);
356 }
357 
/*
 * Convert a value in the range 0-15 to its lowercase ASCII hex digit.
 */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
367 
/*
 * Convert one ASCII hex digit (either case) to its value.  Characters
 * that are not hex digits yield 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t digit;

	if (v >= '0' && v <= '9')
		digit = v - '0';
	else if (v >= 'a' && v <= 'f')
		digit = v - 'a' + 10;
	else if (v >= 'A' && v <= 'F')
		digit = v - 'A' + 10;
	else
		digit = 0xF;
	return (digit);
}
380 
/*
 * Parse 'len' characters at 'p' as a big-endian (most significant digit
 * first) hexadecimal number.  An empty input yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t acc;
	size_t i;

	acc = 0;
	for (i = 0; i < len; i++)
		acc = (acc << 4) | parse_digit(p[i]);
	return (acc);
}
396 
/* Decode the two hex characters at 'p' (high nibble first) into a byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
403 
404 static void
405 send_pending_data(int fd)
406 {
407 	ssize_t nwritten;
408 
409 	if (cur_resp.len == 0) {
410 		mevent_disable(write_event);
411 		return;
412 	}
413 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
414 	if (nwritten == -1) {
415 		warn("Write to GDB socket failed");
416 		close_connection();
417 	} else {
418 		io_buffer_advance(&cur_resp, nwritten);
419 		if (cur_resp.len == 0)
420 			mevent_disable(write_event);
421 		else
422 			mevent_enable(write_event);
423 	}
424 }
425 
426 /* Append a single character to the output buffer. */
427 static void
428 send_char(uint8_t data)
429 {
430 	io_buffer_grow(&cur_resp, 1);
431 	*io_buffer_tail(&cur_resp) = data;
432 	cur_resp.len++;
433 }
434 
435 /* Append an array of bytes to the output buffer. */
436 static void
437 send_data(const uint8_t *data, size_t len)
438 {
439 
440 	io_buffer_grow(&cur_resp, len);
441 	memcpy(io_buffer_tail(&cur_resp), data, len);
442 	cur_resp.len += len;
443 }
444 
/*
 * Write the two lowercase hex characters for 'v' (high nibble first)
 * into buf[0] and buf[1].  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t hi, lo;

	hi = v >> 4;
	lo = v & 0xf;
	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
452 
/*
 * Queue 'v' in the response buffer as two hex characters, without
 * updating the packet checksum.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
465 
466 static void
467 start_packet(void)
468 {
469 
470 	send_char('$');
471 	cur_csum = 0;
472 }
473 
474 static void
475 finish_packet(void)
476 {
477 
478 	send_char('#');
479 	send_byte(cur_csum);
480 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
481 }
482 
483 /*
484  * Append a single character (for the packet payload) and update the
485  * checksum.
486  */
487 static void
488 append_char(uint8_t v)
489 {
490 
491 	send_char(v);
492 	cur_csum += v;
493 }
494 
495 /*
496  * Append an array of bytes (for the packet payload) and update the
497  * checksum.
498  */
499 static void
500 append_packet_data(const uint8_t *data, size_t len)
501 {
502 
503 	send_data(data, len);
504 	while (len > 0) {
505 		cur_csum += *data;
506 		data++;
507 		len--;
508 	}
509 }
510 
/*
 * Add the characters of the NUL-terminated string 'str' (terminator
 * excluded) to the packet payload.
 */
static void
append_string(const char *str)
{
	const uint8_t *bytes;

	/* The explicit cast is accepted on every platform. */
	bytes = (const uint8_t *)str;
	append_packet_data(bytes, strlen(str));
}
521 
/*
 * Add 'v' to the packet payload as two hex characters, updating the
 * checksum.
 */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
530 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		append_byte((uint8_t)value);
		value >>= 8;
	}
}
541 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	uint8_t hex[len * 2];
	size_t pos;

	/*
	 * Bytes come off 'value' least significant first, so fill the
	 * scratch buffer from its end towards its start.
	 */
	for (pos = len; pos > 0; pos--) {
		format_byte((uint8_t)value, &hex[(pos - 1) * 2]);
		value >>= 8;
	}
	append_packet_data(hex, sizeof(hex));
}
562 
/*
 * Add 'value' to the packet payload as big-endian hex using the fewest
 * whole bytes that can hold it; zero is emitted as the single
 * character '0'.
 */
static void
append_integer(unsigned int value)
{

	if (value == 0)
		append_char('0');
	else {
		/*
		 * fls() gives the index of the highest set bit, i.e. the
		 * number of significant bits; round that up to bytes.  The
		 * parentheses matter: "fls(value) + 7 / 8" is just
		 * fls(value), which would emit one byte per significant
		 * bit.
		 */
		append_unsigned_be(value, (fls(value) + 7) / 8);
	}
}
572 
/*
 * Add the NUL-terminated string 'str' to the packet payload with every
 * character encoded as two hex digits.
 */
static void
append_asciihex(const char *str)
{
	const char *cp;

	for (cp = str; *cp != '\0'; cp++)
		append_byte(*cp);
}
582 
/* Queue a reply packet that has no payload. */
static void
send_empty_response(void)
{

	start_packet();
	/* Nothing is appended between the '$' and the '#'. */
	finish_packet();
}
590 
/*
 * Queue an error reply: 'E' followed by the low byte of 'error' as two
 * hex digits.
 */
static void
send_error(int error)
{
	uint8_t code;

	code = error;
	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
600 
/* Queue the success reply, a packet whose payload is "OK". */
static void
send_ok(void)
{
	static const char ok[] = "OK";

	start_packet();
	append_string(ok);
	finish_packet();
}
609 
/*
 * Parse a thread-id taken from a packet.
 *
 * Returns -2 when no characters were supplied, 0 for "0", -1 for "-1",
 * and otherwise the hexadecimal value of the characters.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (memcmp(data, "-1", 2) == 0)
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
622 
623 static void
624 report_stop(void)
625 {
626 
627 	start_packet();
628 	if (stopped_vcpu == -1)
629 		append_char('S');
630 	else
631 		append_char('T');
632 	append_byte(GDB_SIGNAL_TRAP);
633 	if (stopped_vcpu != -1) {
634 		append_string("thread:");
635 		append_integer(stopped_vcpu + 1);
636 		append_char(';');
637 	}
638 	stopped_vcpu = -1;
639 	finish_packet();
640 }
641 
642 static void
643 gdb_finish_suspend_vcpus(void)
644 {
645 
646 	if (first_stop) {
647 		first_stop = false;
648 		stopped_vcpu = -1;
649 	} else if (response_pending())
650 		stop_pending = true;
651 	else {
652 		report_stop();
653 		send_pending_data(cur_fd);
654 	}
655 }
656 
657 static void
658 _gdb_cpu_suspend(int vcpu, bool report_stop)
659 {
660 
661 	debug("$vCPU %d suspending\n", vcpu);
662 	CPU_SET(vcpu, &vcpus_waiting);
663 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
664 		gdb_finish_suspend_vcpus();
665 	while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
666 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
667 	CPU_CLR(vcpu, &vcpus_waiting);
668 	debug("$vCPU %d resuming\n", vcpu);
669 }
670 
671 void
672 gdb_cpu_add(int vcpu)
673 {
674 
675 	debug("$vCPU %d starting\n", vcpu);
676 	pthread_mutex_lock(&gdb_lock);
677 	CPU_SET(vcpu, &vcpus_active);
678 
679 	/*
680 	 * If a vcpu is added while vcpus are stopped, suspend the new
681 	 * vcpu so that it will pop back out with a debug exit before
682 	 * executing the first instruction.
683 	 */
684 	if (!CPU_EMPTY(&vcpus_suspended)) {
685 		CPU_SET(vcpu, &vcpus_suspended);
686 		_gdb_cpu_suspend(vcpu, false);
687 	}
688 	pthread_mutex_unlock(&gdb_lock);
689 }
690 
691 void
692 gdb_cpu_suspend(int vcpu)
693 {
694 
695 	pthread_mutex_lock(&gdb_lock);
696 	_gdb_cpu_suspend(vcpu, true);
697 	pthread_mutex_unlock(&gdb_lock);
698 }
699 
700 void
701 gdb_cpu_mtrap(int vcpu)
702 {
703 
704 	debug("$vCPU %d MTRAP\n", vcpu);
705 	pthread_mutex_lock(&gdb_lock);
706 	if (vcpu == stepping_vcpu) {
707 		stepping_vcpu = -1;
708 		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
709 		vm_suspend_cpu(ctx, vcpu);
710 		assert(stopped_vcpu == -1);
711 		stopped_vcpu = vcpu;
712 		_gdb_cpu_suspend(vcpu, true);
713 	}
714 	pthread_mutex_unlock(&gdb_lock);
715 }
716 
717 static void
718 gdb_suspend_vcpus(void)
719 {
720 
721 	assert(pthread_mutex_isowned_np(&gdb_lock));
722 	debug("suspending all CPUs\n");
723 	vcpus_suspended = vcpus_active;
724 	vm_suspend_cpu(ctx, -1);
725 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
726 		gdb_finish_suspend_vcpus();
727 }
728 
729 static bool
730 gdb_step_vcpu(int vcpu)
731 {
732 	int error, val;
733 
734 	debug("$vCPU %d step\n", vcpu);
735 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
736 	if (error < 0)
737 		return (false);
738 	error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
739 	vm_resume_cpu(ctx, vcpu);
740 	stepping_vcpu = vcpu;
741 	pthread_cond_broadcast(&idle_vcpus);
742 	return (true);
743 }
744 
745 static void
746 gdb_resume_vcpus(void)
747 {
748 
749 	assert(pthread_mutex_isowned_np(&gdb_lock));
750 	vm_resume_cpu(ctx, -1);
751 	debug("resuming all CPUs\n");
752 	CPU_ZERO(&vcpus_suspended);
753 	pthread_cond_broadcast(&idle_vcpus);
754 }
755 
756 static void
757 gdb_read_regs(void)
758 {
759 	uint64_t regvals[nitems(gdb_regset)];
760 	int i;
761 
762 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
763 	    gdb_regset, regvals) == -1) {
764 		send_error(errno);
765 		return;
766 	}
767 	start_packet();
768 	for (i = 0; i < nitems(regvals); i++)
769 		append_unsigned_native(regvals[i], gdb_regsize[i]);
770 	finish_packet();
771 }
772 
773 static void
774 gdb_read_mem(const uint8_t *data, size_t len)
775 {
776 	uint64_t gpa, gva, val;
777 	uint8_t *cp;
778 	size_t resid, todo, bytes;
779 	bool started;
780 	int error;
781 
782 	/* Skip 'm' */
783 	data += 1;
784 	len -= 1;
785 
786 	/* Parse and consume address. */
787 	cp = memchr(data, ',', len);
788 	if (cp == NULL || cp == data) {
789 		send_error(EINVAL);
790 		return;
791 	}
792 	gva = parse_integer(data, cp - data);
793 	len -= (cp - data) + 1;
794 	data += (cp - data) + 1;
795 
796 	/* Parse length. */
797 	resid = parse_integer(data, len);
798 
799 	started = false;
800 	while (resid > 0) {
801 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
802 		if (error == -1) {
803 			if (started)
804 				finish_packet();
805 			else
806 				send_error(errno);
807 			return;
808 		}
809 		if (error == 0) {
810 			if (started)
811 				finish_packet();
812 			else
813 				send_error(EFAULT);
814 			return;
815 		}
816 
817 		/* Read bytes from current page. */
818 		todo = getpagesize() - gpa % getpagesize();
819 		if (todo > resid)
820 			todo = resid;
821 
822 		cp = paddr_guest2host(ctx, gpa, todo);
823 		if (cp != NULL) {
824 			/*
825 			 * If this page is guest RAM, read it a byte
826 			 * at a time.
827 			 */
828 			if (!started) {
829 				start_packet();
830 				started = true;
831 			}
832 			while (todo > 0) {
833 				append_byte(*cp);
834 				cp++;
835 				gpa++;
836 				gva++;
837 				resid--;
838 				todo--;
839 			}
840 		} else {
841 			/*
842 			 * If this page isn't guest RAM, try to handle
843 			 * it via MMIO.  For MMIO requests, use
844 			 * aligned reads of words when possible.
845 			 */
846 			while (todo > 0) {
847 				if (gpa & 1 || todo == 1)
848 					bytes = 1;
849 				else if (gpa & 2 || todo == 2)
850 					bytes = 2;
851 				else
852 					bytes = 4;
853 				error = read_mem(ctx, cur_vcpu, gpa, &val,
854 				    bytes);
855 				if (error == 0) {
856 					if (!started) {
857 						start_packet();
858 						started = true;
859 					}
860 					gpa += bytes;
861 					gva += bytes;
862 					resid -= bytes;
863 					todo -= bytes;
864 					while (bytes > 0) {
865 						append_byte(val);
866 						val >>= 8;
867 						bytes--;
868 					}
869 				} else {
870 					if (started)
871 						finish_packet();
872 					else
873 						send_error(EFAULT);
874 					return;
875 				}
876 			}
877 		}
878 		assert(resid == 0 || gpa % getpagesize() == 0);
879 	}
880 	if (!started)
881 		start_packet();
882 	finish_packet();
883 }
884 
885 static void
886 gdb_write_mem(const uint8_t *data, size_t len)
887 {
888 	uint64_t gpa, gva, val;
889 	uint8_t *cp;
890 	size_t resid, todo, bytes;
891 	int error;
892 
893 	/* Skip 'M' */
894 	data += 1;
895 	len -= 1;
896 
897 	/* Parse and consume address. */
898 	cp = memchr(data, ',', len);
899 	if (cp == NULL || cp == data) {
900 		send_error(EINVAL);
901 		return;
902 	}
903 	gva = parse_integer(data, cp - data);
904 	len -= (cp - data) + 1;
905 	data += (cp - data) + 1;
906 
907 	/* Parse and consume length. */
908 	cp = memchr(data, ':', len);
909 	if (cp == NULL || cp == data) {
910 		send_error(EINVAL);
911 		return;
912 	}
913 	resid = parse_integer(data, cp - data);
914 	len -= (cp - data) + 1;
915 	data += (cp - data) + 1;
916 
917 	/* Verify the available bytes match the length. */
918 	if (len != resid * 2) {
919 		send_error(EINVAL);
920 		return;
921 	}
922 
923 	while (resid > 0) {
924 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
925 		if (error == -1) {
926 			send_error(errno);
927 			return;
928 		}
929 		if (error == 0) {
930 			send_error(EFAULT);
931 			return;
932 		}
933 
934 		/* Write bytes to current page. */
935 		todo = getpagesize() - gpa % getpagesize();
936 		if (todo > resid)
937 			todo = resid;
938 
939 		cp = paddr_guest2host(ctx, gpa, todo);
940 		if (cp != NULL) {
941 			/*
942 			 * If this page is guest RAM, write it a byte
943 			 * at a time.
944 			 */
945 			while (todo > 0) {
946 				assert(len >= 2);
947 				*cp = parse_byte(data);
948 				data += 2;
949 				len -= 2;
950 				cp++;
951 				gpa++;
952 				gva++;
953 				resid--;
954 				todo--;
955 			}
956 		} else {
957 			/*
958 			 * If this page isn't guest RAM, try to handle
959 			 * it via MMIO.  For MMIO requests, use
960 			 * aligned writes of words when possible.
961 			 */
962 			while (todo > 0) {
963 				if (gpa & 1 || todo == 1) {
964 					bytes = 1;
965 					val = parse_byte(data);
966 				} else if (gpa & 2 || todo == 2) {
967 					bytes = 2;
968 					val = parse_byte(data) |
969 					    (parse_byte(data + 2) << 8);
970 				} else {
971 					bytes = 4;
972 					val = parse_byte(data) |
973 					    (parse_byte(data + 2) << 8) |
974 					    (parse_byte(data + 4) << 16) |
975 					    (parse_byte(data + 6) << 24);
976 				}
977 				error = write_mem(ctx, cur_vcpu, gpa, val,
978 				    bytes);
979 				if (error == 0) {
980 					gpa += bytes;
981 					gva += bytes;
982 					resid -= bytes;
983 					todo -= bytes;
984 					data += 2 * bytes;
985 					len -= 2 * bytes;
986 				} else {
987 					send_error(EFAULT);
988 					return;
989 				}
990 			}
991 		}
992 		assert(resid == 0 || gpa % getpagesize() == 0);
993 	}
994 	assert(len == 0);
995 	send_ok();
996 }
997 
/*
 * Return true if the 'len' bytes at 'data' begin with the string 'cmd'.
 * Only the prefix is compared; any bytes after it are ignored.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (cmdlen <= len && memcmp(data, cmd, cmdlen) == 0);
}
1006 
/*
 * Handle the feature list of a qSupported request.  'data'/'len' cover
 * the text following "qSupported", a ';'-separated list of entries of
 * the form "name=value", "name+" or "name-".
 *
 * The list is parsed, but no feature is currently acted upon; the reply
 * always advertises only the maximum packet size.  Exits via err(3) if
 * the scratch copy of the list cannot be allocated.
 */
static void
check_features(const uint8_t *data, size_t len)
{
	char *feature, *next_feature, *str, *value;
	bool supported;

	/* strsep() needs a writable, NUL-terminated copy. */
	str = malloc(len + 1);
	if (str == NULL)
		err(1, "Failed to allocate GDB feature list");
	memcpy(str, data, len);
	str[len] = '\0';
	next_feature = str;

	while ((feature = strsep(&next_feature, ";")) != NULL) {
		/*
		 * Null features shouldn't exist, but skip if they
		 * do.
		 */
		if (strcmp(feature, "") == 0)
			continue;

		/*
		 * Look for the value or supported / not supported
		 * flag.
		 */
		value = strchr(feature, '=');
		if (value != NULL) {
			*value = '\0';
			value++;
			supported = true;
		} else {
			value = feature + strlen(feature) - 1;
			switch (*value) {
			case '+':
				supported = true;
				break;
			case '-':
				supported = false;
				break;
			default:
				/*
				 * This is really a protocol error,
				 * but we just ignore malformed
				 * features for ease of
				 * implementation.
				 */
				continue;
			}
			value = NULL;
		}

		/* No currently supported features. */
#ifndef __FreeBSD__
		/*
		 * The compiler dislikes 'supported' being set but never used.
		 * Make it happy here.
		 */
		if (supported) {
			debug("feature '%s' supported\n", feature);
		}
#endif /* __FreeBSD__ */
	}
	free(str);

	start_packet();

	/* This is an arbitrary limit. */
	append_string("PacketSize=4096");
	finish_packet();
}
1075 
1076 static void
1077 gdb_query(const uint8_t *data, size_t len)
1078 {
1079 
1080 	/*
1081 	 * TODO:
1082 	 * - qSearch
1083 	 */
1084 	if (command_equals(data, len, "qAttached")) {
1085 		start_packet();
1086 		append_char('1');
1087 		finish_packet();
1088 	} else if (command_equals(data, len, "qC")) {
1089 		start_packet();
1090 		append_string("QC");
1091 		append_integer(cur_vcpu + 1);
1092 		finish_packet();
1093 	} else if (command_equals(data, len, "qfThreadInfo")) {
1094 		cpuset_t mask;
1095 		bool first;
1096 		int vcpu;
1097 
1098 		if (CPU_EMPTY(&vcpus_active)) {
1099 			send_error(EINVAL);
1100 			return;
1101 		}
1102 		mask = vcpus_active;
1103 		start_packet();
1104 		append_char('m');
1105 		first = true;
1106 		while (!CPU_EMPTY(&mask)) {
1107 			vcpu = CPU_FFS(&mask) - 1;
1108 			CPU_CLR(vcpu, &mask);
1109 			if (first)
1110 				first = false;
1111 			else
1112 				append_char(',');
1113 			append_integer(vcpu + 1);
1114 		}
1115 		finish_packet();
1116 	} else if (command_equals(data, len, "qsThreadInfo")) {
1117 		start_packet();
1118 		append_char('l');
1119 		finish_packet();
1120 	} else if (command_equals(data, len, "qSupported")) {
1121 		data += strlen("qSupported");
1122 		len -= strlen("qSupported");
1123 		check_features(data, len);
1124 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1125 		char buf[16];
1126 		int tid;
1127 
1128 		data += strlen("qThreadExtraInfo");
1129 		len -= strlen("qThreadExtraInfo");
1130 		if (*data != ',') {
1131 			send_error(EINVAL);
1132 			return;
1133 		}
1134 		tid = parse_threadid(data + 1, len - 1);
1135 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1136 			send_error(EINVAL);
1137 			return;
1138 		}
1139 
1140 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1141 		start_packet();
1142 		append_asciihex(buf);
1143 		finish_packet();
1144 	} else
1145 		send_empty_response();
1146 }
1147 
1148 static void
1149 handle_command(const uint8_t *data, size_t len)
1150 {
1151 
1152 	/* Reject packets with a sequence-id. */
1153 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1154 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1155 		send_empty_response();
1156 		return;
1157 	}
1158 
1159 	switch (*data) {
1160 	case 'c':
1161 		if (len != 1) {
1162 			send_error(EINVAL);
1163 			break;
1164 		}
1165 
1166 		/* Don't send a reply until a stop occurs. */
1167 		gdb_resume_vcpus();
1168 		break;
1169 	case 'D':
1170 		send_ok();
1171 
1172 		/* TODO: Resume any stopped CPUs. */
1173 		break;
1174 	case 'g': {
1175 		gdb_read_regs();
1176 		break;
1177 	}
1178 	case 'H': {
1179 		int tid;
1180 
1181 		if (data[1] != 'g' && data[1] != 'c') {
1182 			send_error(EINVAL);
1183 			break;
1184 		}
1185 		tid = parse_threadid(data + 2, len - 2);
1186 		if (tid == -2) {
1187 			send_error(EINVAL);
1188 			break;
1189 		}
1190 
1191 		if (CPU_EMPTY(&vcpus_active)) {
1192 			send_error(EINVAL);
1193 			break;
1194 		}
1195 		if (tid == -1 || tid == 0)
1196 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1197 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1198 			cur_vcpu = tid - 1;
1199 		else {
1200 			send_error(EINVAL);
1201 			break;
1202 		}
1203 		send_ok();
1204 		break;
1205 	}
1206 	case 'm':
1207 		gdb_read_mem(data, len);
1208 		break;
1209 	case 'M':
1210 		gdb_write_mem(data, len);
1211 		break;
1212 	case 'T': {
1213 		int tid;
1214 
1215 		tid = parse_threadid(data + 1, len - 1);
1216 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1217 			send_error(EINVAL);
1218 			return;
1219 		}
1220 		send_ok();
1221 		break;
1222 	}
1223 	case 'q':
1224 		gdb_query(data, len);
1225 		break;
1226 	case 's':
1227 		if (len != 1) {
1228 			send_error(EINVAL);
1229 			break;
1230 		}
1231 
1232 		/* Don't send a reply until a stop occurs. */
1233 		if (!gdb_step_vcpu(cur_vcpu)) {
1234 			send_error(EOPNOTSUPP);
1235 			break;
1236 		}
1237 		break;
1238 	case '?':
1239 		/* XXX: Only if stopped? */
1240 		/* For now, just report that we are always stopped. */
1241 		start_packet();
1242 		append_char('S');
1243 		append_byte(GDB_SIGNAL_TRAP);
1244 		finish_packet();
1245 		break;
1246 	case 'G': /* TODO */
1247 	case 'v':
1248 		/* Handle 'vCont' */
1249 		/* 'vCtrlC' */
1250 	case 'p': /* TODO */
1251 	case 'P': /* TODO */
1252 	case 'Q': /* TODO */
1253 	case 't': /* TODO */
1254 	case 'X': /* TODO */
1255 	case 'z': /* TODO */
1256 	case 'Z': /* TODO */
1257 	default:
1258 		send_empty_response();
1259 	}
1260 }
1261 
1262 /* Check for a valid packet in the command buffer. */
1263 static void
1264 check_command(int fd)
1265 {
1266 	uint8_t *head, *hash, *p, sum;
1267 	size_t avail, plen;
1268 
1269 	for (;;) {
1270 		avail = cur_comm.len;
1271 		if (avail == 0)
1272 			return;
1273 		head = io_buffer_head(&cur_comm);
1274 		switch (*head) {
1275 		case 0x03:
1276 			debug("<- Ctrl-C\n");
1277 			io_buffer_consume(&cur_comm, 1);
1278 
1279 			gdb_suspend_vcpus();
1280 			break;
1281 		case '+':
1282 			/* ACK of previous response. */
1283 			debug("<- +\n");
1284 			if (response_pending())
1285 				io_buffer_reset(&cur_resp);
1286 			io_buffer_consume(&cur_comm, 1);
1287 			if (stop_pending) {
1288 				stop_pending = false;
1289 				report_stop();
1290 				send_pending_data(fd);
1291 			}
1292 			break;
1293 		case '-':
1294 			/* NACK of previous response. */
1295 			debug("<- -\n");
1296 			if (response_pending()) {
1297 				cur_resp.len += cur_resp.start;
1298 				cur_resp.start = 0;
1299 				if (cur_resp.data[0] == '+')
1300 					io_buffer_advance(&cur_resp, 1);
1301 				debug("-> %.*s\n", (int)cur_resp.len,
1302 				    io_buffer_head(&cur_resp));
1303 			}
1304 			io_buffer_consume(&cur_comm, 1);
1305 			send_pending_data(fd);
1306 			break;
1307 		case '$':
1308 			/* Packet. */
1309 
1310 			if (response_pending()) {
1311 				warnx("New GDB command while response in "
1312 				    "progress");
1313 				io_buffer_reset(&cur_resp);
1314 			}
1315 
1316 			/* Is packet complete? */
1317 			hash = memchr(head, '#', avail);
1318 			if (hash == NULL)
1319 				return;
1320 			plen = (hash - head + 1) + 2;
1321 			if (avail < plen)
1322 				return;
1323 			debug("<- %.*s\n", (int)plen, head);
1324 
1325 			/* Verify checksum. */
1326 			for (sum = 0, p = head + 1; p < hash; p++)
1327 				sum += *p;
1328 			if (sum != parse_byte(hash + 1)) {
1329 				io_buffer_consume(&cur_comm, plen);
1330 				debug("-> -\n");
1331 				send_char('-');
1332 				send_pending_data(fd);
1333 				break;
1334 			}
1335 			send_char('+');
1336 
1337 			handle_command(head + 1, hash - (head + 1));
1338 			io_buffer_consume(&cur_comm, plen);
1339 			if (!response_pending()) {
1340 				debug("-> +\n");
1341 			}
1342 			send_pending_data(fd);
1343 			break;
1344 		default:
1345 			/* XXX: Possibly drop connection instead. */
1346 			debug("-> %02x\n", *head);
1347 			io_buffer_consume(&cur_comm, 1);
1348 			break;
1349 		}
1350 	}
1351 }
1352 
1353 static void
1354 gdb_readable(int fd, enum ev_type event, void *arg)
1355 {
1356 	ssize_t nread;
1357 	int pending;
1358 
1359 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1360 		warn("FIONREAD on GDB socket");
1361 		return;
1362 	}
1363 
1364 	/*
1365 	 * 'pending' might be zero due to EOF.  We need to call read
1366 	 * with a non-zero length to detect EOF.
1367 	 */
1368 	if (pending == 0)
1369 		pending = 1;
1370 
1371 	/* Ensure there is room in the command buffer. */
1372 	io_buffer_grow(&cur_comm, pending);
1373 	assert(io_buffer_avail(&cur_comm) >= pending);
1374 
1375 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1376 	if (nread == 0) {
1377 		close_connection();
1378 	} else if (nread == -1) {
1379 		if (errno == EAGAIN)
1380 			return;
1381 
1382 		warn("Read from GDB socket");
1383 		close_connection();
1384 	} else {
1385 		cur_comm.len += nread;
1386 		pthread_mutex_lock(&gdb_lock);
1387 		check_command(fd);
1388 		pthread_mutex_unlock(&gdb_lock);
1389 	}
1390 }
1391 
1392 static void
1393 gdb_writable(int fd, enum ev_type event, void *arg)
1394 {
1395 
1396 	send_pending_data(fd);
1397 }
1398 
1399 static void
1400 new_connection(int fd, enum ev_type event, void *arg)
1401 {
1402 	int optval, s;
1403 
1404 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1405 	if (s == -1) {
1406 		if (arg != NULL)
1407 			err(1, "Failed accepting initial GDB connection");
1408 
1409 		/* Silently ignore errors post-startup. */
1410 		return;
1411 	}
1412 
1413 	optval = 1;
1414 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1415 	    -1) {
1416 		warn("Failed to disable SIGPIPE for GDB connection");
1417 		close(s);
1418 		return;
1419 	}
1420 
1421 	pthread_mutex_lock(&gdb_lock);
1422 	if (cur_fd != -1) {
1423 		close(s);
1424 		warnx("Ignoring additional GDB connection.");
1425 	}
1426 
1427 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1428 	if (read_event == NULL) {
1429 		if (arg != NULL)
1430 			err(1, "Failed to setup initial GDB connection");
1431 		pthread_mutex_unlock(&gdb_lock);
1432 		return;
1433 	}
1434 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1435 	if (write_event == NULL) {
1436 		if (arg != NULL)
1437 			err(1, "Failed to setup initial GDB connection");
1438 		mevent_delete_close(read_event);
1439 		read_event = NULL;
1440 	}
1441 
1442 	cur_fd = s;
1443 	cur_vcpu = 0;
1444 	stepping_vcpu = -1;
1445 	stopped_vcpu = -1;
1446 	stop_pending = false;
1447 
1448 	/* Break on attach. */
1449 	first_stop = true;
1450 	gdb_suspend_vcpus();
1451 	pthread_mutex_unlock(&gdb_lock);
1452 }
1453 
1454 #ifndef WITHOUT_CAPSICUM
1455 void
1456 limit_gdb_socket(int s)
1457 {
1458 	cap_rights_t rights;
1459 	unsigned long ioctls[] = { FIONREAD };
1460 
1461 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1462 	    CAP_SETSOCKOPT, CAP_IOCTL);
1463 	if (caph_rights_limit(s, &rights) == -1)
1464 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1465 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1466 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1467 }
1468 #endif
1469 
1470 void
1471 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1472 {
1473 	struct sockaddr_in sin;
1474 	int error, flags, s;
1475 
1476 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1477 
1478 	error = pthread_mutex_init(&gdb_lock, NULL);
1479 	if (error != 0)
1480 		errc(1, error, "gdb mutex init");
1481 	error = pthread_cond_init(&idle_vcpus, NULL);
1482 	if (error != 0)
1483 		errc(1, error, "gdb cv init");
1484 
1485 	ctx = _ctx;
1486 	s = socket(PF_INET, SOCK_STREAM, 0);
1487 	if (s < 0)
1488 		err(1, "gdb socket create");
1489 
1490 #ifdef __FreeBSD__
1491 	sin.sin_len = sizeof(sin);
1492 #endif
1493 	sin.sin_family = AF_INET;
1494 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1495 	sin.sin_port = htons(sport);
1496 
1497 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1498 		err(1, "gdb socket bind");
1499 
1500 	if (listen(s, 1) < 0)
1501 		err(1, "gdb socket listen");
1502 
1503 	if (wait) {
1504 		/*
1505 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1506 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1507 		 * it starts execution.  The vcpu will remain suspended
1508 		 * until a debugger connects.
1509 		 */
1510 		stepping_vcpu = -1;
1511 		stopped_vcpu = -1;
1512 		CPU_SET(0, &vcpus_suspended);
1513 	}
1514 
1515 	flags = fcntl(s, F_GETFL);
1516 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1517 		err(1, "Failed to mark gdb socket non-blocking");
1518 
1519 #ifndef WITHOUT_CAPSICUM
1520 	limit_gdb_socket(s);
1521 #endif
1522 	mevent_add(s, EVF_READ, new_connection, NULL);
1523 }
1524