xref: /freebsd/contrib/processor-trace/libipt/src/pt_block_decoder.c (revision c1d255d3ffdbe447de3ab875bf4e7d7accc5bfc5)
1 /*
2  * Copyright (c) 2016-2019, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright notice,
10  *    this list of conditions and the following disclaimer in the documentation
11  *    and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "pt_block_decoder.h"
30 #include "pt_block_cache.h"
31 #include "pt_section.h"
32 #include "pt_image.h"
33 #include "pt_insn.h"
34 #include "pt_config.h"
35 #include "pt_asid.h"
36 #include "pt_compiler.h"
37 
38 #include "intel-pt.h"
39 
40 #include <string.h>
41 #include <stdlib.h>
42 
43 
44 static int pt_blk_proceed_trailing_event(struct pt_block_decoder *,
45 					 struct pt_block *);
46 
47 
48 static int pt_blk_status(const struct pt_block_decoder *decoder, int flags)
49 {
50 	int status;
51 
52 	if (!decoder)
53 		return -pte_internal;
54 
55 	status = decoder->status;
56 
57 	/* Indicate whether tracing is disabled or enabled.
58 	 *
59 	 * This duplicates the indication in struct pt_insn and covers the case
60 	 * where we indicate the status after synchronizing.
61 	 */
62 	if (!decoder->enabled)
63 		flags |= pts_ip_suppressed;
64 
65 	/* Forward end-of-trace indications.
66 	 *
67 	 * Postpone it as long as we're still processing events, though.
68 	 */
69 	if ((status & pts_eos) && !decoder->process_event)
70 		flags |= pts_eos;
71 
72 	return flags;
73 }
74 
75 static void pt_blk_reset(struct pt_block_decoder *decoder)
76 {
77 	if (!decoder)
78 		return;
79 
80 	decoder->mode = ptem_unknown;
81 	decoder->ip = 0ull;
82 	decoder->status = 0;
83 	decoder->enabled = 0;
84 	decoder->process_event = 0;
85 	decoder->speculative = 0;
86 	decoder->process_insn = 0;
87 	decoder->bound_paging = 0;
88 	decoder->bound_vmcs = 0;
89 	decoder->bound_ptwrite = 0;
90 
91 	memset(&decoder->event, 0, sizeof(decoder->event));
92 	pt_retstack_init(&decoder->retstack);
93 	pt_asid_init(&decoder->asid);
94 }
95 
96 /* Initialize the query decoder flags based on our flags. */
97 
98 static int pt_blk_init_qry_flags(struct pt_conf_flags *qflags,
99 				 const struct pt_conf_flags *flags)
100 {
101 	if (!qflags || !flags)
102 		return -pte_internal;
103 
104 	memset(qflags, 0, sizeof(*qflags));
105 	qflags->variant.query.keep_tcal_on_ovf =
106 		flags->variant.block.keep_tcal_on_ovf;
107 
108 	return 0;
109 }
110 
111 int pt_blk_decoder_init(struct pt_block_decoder *decoder,
112 			const struct pt_config *uconfig)
113 {
114 	struct pt_config config;
115 	int errcode;
116 
117 	if (!decoder)
118 		return -pte_internal;
119 
120 	errcode = pt_config_from_user(&config, uconfig);
121 	if (errcode < 0)
122 		return errcode;
123 
124 	/* The user supplied decoder flags. */
125 	decoder->flags = config.flags;
126 
127 	/* Set the flags we need for the query decoder we use. */
128 	errcode = pt_blk_init_qry_flags(&config.flags, &decoder->flags);
129 	if (errcode < 0)
130 		return errcode;
131 
132 	errcode = pt_qry_decoder_init(&decoder->query, &config);
133 	if (errcode < 0)
134 		return errcode;
135 
136 	pt_image_init(&decoder->default_image, NULL);
137 	decoder->image = &decoder->default_image;
138 
139 	errcode = pt_msec_cache_init(&decoder->scache);
140 	if (errcode < 0)
141 		return errcode;
142 
143 	pt_blk_reset(decoder);
144 
145 	return 0;
146 }
147 
148 void pt_blk_decoder_fini(struct pt_block_decoder *decoder)
149 {
150 	if (!decoder)
151 		return;
152 
153 	pt_msec_cache_fini(&decoder->scache);
154 	pt_image_fini(&decoder->default_image);
155 	pt_qry_decoder_fini(&decoder->query);
156 }
157 
158 struct pt_block_decoder *
159 pt_blk_alloc_decoder(const struct pt_config *config)
160 {
161 	struct pt_block_decoder *decoder;
162 	int errcode;
163 
164 	decoder = malloc(sizeof(*decoder));
165 	if (!decoder)
166 		return NULL;
167 
168 	errcode = pt_blk_decoder_init(decoder, config);
169 	if (errcode < 0) {
170 		free(decoder);
171 		return NULL;
172 	}
173 
174 	return decoder;
175 }
176 
/* Finalize and free a block decoder.
 *
 * Does nothing if @decoder is NULL.
 */
void pt_blk_free_decoder(struct pt_block_decoder *decoder)
{
	if (decoder) {
		pt_blk_decoder_fini(decoder);
		free(decoder);
	}
}
185 
186 /* Maybe synthesize a tick event.
187  *
188  * If we're not already processing events, check the current time against the
189  * last event's time.  If it changed, synthesize a tick event with the new time.
190  *
191  * Returns zero if no tick event has been created.
192  * Returns a positive integer if a tick event has been created.
193  * Returns a negative error code otherwise.
194  */
195 static int pt_blk_tick(struct pt_block_decoder *decoder, uint64_t ip)
196 {
197 	struct pt_event *ev;
198 	uint64_t tsc;
199 	uint32_t lost_mtc, lost_cyc;
200 	int errcode;
201 
202 	if (!decoder)
203 		return -pte_internal;
204 
205 	/* We're not generating tick events if tracing is disabled. */
206 	if (!decoder->enabled)
207 		return -pte_internal;
208 
209 	/* Events already provide a timestamp so there is no need to synthesize
210 	 * an artificial tick event.  There's no room, either, since this would
211 	 * overwrite the in-progress event.
212 	 *
213 	 * In rare cases where we need to proceed to an event location using
214 	 * trace this may cause us to miss a timing update if the event is not
215 	 * forwarded to the user.
216 	 *
217 	 * The only case I can come up with at the moment is a MODE.EXEC binding
218 	 * to the TIP IP of a far branch.
219 	 */
220 	if (decoder->process_event)
221 		return 0;
222 
223 	errcode = pt_qry_time(&decoder->query, &tsc, &lost_mtc, &lost_cyc);
224 	if (errcode < 0) {
225 		/* If we don't have wall-clock time, we use relative time. */
226 		if (errcode != -pte_no_time)
227 			return errcode;
228 	}
229 
230 	ev = &decoder->event;
231 
232 	/* We're done if time has not changed since the last event. */
233 	if (tsc == ev->tsc)
234 		return 0;
235 
236 	/* Time has changed so we create a new tick event. */
237 	memset(ev, 0, sizeof(*ev));
238 	ev->type = ptev_tick;
239 	ev->variant.tick.ip = ip;
240 
241 	/* Indicate if we have wall-clock time or only relative time. */
242 	if (errcode != -pte_no_time)
243 		ev->has_tsc = 1;
244 	ev->tsc = tsc;
245 	ev->lost_mtc = lost_mtc;
246 	ev->lost_cyc = lost_cyc;
247 
248 	/* We now have an event to process. */
249 	decoder->process_event = 1;
250 
251 	return 1;
252 }
253 
254 /* Query an indirect branch.
255  *
256  * Returns zero on success, a negative error code otherwise.
257  */
258 static int pt_blk_indirect_branch(struct pt_block_decoder *decoder,
259 				  uint64_t *ip)
260 {
261 	uint64_t evip;
262 	int status, errcode;
263 
264 	if (!decoder)
265 		return -pte_internal;
266 
267 	evip = decoder->ip;
268 
269 	status = pt_qry_indirect_branch(&decoder->query, ip);
270 	if (status < 0)
271 		return status;
272 
273 	if (decoder->flags.variant.block.enable_tick_events) {
274 		errcode = pt_blk_tick(decoder, evip);
275 		if (errcode < 0)
276 			return errcode;
277 	}
278 
279 	return status;
280 }
281 
282 /* Query a conditional branch.
283  *
284  * Returns zero on success, a negative error code otherwise.
285  */
286 static int pt_blk_cond_branch(struct pt_block_decoder *decoder, int *taken)
287 {
288 	int status, errcode;
289 
290 	if (!decoder)
291 		return -pte_internal;
292 
293 	status = pt_qry_cond_branch(&decoder->query, taken);
294 	if (status < 0)
295 		return status;
296 
297 	if (decoder->flags.variant.block.enable_tick_events) {
298 		errcode = pt_blk_tick(decoder, decoder->ip);
299 		if (errcode < 0)
300 			return errcode;
301 	}
302 
303 	return status;
304 }
305 
306 static int pt_blk_start(struct pt_block_decoder *decoder, int status)
307 {
308 	if (!decoder)
309 		return -pte_internal;
310 
311 	if (status < 0)
312 		return status;
313 
314 	decoder->status = status;
315 	if (!(status & pts_ip_suppressed))
316 		decoder->enabled = 1;
317 
318 	/* We will always have an event.
319 	 *
320 	 * If we synchronized onto an empty PSB+, tracing is disabled and we'll
321 	 * process events until the enabled event.
322 	 *
323 	 * If tracing is enabled, PSB+ must at least provide the execution mode,
324 	 * which we're going to forward to the user.
325 	 */
326 	return pt_blk_proceed_trailing_event(decoder, NULL);
327 }
328 
329 static int pt_blk_sync_reset(struct pt_block_decoder *decoder)
330 {
331 	if (!decoder)
332 		return -pte_internal;
333 
334 	pt_blk_reset(decoder);
335 
336 	return 0;
337 }
338 
339 int pt_blk_sync_forward(struct pt_block_decoder *decoder)
340 {
341 	int errcode, status;
342 
343 	if (!decoder)
344 		return -pte_invalid;
345 
346 	errcode = pt_blk_sync_reset(decoder);
347 	if (errcode < 0)
348 		return errcode;
349 
350 	status = pt_qry_sync_forward(&decoder->query, &decoder->ip);
351 
352 	return pt_blk_start(decoder, status);
353 }
354 
355 int pt_blk_sync_backward(struct pt_block_decoder *decoder)
356 {
357 	int errcode, status;
358 
359 	if (!decoder)
360 		return -pte_invalid;
361 
362 	errcode = pt_blk_sync_reset(decoder);
363 	if (errcode < 0)
364 		return errcode;
365 
366 	status = pt_qry_sync_backward(&decoder->query, &decoder->ip);
367 
368 	return pt_blk_start(decoder, status);
369 }
370 
371 int pt_blk_sync_set(struct pt_block_decoder *decoder, uint64_t offset)
372 {
373 	int errcode, status;
374 
375 	if (!decoder)
376 		return -pte_invalid;
377 
378 	errcode = pt_blk_sync_reset(decoder);
379 	if (errcode < 0)
380 		return errcode;
381 
382 	status = pt_qry_sync_set(&decoder->query, &decoder->ip, offset);
383 
384 	return pt_blk_start(decoder, status);
385 }
386 
387 int pt_blk_get_offset(const struct pt_block_decoder *decoder, uint64_t *offset)
388 {
389 	if (!decoder)
390 		return -pte_invalid;
391 
392 	return pt_qry_get_offset(&decoder->query, offset);
393 }
394 
395 int pt_blk_get_sync_offset(const struct pt_block_decoder *decoder,
396 			   uint64_t *offset)
397 {
398 	if (!decoder)
399 		return -pte_invalid;
400 
401 	return pt_qry_get_sync_offset(&decoder->query, offset);
402 }
403 
404 struct pt_image *pt_blk_get_image(struct pt_block_decoder *decoder)
405 {
406 	if (!decoder)
407 		return NULL;
408 
409 	return decoder->image;
410 }
411 
412 int pt_blk_set_image(struct pt_block_decoder *decoder, struct pt_image *image)
413 {
414 	if (!decoder)
415 		return -pte_invalid;
416 
417 	if (!image)
418 		image = &decoder->default_image;
419 
420 	decoder->image = image;
421 	return 0;
422 }
423 
424 const struct pt_config *
425 pt_blk_get_config(const struct pt_block_decoder *decoder)
426 {
427 	if (!decoder)
428 		return NULL;
429 
430 	return pt_qry_get_config(&decoder->query);
431 }
432 
433 int pt_blk_time(struct pt_block_decoder *decoder, uint64_t *time,
434 		uint32_t *lost_mtc, uint32_t *lost_cyc)
435 {
436 	if (!decoder || !time)
437 		return -pte_invalid;
438 
439 	return pt_qry_time(&decoder->query, time, lost_mtc, lost_cyc);
440 }
441 
442 int pt_blk_core_bus_ratio(struct pt_block_decoder *decoder, uint32_t *cbr)
443 {
444 	if (!decoder || !cbr)
445 		return -pte_invalid;
446 
447 	return pt_qry_core_bus_ratio(&decoder->query, cbr);
448 }
449 
450 int pt_blk_asid(const struct pt_block_decoder *decoder, struct pt_asid *asid,
451 		size_t size)
452 {
453 	if (!decoder || !asid)
454 		return -pte_invalid;
455 
456 	return pt_asid_to_user(asid, &decoder->asid, size);
457 }
458 
459 /* Fetch the next pending event.
460  *
461  * Checks for pending events.  If an event is pending, fetches it (if not
462  * already in process).
463  *
464  * Returns zero if no event is pending.
465  * Returns a positive integer if an event is pending or in process.
466  * Returns a negative error code otherwise.
467  */
468 static inline int pt_blk_fetch_event(struct pt_block_decoder *decoder)
469 {
470 	int status;
471 
472 	if (!decoder)
473 		return -pte_internal;
474 
475 	if (decoder->process_event)
476 		return 1;
477 
478 	if (!(decoder->status & pts_event_pending))
479 		return 0;
480 
481 	status = pt_qry_event(&decoder->query, &decoder->event,
482 			      sizeof(decoder->event));
483 	if (status < 0)
484 		return status;
485 
486 	decoder->process_event = 1;
487 	decoder->status = status;
488 
489 	return 1;
490 }
491 
492 static inline int pt_blk_block_is_empty(const struct pt_block *block)
493 {
494 	if (!block)
495 		return 1;
496 
497 	return !block->ninsn;
498 }
499 
500 static inline int block_to_user(struct pt_block *ublock, size_t size,
501 				const struct pt_block *block)
502 {
503 	if (!ublock || !block)
504 		return -pte_internal;
505 
506 	if (ublock == block)
507 		return 0;
508 
509 	/* Zero out any unknown bytes. */
510 	if (sizeof(*block) < size) {
511 		memset(ublock + sizeof(*block), 0, size - sizeof(*block));
512 
513 		size = sizeof(*block);
514 	}
515 
516 	memcpy(ublock, block, size);
517 
518 	return 0;
519 }
520 
/* An instruction predicate that never matches.
 *
 * Has the predicate signature accepted by pt_blk_proceed_to_insn().  Both
 * arguments are ignored.
 *
 * Returns zero, always.
 */
static int pt_insn_false(const struct pt_insn *insn,
			 const struct pt_insn_ext *iext)
{
	/* Silence unused-parameter warnings. */
	(void) iext;
	(void) insn;

	return 0;
}
529 
530 /* Determine the next IP using trace.
531  *
532  * Tries to determine the IP of the next instruction using trace and provides it
533  * in @pip.
534  *
535  * Not requiring trace to determine the IP is treated as an internal error.
536  *
537  * Does not update the return compression stack for indirect calls.  This is
538  * expected to have been done, already, when trying to determine the next IP
539  * without using trace.
540  *
541  * Does not update @decoder->status.  The caller is expected to do that.
542  *
543  * Returns a non-negative pt_status_flag bit-vector on success, a negative error
544  * code otherwise.
545  * Returns -pte_internal if @pip, @decoder, @insn, or @iext are NULL.
546  * Returns -pte_internal if no trace is required.
547  */
548 static int pt_blk_next_ip(uint64_t *pip, struct pt_block_decoder *decoder,
549 			  const struct pt_insn *insn,
550 			  const struct pt_insn_ext *iext)
551 {
552 	int status, errcode;
553 
554 	if (!pip || !decoder || !insn || !iext)
555 		return -pte_internal;
556 
557 	/* We handle non-taken conditional branches, and compressed returns
558 	 * directly in the switch.
559 	 *
560 	 * All kinds of branches are handled below the switch.
561 	 */
562 	switch (insn->iclass) {
563 	case ptic_cond_jump: {
564 		uint64_t ip;
565 		int taken;
566 
567 		status = pt_blk_cond_branch(decoder, &taken);
568 		if (status < 0)
569 			return status;
570 
571 		ip = insn->ip + insn->size;
572 		if (taken)
573 			ip += (uint64_t) (int64_t)
574 				iext->variant.branch.displacement;
575 
576 		*pip = ip;
577 		return status;
578 	}
579 
580 	case ptic_return: {
581 		int taken;
582 
583 		/* Check for a compressed return. */
584 		status = pt_blk_cond_branch(decoder, &taken);
585 		if (status < 0) {
586 			if (status != -pte_bad_query)
587 				return status;
588 
589 			break;
590 		}
591 
592 		/* A compressed return is indicated by a taken conditional
593 		 * branch.
594 		 */
595 		if (!taken)
596 			return -pte_bad_retcomp;
597 
598 		errcode = pt_retstack_pop(&decoder->retstack, pip);
599 		if (errcode < 0)
600 			return errcode;
601 
602 		return status;
603 	}
604 
605 	case ptic_jump:
606 	case ptic_call:
607 		/* A direct jump or call wouldn't require trace. */
608 		if (iext->variant.branch.is_direct)
609 			return -pte_internal;
610 
611 		break;
612 
613 	case ptic_far_call:
614 	case ptic_far_return:
615 	case ptic_far_jump:
616 		break;
617 
618 	case ptic_ptwrite:
619 	case ptic_other:
620 		return -pte_internal;
621 
622 	case ptic_error:
623 		return -pte_bad_insn;
624 	}
625 
626 	/* Process an indirect branch.
627 	 *
628 	 * This covers indirect jumps and calls, non-compressed returns, and all
629 	 * flavors of far transfers.
630 	 */
631 	return pt_blk_indirect_branch(decoder, pip);
632 }
633 
634 /* Proceed to the next IP using trace.
635  *
636  * We failed to proceed without trace.  This ends the current block.  Now use
637  * trace to do one final step to determine the start IP of the next block.
638  *
639  * Returns zero on success, a negative error code otherwise.
640  */
641 static int pt_blk_proceed_with_trace(struct pt_block_decoder *decoder,
642 				     const struct pt_insn *insn,
643 				     const struct pt_insn_ext *iext)
644 {
645 	int status;
646 
647 	if (!decoder)
648 		return -pte_internal;
649 
650 	status = pt_blk_next_ip(&decoder->ip, decoder, insn, iext);
651 	if (status < 0)
652 		return status;
653 
654 	/* Preserve the query decoder's response which indicates upcoming
655 	 * events.
656 	 */
657 	decoder->status = status;
658 
659 	/* We do need an IP in order to proceed. */
660 	if (status & pts_ip_suppressed)
661 		return -pte_noip;
662 
663 	return 0;
664 }
665 
666 /* Decode one instruction in a known section.
667  *
668  * Decode the instruction at @insn->ip in @msec assuming execution mode
669  * @insn->mode.
670  *
671  * Returns zero on success, a negative error code otherwise.
672  */
673 static int pt_blk_decode_in_section(struct pt_insn *insn,
674 				    struct pt_insn_ext *iext,
675 				    const struct pt_mapped_section *msec)
676 {
677 	int status;
678 
679 	if (!insn || !iext)
680 		return -pte_internal;
681 
682 	/* We know that @ip is contained in @section.
683 	 *
684 	 * Note that we need to translate @ip into a section offset.
685 	 */
686 	status = pt_msec_read(msec, insn->raw, sizeof(insn->raw), insn->ip);
687 	if (status < 0)
688 		return status;
689 
690 	/* We initialize @insn->size to the maximal possible size.  It will be
691 	 * set to the actual size during instruction decode.
692 	 */
693 	insn->size = (uint8_t) status;
694 
695 	return pt_ild_decode(insn, iext);
696 }
697 
698 /* Update the return-address stack if @insn is a near call.
699  *
700  * Returns zero on success, a negative error code otherwise.
701  */
702 static inline int pt_blk_log_call(struct pt_block_decoder *decoder,
703 				  const struct pt_insn *insn,
704 				  const struct pt_insn_ext *iext)
705 {
706 	if (!decoder || !insn || !iext)
707 		return -pte_internal;
708 
709 	if (insn->iclass != ptic_call)
710 		return 0;
711 
712 	/* Ignore direct calls to the next instruction that are used for
713 	 * position independent code.
714 	 */
715 	if (iext->variant.branch.is_direct &&
716 	    !iext->variant.branch.displacement)
717 		return 0;
718 
719 	return pt_retstack_push(&decoder->retstack, insn->ip + insn->size);
720 }
721 
722 /* Proceed by one instruction.
723  *
724  * Tries to decode the instruction at @decoder->ip and, on success, adds it to
725  * @block and provides it in @pinsn and @piext.
726  *
727  * The instruction will not be added if:
728  *
729  *   - the memory could not be read:  return error
730  *   - it could not be decoded:       return error
731  *   - @block is already full:        return zero
732  *   - @block would switch sections:  return zero
733  *
734  * Returns a positive integer if the instruction was added.
735  * Returns zero if the instruction didn't fit into @block.
736  * Returns a negative error code otherwise.
737  */
738 static int pt_blk_proceed_one_insn(struct pt_block_decoder *decoder,
739 				   struct pt_block *block,
740 				   struct pt_insn *pinsn,
741 				   struct pt_insn_ext *piext)
742 {
743 	struct pt_insn_ext iext;
744 	struct pt_insn insn;
745 	uint16_t ninsn;
746 	int status;
747 
748 	if (!decoder || !block || !pinsn || !piext)
749 		return -pte_internal;
750 
751 	/* There's nothing to do if there is no room in @block. */
752 	ninsn = block->ninsn + 1;
753 	if (!ninsn)
754 		return 0;
755 
756 	/* The truncated instruction must be last. */
757 	if (block->truncated)
758 		return 0;
759 
760 	memset(&insn, 0, sizeof(insn));
761 	memset(&iext, 0, sizeof(iext));
762 
763 	insn.mode = decoder->mode;
764 	insn.ip = decoder->ip;
765 
766 	status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
767 	if (status < 0)
768 		return status;
769 
770 	/* We do not switch sections inside a block. */
771 	if (insn.isid != block->isid) {
772 		if (!pt_blk_block_is_empty(block))
773 			return 0;
774 
775 		block->isid = insn.isid;
776 	}
777 
778 	/* If we couldn't read @insn's memory in one chunk from @insn.isid, we
779 	 * provide the memory in @block.
780 	 */
781 	if (insn.truncated) {
782 		memcpy(block->raw, insn.raw, insn.size);
783 		block->size = insn.size;
784 		block->truncated = 1;
785 	}
786 
787 	/* Log calls' return addresses for return compression. */
788 	status = pt_blk_log_call(decoder, &insn, &iext);
789 	if (status < 0)
790 		return status;
791 
792 	/* We have a new instruction. */
793 	block->iclass = insn.iclass;
794 	block->end_ip = insn.ip;
795 	block->ninsn = ninsn;
796 
797 	*pinsn = insn;
798 	*piext = iext;
799 
800 	return 1;
801 }
802 
803 
804 /* Proceed to a particular type of instruction without using trace.
805  *
806  * Proceed until we reach an instruction for which @predicate returns a positive
807  * integer or until:
808  *
809  *   - @predicate returns an error:  return error
810  *   - @block is full:               return zero
811  *   - @block would switch sections: return zero
812  *   - we would need trace:          return -pte_bad_query
813  *
814  * Provide the last instruction that was reached in @insn and @iext.
815  *
816  * Update @decoder->ip to point to the last IP that was reached.  If we fail due
817  * to lack of trace or if we reach a desired instruction, this is @insn->ip;
818  * otherwise this is the next instruction's IP.
819  *
820  * Returns a positive integer if a suitable instruction was reached.
821  * Returns zero if no such instruction was reached.
822  * Returns a negative error code otherwise.
823  */
824 static int pt_blk_proceed_to_insn(struct pt_block_decoder *decoder,
825 				  struct pt_block *block,
826 				  struct pt_insn *insn,
827 				  struct pt_insn_ext *iext,
828 				  int (*predicate)(const struct pt_insn *,
829 						   const struct pt_insn_ext *))
830 {
831 	int status;
832 
833 	if (!decoder || !insn || !predicate)
834 		return -pte_internal;
835 
836 	for (;;) {
837 		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
838 		if (status <= 0)
839 			return status;
840 
841 		/* We're done if this instruction matches the spec (positive
842 		 * status) or we run into an error (negative status).
843 		 */
844 		status = predicate(insn, iext);
845 		if (status != 0)
846 			return status;
847 
848 		/* Let's see if we can proceed to the next IP without trace. */
849 		status = pt_insn_next_ip(&decoder->ip, insn, iext);
850 		if (status < 0)
851 			return status;
852 
853 		/* End the block if the user asked us to.
854 		 *
855 		 * We only need to take care about direct near branches.
856 		 * Indirect and far branches require trace and will naturally
857 		 * end a block.
858 		 */
859 		if ((decoder->flags.variant.block.end_on_call &&
860 		     (insn->iclass == ptic_call)) ||
861 		    (decoder->flags.variant.block.end_on_jump &&
862 		     (insn->iclass == ptic_jump)))
863 			return 0;
864 	}
865 }
866 
867 /* Proceed to a particular IP without using trace.
868  *
869  * Proceed until we reach @ip or until:
870  *
871  *   - @block is full:               return zero
872  *   - @block would switch sections: return zero
873  *   - we would need trace:          return -pte_bad_query
874  *
875  * Provide the last instruction that was reached in @insn and @iext.  If we
876  * reached @ip, this is the instruction preceding it.
877  *
878  * Update @decoder->ip to point to the last IP that was reached.  If we fail due
879  * to lack of trace, this is @insn->ip; otherwise this is the next instruction's
880  * IP.
881  *
882  * Returns a positive integer if @ip was reached.
883  * Returns zero if no such instruction was reached.
884  * Returns a negative error code otherwise.
885  */
886 static int pt_blk_proceed_to_ip(struct pt_block_decoder *decoder,
887 				struct pt_block *block, struct pt_insn *insn,
888 				struct pt_insn_ext *iext, uint64_t ip)
889 {
890 	int status;
891 
892 	if (!decoder || !insn)
893 		return -pte_internal;
894 
895 	for (;;) {
896 		/* We're done when we reach @ip.  We may not even have to decode
897 		 * a single instruction in some cases.
898 		 */
899 		if (decoder->ip == ip)
900 			return 1;
901 
902 		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
903 		if (status <= 0)
904 			return status;
905 
906 		/* Let's see if we can proceed to the next IP without trace. */
907 		status = pt_insn_next_ip(&decoder->ip, insn, iext);
908 		if (status < 0)
909 			return status;
910 
911 		/* End the block if the user asked us to.
912 		 *
913 		 * We only need to take care about direct near branches.
914 		 * Indirect and far branches require trace and will naturally
915 		 * end a block.
916 		 *
917 		 * The call at the end of the block may have reached @ip; make
918 		 * sure to indicate that.
919 		 */
920 		if ((decoder->flags.variant.block.end_on_call &&
921 		     (insn->iclass == ptic_call)) ||
922 		    (decoder->flags.variant.block.end_on_jump &&
923 		     (insn->iclass == ptic_jump))) {
924 			return (decoder->ip == ip ? 1 : 0);
925 		}
926 	}
927 }
928 
929 /* Proceed to a particular IP with trace, if necessary.
930  *
931  * Proceed until we reach @ip or until:
932  *
933  *   - @block is full:               return zero
934  *   - @block would switch sections: return zero
935  *   - we need trace:                return zero
936  *
937  * Update @decoder->ip to point to the last IP that was reached.
938  *
939  * A return of zero ends @block.
940  *
941  * Returns a positive integer if @ip was reached.
942  * Returns zero if no such instruction was reached.
943  * Returns a negative error code otherwise.
944  */
945 static int pt_blk_proceed_to_ip_with_trace(struct pt_block_decoder *decoder,
946 					   struct pt_block *block,
947 					   uint64_t ip)
948 {
949 	struct pt_insn_ext iext;
950 	struct pt_insn insn;
951 	int status;
952 
953 	/* Try to reach @ip without trace.
954 	 *
955 	 * We're also OK if @block overflowed or we switched sections and we
956 	 * have to try again in the next iteration.
957 	 */
958 	status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext, ip);
959 	if (status != -pte_bad_query)
960 		return status;
961 
962 	/* Needing trace is not an error.  We use trace to determine the next
963 	 * start IP and end the block.
964 	 */
965 	return pt_blk_proceed_with_trace(decoder, &insn, &iext);
966 }
967 
968 static int pt_insn_skl014(const struct pt_insn *insn,
969 			  const struct pt_insn_ext *iext)
970 {
971 	if (!insn || !iext)
972 		return 0;
973 
974 	switch (insn->iclass) {
975 	default:
976 		return 0;
977 
978 	case ptic_call:
979 	case ptic_jump:
980 		return iext->variant.branch.is_direct;
981 
982 	case ptic_other:
983 		return pt_insn_changes_cr3(insn, iext);
984 	}
985 }
986 
987 /* Proceed to the location of a synchronous disabled event with suppressed IP
988  * considering SKL014.
989  *
990  * We have a (synchronous) disabled event pending.  Proceed to the event
991  * location and indicate whether we were able to reach it.
992  *
993  * With SKL014 a TIP.PGD with suppressed IP may also be generated by a direct
994  * unconditional branch that clears FilterEn by jumping out of a filter region
995  * or into a TraceStop region.  Use the filter configuration to determine the
996  * exact branch the event binds to.
997  *
998  * The last instruction that was reached is stored in @insn/@iext.
999  *
1000  * Returns a positive integer if the event location was reached.
1001  * Returns zero if the event location was not reached.
1002  * Returns a negative error code otherwise.
1003  */
1004 static int pt_blk_proceed_skl014(struct pt_block_decoder *decoder,
1005 				 struct pt_block *block, struct pt_insn *insn,
1006 				 struct pt_insn_ext *iext)
1007 {
1008 	const struct pt_conf_addr_filter *addr_filter;
1009 	int status;
1010 
1011 	if (!decoder || !block || !insn || !iext)
1012 		return -pte_internal;
1013 
1014 	addr_filter = &decoder->query.config.addr_filter;
1015 	for (;;) {
1016 		uint64_t ip;
1017 
1018 		status = pt_blk_proceed_to_insn(decoder, block, insn, iext,
1019 						pt_insn_skl014);
1020 		if (status <= 0)
1021 			break;
1022 
1023 		/* The erratum doesn't apply if we can bind the event to a
1024 		 * CR3-changing instruction.
1025 		 */
1026 		if (pt_insn_changes_cr3(insn, iext))
1027 			break;
1028 
1029 		/* Check the filter against the branch target. */
1030 		status = pt_insn_next_ip(&ip, insn, iext);
1031 		if (status < 0)
1032 			break;
1033 
1034 		status = pt_filter_addr_check(addr_filter, ip);
1035 		if (status <= 0) {
1036 			/* We need to flip the indication.
1037 			 *
1038 			 * We reached the event location when @ip lies inside a
1039 			 * tracing-disabled region.
1040 			 */
1041 			if (!status)
1042 				status = 1;
1043 
1044 			break;
1045 		}
1046 
1047 		/* This is not the correct instruction.  Proceed past it and try
1048 		 * again.
1049 		 */
1050 		decoder->ip = ip;
1051 
1052 		/* End the block if the user asked us to.
1053 		 *
1054 		 * We only need to take care about direct near branches.
1055 		 * Indirect and far branches require trace and will naturally
1056 		 * end a block.
1057 		 */
1058 		if ((decoder->flags.variant.block.end_on_call &&
1059 		    (insn->iclass == ptic_call)) ||
1060 		    (decoder->flags.variant.block.end_on_jump &&
1061 		    (insn->iclass == ptic_jump)))
1062 			break;
1063 	}
1064 
1065 	return status;
1066 }
1067 
1068 /* Proceed to the event location for a disabled event.
1069  *
1070  * We have a (synchronous) disabled event pending.  Proceed to the event
1071  * location and indicate whether we were able to reach it.
1072  *
1073  * The last instruction that was reached is stored in @insn/@iext.
1074  *
1075  * Returns a positive integer if the event location was reached.
1076  * Returns zero if the event location was not reached.
1077  * Returns a negative error code otherwise.
1078  */
1079 static int pt_blk_proceed_to_disabled(struct pt_block_decoder *decoder,
1080 				      struct pt_block *block,
1081 				      struct pt_insn *insn,
1082 				      struct pt_insn_ext *iext,
1083 				      const struct pt_event *ev)
1084 {
1085 	if (!decoder || !block || !ev)
1086 		return -pte_internal;
1087 
1088 	if (ev->ip_suppressed) {
1089 		/* Due to SKL014 the TIP.PGD payload may be suppressed also for
1090 		 * direct branches.
1091 		 *
1092 		 * If we don't have a filter configuration we assume that no
1093 		 * address filters were used and the erratum does not apply.
1094 		 *
1095 		 * We might otherwise disable tracing too early.
1096 		 */
1097 		if (decoder->query.config.addr_filter.config.addr_cfg &&
1098 		    decoder->query.config.errata.skl014)
1099 			return pt_blk_proceed_skl014(decoder, block, insn,
1100 						     iext);
1101 
1102 		/* A synchronous disabled event also binds to far branches and
1103 		 * CPL-changing instructions.  Both would require trace,
1104 		 * however, and are thus implicitly handled by erroring out.
1105 		 *
1106 		 * The would-require-trace error is handled by our caller.
1107 		 */
1108 		return pt_blk_proceed_to_insn(decoder, block, insn, iext,
1109 					      pt_insn_changes_cr3);
1110 	} else
1111 		return pt_blk_proceed_to_ip(decoder, block, insn, iext,
1112 					    ev->variant.disabled.ip);
1113 }
1114 
1115 /* Set the expected resume address for a synchronous disable.
1116  *
1117  * On a synchronous disable, @decoder->ip still points to the instruction to
1118  * which the event bound.  That's not where we expect tracing to resume.
1119  *
1120  * For calls, a fair assumption is that tracing resumes after returning from the
1121  * called function.  For other types of instructions, we simply don't know.
1122  *
1123  * Returns zero on success, a negative pt_error_code otherwise.
1124  */
1125 static int pt_blk_set_disable_resume_ip(struct pt_block_decoder *decoder,
1126 					const struct pt_insn *insn)
1127 {
1128 	if (!decoder || !insn)
1129 		return -pte_internal;
1130 
1131 	switch (insn->iclass) {
1132 	case ptic_call:
1133 	case ptic_far_call:
1134 		decoder->ip = insn->ip + insn->size;
1135 		break;
1136 
1137 	default:
1138 		decoder->ip = 0ull;
1139 		break;
1140 	}
1141 
1142 	return 0;
1143 }
1144 
1145 /* Proceed to the event location for an async paging event.
1146  *
1147  * We have an async paging event pending.  Proceed to the event location and
1148  * indicate whether we were able to reach it.  Needing trace in order to proceed
1149  * is not an error in this case but ends the block.
1150  *
1151  * Returns a positive integer if the event location was reached.
1152  * Returns zero if the event location was not reached.
1153  * Returns a negative error code otherwise.
1154  */
1155 static int pt_blk_proceed_to_async_paging(struct pt_block_decoder *decoder,
1156 					  struct pt_block *block,
1157 					  const struct pt_event *ev)
1158 {
1159 	int status;
1160 
1161 	if (!decoder || !ev)
1162 		return -pte_internal;
1163 
1164 	/* Apply the event immediately if we don't have an IP. */
1165 	if (ev->ip_suppressed)
1166 		return 1;
1167 
1168 	status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1169 						 ev->variant.async_paging.ip);
1170 	if (status < 0)
1171 		return status;
1172 
1173 	/* We may have reached the IP. */
1174 	return (decoder->ip == ev->variant.async_paging.ip ? 1 : 0);
1175 }
1176 
1177 /* Proceed to the event location for an async vmcs event.
1178  *
1179  * We have an async vmcs event pending.  Proceed to the event location and
1180  * indicate whether we were able to reach it.  Needing trace in order to proceed
1181  * is not an error in this case but ends the block.
1182  *
1183  * Returns a positive integer if the event location was reached.
1184  * Returns zero if the event location was not reached.
1185  * Returns a negative error code otherwise.
1186  */
1187 static int pt_blk_proceed_to_async_vmcs(struct pt_block_decoder *decoder,
1188 					struct pt_block *block,
1189 					const struct pt_event *ev)
1190 {
1191 	int status;
1192 
1193 	if (!decoder || !ev)
1194 		return -pte_internal;
1195 
1196 	/* Apply the event immediately if we don't have an IP. */
1197 	if (ev->ip_suppressed)
1198 		return 1;
1199 
1200 	status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1201 						 ev->variant.async_vmcs.ip);
1202 	if (status < 0)
1203 		return status;
1204 
1205 	/* We may have reached the IP. */
1206 	return (decoder->ip == ev->variant.async_vmcs.ip ? 1 : 0);
1207 }
1208 
1209 /* Proceed to the event location for an exec mode event.
1210  *
1211  * We have an exec mode event pending.  Proceed to the event location and
1212  * indicate whether we were able to reach it.  Needing trace in order to proceed
1213  * is not an error in this case but ends the block.
1214  *
1215  * Returns a positive integer if the event location was reached.
1216  * Returns zero if the event location was not reached.
1217  * Returns a negative error code otherwise.
1218  */
1219 static int pt_blk_proceed_to_exec_mode(struct pt_block_decoder *decoder,
1220 				       struct pt_block *block,
1221 				       const struct pt_event *ev)
1222 {
1223 	int status;
1224 
1225 	if (!decoder || !ev)
1226 		return -pte_internal;
1227 
1228 	/* Apply the event immediately if we don't have an IP. */
1229 	if (ev->ip_suppressed)
1230 		return 1;
1231 
1232 	status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1233 						 ev->variant.exec_mode.ip);
1234 	if (status < 0)
1235 		return status;
1236 
1237 	/* We may have reached the IP. */
1238 	return (decoder->ip == ev->variant.exec_mode.ip ? 1 : 0);
1239 }
1240 
1241 /* Proceed to the event location for a ptwrite event.
1242  *
1243  * We have a ptwrite event pending.  Proceed to the event location and indicate
1244  * whether we were able to reach it.
1245  *
1246  * In case of the event binding to a ptwrite instruction, we pass beyond that
1247  * instruction and update the event to provide the instruction's IP.
1248  *
1249  * In the case of the event binding to an IP provided in the event, we move
1250  * beyond the instruction at that IP.
1251  *
1252  * Returns a positive integer if the event location was reached.
1253  * Returns zero if the event location was not reached.
1254  * Returns a negative error code otherwise.
1255  */
1256 static int pt_blk_proceed_to_ptwrite(struct pt_block_decoder *decoder,
1257 				     struct pt_block *block,
1258 				     struct pt_insn *insn,
1259 				     struct pt_insn_ext *iext,
1260 				     struct pt_event *ev)
1261 {
1262 	int status;
1263 
1264 	if (!insn || !ev)
1265 		return -pte_internal;
1266 
1267 	/* If we don't have an IP, the event binds to the next PTWRITE
1268 	 * instruction.
1269 	 *
1270 	 * If we have an IP it still binds to the next PTWRITE instruction but
1271 	 * now the IP tells us where that instruction is.  This makes most sense
1272 	 * when tracing is disabled and we don't have any other means of finding
1273 	 * the PTWRITE instruction.  We nevertheless distinguish the two cases,
1274 	 * here.
1275 	 *
1276 	 * In both cases, we move beyond the PTWRITE instruction, so it will be
1277 	 * the last instruction in the current block and @decoder->ip will point
1278 	 * to the instruction following it.
1279 	 */
1280 	if (ev->ip_suppressed) {
1281 		status = pt_blk_proceed_to_insn(decoder, block, insn, iext,
1282 						pt_insn_is_ptwrite);
1283 		if (status <= 0)
1284 			return status;
1285 
1286 		/* We now know the IP of the PTWRITE instruction corresponding
1287 		 * to this event.  Fill it in to make it more convenient for the
1288 		 * user to process the event.
1289 		 */
1290 		ev->variant.ptwrite.ip = insn->ip;
1291 		ev->ip_suppressed = 0;
1292 	} else {
1293 		status = pt_blk_proceed_to_ip(decoder, block, insn, iext,
1294 					      ev->variant.ptwrite.ip);
1295 		if (status <= 0)
1296 			return status;
1297 
1298 		/* We reached the PTWRITE instruction and @decoder->ip points to
1299 		 * it; @insn/@iext still contain the preceding instruction.
1300 		 *
1301 		 * Proceed beyond the PTWRITE to account for it.  Note that we
1302 		 * may still overflow the block, which would cause us to
1303 		 * postpone both instruction and event to the next block.
1304 		 */
1305 		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
1306 		if (status <= 0)
1307 			return status;
1308 	}
1309 
1310 	return 1;
1311 }
1312 
1313 /* Try to work around erratum SKD022.
1314  *
1315  * If we get an asynchronous disable on VMLAUNCH or VMRESUME, the FUP that
1316  * caused the disable to be asynchronous might have been bogous.
1317  *
1318  * Returns a positive integer if the erratum has been handled.
1319  * Returns zero if the erratum does not apply.
1320  * Returns a negative error code otherwise.
1321  */
1322 static int pt_blk_handle_erratum_skd022(struct pt_block_decoder *decoder,
1323 					struct pt_event *ev)
1324 {
1325 	struct pt_insn_ext iext;
1326 	struct pt_insn insn;
1327 	int errcode;
1328 
1329 	if (!decoder || !ev)
1330 		return -pte_internal;
1331 
1332 	insn.mode = decoder->mode;
1333 	insn.ip = ev->variant.async_disabled.at;
1334 
1335 	errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
1336 	if (errcode < 0)
1337 		return 0;
1338 
1339 	switch (iext.iclass) {
1340 	default:
1341 		/* The erratum does not apply. */
1342 		return 0;
1343 
1344 	case PTI_INST_VMLAUNCH:
1345 	case PTI_INST_VMRESUME:
1346 		/* The erratum may apply.  We can't be sure without a lot more
1347 		 * analysis.  Let's assume it does.
1348 		 *
1349 		 * We turn the async disable into a sync disable.  Our caller
1350 		 * will restart event processing.
1351 		 */
1352 		ev->type = ptev_disabled;
1353 		ev->variant.disabled.ip = ev->variant.async_disabled.ip;
1354 
1355 		return 1;
1356 	}
1357 }
1358 
1359 /* Postpone proceeding past @insn/@iext and indicate a pending event.
1360  *
1361  * There may be further events pending on @insn/@iext.  Postpone proceeding past
1362  * @insn/@iext until we processed all events that bind to it.
1363  *
1364  * Returns a non-negative pt_status_flag bit-vector indicating a pending event
1365  * on success, a negative pt_error_code otherwise.
1366  */
1367 static int pt_blk_postpone_insn(struct pt_block_decoder *decoder,
1368 				const struct pt_insn *insn,
1369 				const struct pt_insn_ext *iext)
1370 {
1371 	if (!decoder || !insn || !iext)
1372 		return -pte_internal;
1373 
1374 	/* Only one can be active. */
1375 	if (decoder->process_insn)
1376 		return -pte_internal;
1377 
1378 	decoder->process_insn = 1;
1379 	decoder->insn = *insn;
1380 	decoder->iext = *iext;
1381 
1382 	return pt_blk_status(decoder, pts_event_pending);
1383 }
1384 
1385 /* Remove any postponed instruction from @decoder.
1386  *
1387  * Returns zero on success, a negative pt_error_code otherwise.
1388  */
1389 static int pt_blk_clear_postponed_insn(struct pt_block_decoder *decoder)
1390 {
1391 	if (!decoder)
1392 		return -pte_internal;
1393 
1394 	decoder->process_insn = 0;
1395 	decoder->bound_paging = 0;
1396 	decoder->bound_vmcs = 0;
1397 	decoder->bound_ptwrite = 0;
1398 
1399 	return 0;
1400 }
1401 
1402 /* Proceed past a postponed instruction.
1403  *
1404  * If an instruction has been postponed in @decoder, proceed past it.
1405  *
1406  * Returns zero on success, a negative pt_error_code otherwise.
1407  */
1408 static int pt_blk_proceed_postponed_insn(struct pt_block_decoder *decoder)
1409 {
1410 	int status;
1411 
1412 	if (!decoder)
1413 		return -pte_internal;
1414 
1415 	/* There's nothing to do if we have no postponed instruction. */
1416 	if (!decoder->process_insn)
1417 		return 0;
1418 
1419 	/* There's nothing to do if tracing got disabled. */
1420 	if (!decoder->enabled)
1421 		return pt_blk_clear_postponed_insn(decoder);
1422 
1423 	status = pt_insn_next_ip(&decoder->ip, &decoder->insn, &decoder->iext);
1424 	if (status < 0) {
1425 		if (status != -pte_bad_query)
1426 			return status;
1427 
1428 		status = pt_blk_proceed_with_trace(decoder, &decoder->insn,
1429 						   &decoder->iext);
1430 		if (status < 0)
1431 			return status;
1432 	}
1433 
1434 	return pt_blk_clear_postponed_insn(decoder);
1435 }
1436 
1437 /* Proceed to the next event.
1438  *
1439  * We have an event pending.  Proceed to the event location and indicate the
1440  * event to the user.
1441  *
1442  * On our way to the event location we may also be forced to postpone the event
1443  * to the next block, e.g. if we overflow the number of instructions in the
1444  * block or if we need trace in order to reach the event location.
1445  *
1446  * If we're not able to reach the event location, we return zero.  This is what
1447  * pt_blk_status() would return since:
1448  *
1449  *   - we suppress pts_eos as long as we're processing events
1450  *   - we do not set pts_ip_suppressed since tracing must be enabled
1451  *
1452  * Returns a non-negative pt_status_flag bit-vector on success, a negative error
1453  * code otherwise.
1454  */
1455 static int pt_blk_proceed_event(struct pt_block_decoder *decoder,
1456 				struct pt_block *block)
1457 {
1458 	struct pt_insn_ext iext;
1459 	struct pt_insn insn;
1460 	struct pt_event *ev;
1461 	int status;
1462 
1463 	if (!decoder || !decoder->process_event || !block)
1464 		return -pte_internal;
1465 
1466 	ev = &decoder->event;
1467 	switch (ev->type) {
1468 	case ptev_enabled:
1469 		break;
1470 
1471 	case ptev_disabled:
1472 		status = pt_blk_proceed_to_disabled(decoder, block, &insn,
1473 						    &iext, ev);
1474 		if (status <= 0) {
1475 			/* A synchronous disable event also binds to the next
1476 			 * indirect or conditional branch, i.e. to any branch
1477 			 * that would have required trace.
1478 			 */
1479 			if (status != -pte_bad_query)
1480 				return status;
1481 
1482 			status = pt_blk_set_disable_resume_ip(decoder, &insn);
1483 			if (status < 0)
1484 				return status;
1485 		}
1486 
1487 		break;
1488 
1489 	case ptev_async_disabled:
1490 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1491 					      ev->variant.async_disabled.at);
1492 		if (status <= 0)
1493 			return status;
1494 
1495 		if (decoder->query.config.errata.skd022) {
1496 			status = pt_blk_handle_erratum_skd022(decoder, ev);
1497 			if (status != 0) {
1498 				if (status < 0)
1499 					return status;
1500 
1501 				/* If the erratum hits, we modify the event.
1502 				 * Try again.
1503 				 */
1504 				return pt_blk_proceed_event(decoder, block);
1505 			}
1506 		}
1507 
1508 		break;
1509 
1510 	case ptev_async_branch:
1511 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1512 					      ev->variant.async_branch.from);
1513 		if (status <= 0)
1514 			return status;
1515 
1516 		break;
1517 
1518 	case ptev_paging:
1519 		if (!decoder->enabled)
1520 			break;
1521 
1522 		status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1523 						pt_insn_binds_to_pip);
1524 		if (status <= 0)
1525 			return status;
1526 
1527 		/* We bound a paging event.  Make sure we do not bind further
1528 		 * paging events to this instruction.
1529 		 */
1530 		decoder->bound_paging = 1;
1531 
1532 		return pt_blk_postpone_insn(decoder, &insn, &iext);
1533 
1534 	case ptev_async_paging:
1535 		status = pt_blk_proceed_to_async_paging(decoder, block, ev);
1536 		if (status <= 0)
1537 			return status;
1538 
1539 		break;
1540 
1541 	case ptev_vmcs:
1542 		if (!decoder->enabled)
1543 			break;
1544 
1545 		status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1546 						pt_insn_binds_to_vmcs);
1547 		if (status <= 0)
1548 			return status;
1549 
1550 		/* We bound a vmcs event.  Make sure we do not bind further vmcs
1551 		 * events to this instruction.
1552 		 */
1553 		decoder->bound_vmcs = 1;
1554 
1555 		return pt_blk_postpone_insn(decoder, &insn, &iext);
1556 
1557 	case ptev_async_vmcs:
1558 		status = pt_blk_proceed_to_async_vmcs(decoder, block, ev);
1559 		if (status <= 0)
1560 			return status;
1561 
1562 		break;
1563 
1564 	case ptev_overflow:
1565 		break;
1566 
1567 	case ptev_exec_mode:
1568 		status = pt_blk_proceed_to_exec_mode(decoder, block, ev);
1569 		if (status <= 0)
1570 			return status;
1571 
1572 		break;
1573 
1574 	case ptev_tsx:
1575 		if (ev->ip_suppressed)
1576 			break;
1577 
1578 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1579 					      ev->variant.tsx.ip);
1580 		if (status <= 0)
1581 			return status;
1582 
1583 		break;
1584 
1585 	case ptev_stop:
1586 		break;
1587 
1588 	case ptev_exstop:
1589 		if (!decoder->enabled || ev->ip_suppressed)
1590 			break;
1591 
1592 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1593 					      ev->variant.exstop.ip);
1594 		if (status <= 0)
1595 			return status;
1596 
1597 		break;
1598 
1599 	case ptev_mwait:
1600 		if (!decoder->enabled || ev->ip_suppressed)
1601 			break;
1602 
1603 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1604 					      ev->variant.mwait.ip);
1605 		if (status <= 0)
1606 			return status;
1607 
1608 		break;
1609 
1610 	case ptev_pwre:
1611 	case ptev_pwrx:
1612 		break;
1613 
1614 	case ptev_ptwrite:
1615 		if (!decoder->enabled)
1616 			break;
1617 
1618 		status = pt_blk_proceed_to_ptwrite(decoder, block, &insn,
1619 						   &iext, ev);
1620 		if (status <= 0)
1621 			return status;
1622 
1623 		/* We bound a ptwrite event.  Make sure we do not bind further
1624 		 * ptwrite events to this instruction.
1625 		 */
1626 		decoder->bound_ptwrite = 1;
1627 
1628 		return pt_blk_postpone_insn(decoder, &insn, &iext);
1629 
1630 	case ptev_tick:
1631 	case ptev_cbr:
1632 	case ptev_mnt:
1633 		break;
1634 	}
1635 
1636 	return pt_blk_status(decoder, pts_event_pending);
1637 }
1638 
1639 /* Proceed to the next decision point without using the block cache.
1640  *
1641  * Tracing is enabled and we don't have an event pending.  Proceed as far as
1642  * we get without trace.  Stop when we either:
1643  *
1644  *   - need trace in order to continue
1645  *   - overflow the max number of instructions in a block
1646  *
1647  * We actually proceed one instruction further to get the start IP for the next
1648  * block.  This only updates @decoder's internal state, though.
1649  *
1650  * Returns zero on success, a negative error code otherwise.
1651  */
1652 static int pt_blk_proceed_no_event_uncached(struct pt_block_decoder *decoder,
1653 					    struct pt_block *block)
1654 {
1655 	struct pt_insn_ext iext;
1656 	struct pt_insn insn;
1657 	int status;
1658 
1659 	if (!decoder || !block)
1660 		return -pte_internal;
1661 
1662 	/* This is overly conservative, really.  We shouldn't get a bad-query
1663 	 * status unless we decoded at least one instruction successfully.
1664 	 */
1665 	memset(&insn, 0, sizeof(insn));
1666 	memset(&iext, 0, sizeof(iext));
1667 
1668 	/* Proceed as far as we get without trace. */
1669 	status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1670 					pt_insn_false);
1671 	if (status < 0) {
1672 		if (status != -pte_bad_query)
1673 			return status;
1674 
1675 		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
1676 	}
1677 
1678 	return 0;
1679 }
1680 
/* Check if @ip lies within the address range of the mapped section @msec.
 *
 * The range is taken from pt_msec_begin() and pt_msec_end(); the end address
 * itself is not part of the range.  @msec is passed on to those two functions
 * without any checks.
 *
 * Returns non-zero if @ip is inside the range.
 * Returns zero if it isn't.
 */
static inline int pt_blk_is_in_section(const struct pt_mapped_section *msec,
				       uint64_t ip)
{
	uint64_t first, limit;

	first = pt_msec_begin(msec);
	limit = pt_msec_end(msec);

	if (ip < first)
		return 0;

	return (ip < limit);
}
1696 
1697 /* Insert a trampoline block cache entry.
1698  *
1699  * Add a trampoline block cache entry at @ip to continue at @nip, where @nip
1700  * must be the next instruction after @ip.
1701  *
1702  * Both @ip and @nip must be section-relative
1703  *
1704  * Returns zero on success, a negative error code otherwise.
1705  */
1706 static inline int pt_blk_add_trampoline(struct pt_block_cache *bcache,
1707 					uint64_t ip, uint64_t nip,
1708 					enum pt_exec_mode mode)
1709 {
1710 	struct pt_bcache_entry bce;
1711 	int64_t disp;
1712 
1713 	/* The displacement from @ip to @nip for the trampoline. */
1714 	disp = (int64_t) (nip - ip);
1715 
1716 	memset(&bce, 0, sizeof(bce));
1717 	bce.displacement = (int32_t) disp;
1718 	bce.ninsn = 1;
1719 	bce.mode = mode;
1720 	bce.qualifier = ptbq_again;
1721 
1722 	/* If we can't reach @nip without overflowing the displacement field, we
1723 	 * have to stop and re-decode the instruction at @ip.
1724 	 */
1725 	if ((int64_t) bce.displacement != disp) {
1726 
1727 		memset(&bce, 0, sizeof(bce));
1728 		bce.ninsn = 1;
1729 		bce.mode = mode;
1730 		bce.qualifier = ptbq_decode;
1731 	}
1732 
1733 	return pt_bcache_add(bcache, ip, bce);
1734 }
1735 
1736 /* Insert a decode block cache entry.
1737  *
1738  * Add a decode block cache entry at @ioff.
1739  *
1740  * Returns zero on success, a negative error code otherwise.
1741  */
1742 static inline int pt_blk_add_decode(struct pt_block_cache *bcache,
1743 				    uint64_t ioff, enum pt_exec_mode mode)
1744 {
1745 	struct pt_bcache_entry bce;
1746 
1747 	memset(&bce, 0, sizeof(bce));
1748 	bce.ninsn = 1;
1749 	bce.mode = mode;
1750 	bce.qualifier = ptbq_decode;
1751 
1752 	return pt_bcache_add(bcache, ioff, bce);
1753 }
1754 
enum {
	/* Upper bound on the number of steps for one block cache fill. */
	bcache_fill_steps	= 0x400
};
1759 
1760 /* Proceed to the next instruction and fill the block cache for @decoder->ip.
1761  *
1762  * Tracing is enabled and we don't have an event pending.  The current IP is not
1763  * yet cached.
1764  *
1765  * Proceed one instruction without using the block cache, then try to proceed
1766  * further using the block cache.
1767  *
1768  * On our way back, add a block cache entry for the IP before proceeding.  Note
1769  * that the recursion is bounded by @steps and ultimately by the maximum number
1770  * of instructions in a block.
1771  *
1772  * Returns zero on success, a negative error code otherwise.
1773  */
1774 static int
1775 pt_blk_proceed_no_event_fill_cache(struct pt_block_decoder *decoder,
1776 				   struct pt_block *block,
1777 				   struct pt_block_cache *bcache,
1778 				   const struct pt_mapped_section *msec,
1779 				   size_t steps)
1780 {
1781 	struct pt_bcache_entry bce;
1782 	struct pt_insn_ext iext;
1783 	struct pt_insn insn;
1784 	uint64_t nip, dip, ioff, noff;
1785 	int64_t disp;
1786 	int status;
1787 
1788 	if (!decoder || !steps)
1789 		return -pte_internal;
1790 
1791 	/* Proceed one instruction by decoding and examining it.
1792 	 *
1793 	 * Note that we also return on a status of zero that indicates that the
1794 	 * instruction didn't fit into @block.
1795 	 */
1796 	status = pt_blk_proceed_one_insn(decoder, block, &insn, &iext);
1797 	if (status <= 0)
1798 		return status;
1799 
1800 	ioff = pt_msec_unmap(msec, insn.ip);
1801 
1802 	/* Let's see if we can proceed to the next IP without trace.
1803 	 *
1804 	 * If we can't, this is certainly a decision point.
1805 	 */
1806 	status = pt_insn_next_ip(&decoder->ip, &insn, &iext);
1807 	if (status < 0) {
1808 		if (status != -pte_bad_query)
1809 			return status;
1810 
1811 		memset(&bce, 0, sizeof(bce));
1812 		bce.ninsn = 1;
1813 		bce.mode = insn.mode;
1814 		bce.isize = insn.size;
1815 
1816 		/* Clear the instruction size in case of overflows. */
1817 		if ((uint8_t) bce.isize != insn.size)
1818 			bce.isize = 0;
1819 
1820 		switch (insn.iclass) {
1821 		case ptic_ptwrite:
1822 		case ptic_error:
1823 		case ptic_other:
1824 			return -pte_internal;
1825 
1826 		case ptic_jump:
1827 			/* A direct jump doesn't require trace. */
1828 			if (iext.variant.branch.is_direct)
1829 				return -pte_internal;
1830 
1831 			bce.qualifier = ptbq_indirect;
1832 			break;
1833 
1834 		case ptic_call:
1835 			/* A direct call doesn't require trace. */
1836 			if (iext.variant.branch.is_direct)
1837 				return -pte_internal;
1838 
1839 			bce.qualifier = ptbq_ind_call;
1840 			break;
1841 
1842 		case ptic_return:
1843 			bce.qualifier = ptbq_return;
1844 			break;
1845 
1846 		case ptic_cond_jump:
1847 			bce.qualifier = ptbq_cond;
1848 			break;
1849 
1850 		case ptic_far_call:
1851 		case ptic_far_return:
1852 		case ptic_far_jump:
1853 			bce.qualifier = ptbq_indirect;
1854 			break;
1855 		}
1856 
1857 		/* If the block was truncated, we have to decode its last
1858 		 * instruction each time.
1859 		 *
1860 		 * We could have skipped the above switch and size assignment in
1861 		 * this case but this is already a slow and hopefully infrequent
1862 		 * path.
1863 		 */
1864 		if (block->truncated)
1865 			bce.qualifier = ptbq_decode;
1866 
1867 		status = pt_bcache_add(bcache, ioff, bce);
1868 		if (status < 0)
1869 			return status;
1870 
1871 		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
1872 	}
1873 
1874 	/* The next instruction's IP. */
1875 	nip = decoder->ip;
1876 	noff = pt_msec_unmap(msec, nip);
1877 
1878 	/* Even if we were able to proceed without trace, we might have to stop
1879 	 * here for various reasons:
1880 	 *
1881 	 *   - at near direct calls to update the return-address stack
1882 	 *
1883 	 *     We are forced to re-decode @insn to get the branch displacement.
1884 	 *
1885 	 *     Even though it is constant, we don't cache it to avoid increasing
1886 	 *     the size of a cache entry.  Note that the displacement field is
1887 	 *     zero for this entry and we might be tempted to use it - but other
1888 	 *     entries that point to this decision point will have non-zero
1889 	 *     displacement.
1890 	 *
1891 	 *     We could proceed after a near direct call but we migh as well
1892 	 *     postpone it to the next iteration.  Make sure to end the block if
1893 	 *     @decoder->flags.variant.block.end_on_call is set, though.
1894 	 *
1895 	 *   - at near direct backwards jumps to detect section splits
1896 	 *
1897 	 *     In case the current section is split underneath us, we must take
1898 	 *     care to detect that split.
1899 	 *
1900 	 *     There is one corner case where the split is in the middle of a
1901 	 *     linear sequence of instructions that branches back into the
1902 	 *     originating section.
1903 	 *
1904 	 *     Calls, indirect branches, and far branches are already covered
1905 	 *     since they either require trace or already require us to stop
1906 	 *     (i.e. near direct calls) for other reasons.  That leaves near
1907 	 *     direct backward jumps.
1908 	 *
1909 	 *     Instead of the decode stop at the jump instruction we're using we
1910 	 *     could have made sure that other block cache entries that extend
1911 	 *     this one insert a trampoline to the jump's entry.  This would
1912 	 *     have been a bit more complicated.
1913 	 *
1914 	 *   - if we switched sections
1915 	 *
1916 	 *     This ends a block just like a branch that requires trace.
1917 	 *
1918 	 *     We need to re-decode @insn in order to determine the start IP of
1919 	 *     the next block.
1920 	 *
1921 	 *   - if the block is truncated
1922 	 *
1923 	 *     We need to read the last instruction's memory from multiple
1924 	 *     sections and provide it to the user.
1925 	 *
1926 	 *     We could still use the block cache but then we'd have to handle
1927 	 *     this case for each qualifier.  Truncation is hopefully rare and
1928 	 *     having to read the memory for the instruction from multiple
1929 	 *     sections is already slow.  Let's rather keep things simple and
1930 	 *     route it through the decode flow, where we already have
1931 	 *     everything in place.
1932 	 */
1933 	switch (insn.iclass) {
1934 	case ptic_call:
1935 		return pt_blk_add_decode(bcache, ioff, insn.mode);
1936 
1937 	case ptic_jump:
1938 		/* An indirect branch requires trace and should have been
1939 		 * handled above.
1940 		 */
1941 		if (!iext.variant.branch.is_direct)
1942 			return -pte_internal;
1943 
1944 		if (iext.variant.branch.displacement < 0 ||
1945 		    decoder->flags.variant.block.end_on_jump)
1946 			return pt_blk_add_decode(bcache, ioff, insn.mode);
1947 
1948 		fallthrough;
1949 	default:
1950 		if (!pt_blk_is_in_section(msec, nip) || block->truncated)
1951 			return pt_blk_add_decode(bcache, ioff, insn.mode);
1952 
1953 		break;
1954 	}
1955 
1956 	/* We proceeded one instruction.  Let's see if we have a cache entry for
1957 	 * the next instruction.
1958 	 */
1959 	status = pt_bcache_lookup(&bce, bcache, noff);
1960 	if (status < 0)
1961 		return status;
1962 
1963 	/* If we don't have a valid cache entry, yet, fill the cache some more.
1964 	 *
1965 	 * On our way back, we add a cache entry for this instruction based on
1966 	 * the cache entry of the succeeding instruction.
1967 	 */
1968 	if (!pt_bce_is_valid(bce)) {
1969 		/* If we exceeded the maximum number of allowed steps, we insert
1970 		 * a trampoline to the next instruction.
1971 		 *
1972 		 * The next time we encounter the same code, we will use the
1973 		 * trampoline to jump directly to where we left off this time
1974 		 * and continue from there.
1975 		 */
1976 		steps -= 1;
1977 		if (!steps)
1978 			return pt_blk_add_trampoline(bcache, ioff, noff,
1979 						     insn.mode);
1980 
1981 		status = pt_blk_proceed_no_event_fill_cache(decoder, block,
1982 							    bcache, msec,
1983 							    steps);
1984 		if (status < 0)
1985 			return status;
1986 
1987 		/* Let's see if we have more luck this time. */
1988 		status = pt_bcache_lookup(&bce, bcache, noff);
1989 		if (status < 0)
1990 			return status;
1991 
1992 		/* If we still don't have a valid cache entry, we're done.  Most
1993 		 * likely, @block overflowed and we couldn't proceed past the
1994 		 * next instruction.
1995 		 */
1996 		if (!pt_bce_is_valid(bce))
1997 			return 0;
1998 	}
1999 
2000 	/* We must not have switched execution modes.
2001 	 *
2002 	 * This would require an event and we're on the no-event flow.
2003 	 */
2004 	if (pt_bce_exec_mode(bce) != insn.mode)
2005 		return -pte_internal;
2006 
2007 	/* The decision point IP and the displacement from @insn.ip. */
2008 	dip = nip + (uint64_t) (int64_t) bce.displacement;
2009 	disp = (int64_t) (dip - insn.ip);
2010 
2011 	/* We may have switched sections if the section was split.  See
2012 	 * pt_blk_proceed_no_event_cached() for a more elaborate comment.
2013 	 *
2014 	 * We're not adding a block cache entry since this won't apply to the
2015 	 * original section which may be shared with other decoders.
2016 	 *
2017 	 * We will instead take the slow path until the end of the section.
2018 	 */
2019 	if (!pt_blk_is_in_section(msec, dip))
2020 		return 0;
2021 
2022 	/* Let's try to reach @nip's decision point from @insn.ip.
2023 	 *
2024 	 * There are two fields that may overflow: @bce.ninsn and
2025 	 * @bce.displacement.
2026 	 */
2027 	bce.ninsn += 1;
2028 	bce.displacement = (int32_t) disp;
2029 
2030 	/* If none of them overflowed, we're done.
2031 	 *
2032 	 * If one or both overflowed, let's try to insert a trampoline, i.e. we
2033 	 * try to reach @dip via a ptbq_again entry to @nip.
2034 	 */
2035 	if (!bce.ninsn || ((int64_t) bce.displacement != disp))
2036 		return pt_blk_add_trampoline(bcache, ioff, noff, insn.mode);
2037 
2038 	/* We're done.  Add the cache entry.
2039 	 *
2040 	 * There's a chance that other decoders updated the cache entry in the
2041 	 * meantime.  They should have come to the same conclusion as we,
2042 	 * though, and the cache entries should be identical.
2043 	 *
2044 	 * Cache updates are atomic so even if the two versions were not
2045 	 * identical, we wouldn't care because they are both correct.
2046 	 */
2047 	return pt_bcache_add(bcache, ioff, bce);
2048 }
2049 
2050 /* Proceed at a potentially truncated instruction.
2051  *
2052  * We were not able to decode the instruction at @decoder->ip in @decoder's
2053  * cached section.  This is typically caused by not having enough bytes.
2054  *
2055  * Try to decode the instruction again using the entire image.  If this succeeds
2056  * we expect to end up with an instruction that was truncated in the section it
2057  * started.  We provide the full instruction in this case and end the block.
2058  *
2059  * Returns zero on success, a negative error code otherwise.
2060  */
2061 static int pt_blk_proceed_truncated(struct pt_block_decoder *decoder,
2062 				    struct pt_block *block)
2063 {
2064 	struct pt_insn_ext iext;
2065 	struct pt_insn insn;
2066 	int errcode;
2067 
2068 	if (!decoder || !block)
2069 		return -pte_internal;
2070 
2071 	memset(&iext, 0, sizeof(iext));
2072 	memset(&insn, 0, sizeof(insn));
2073 
2074 	insn.mode = decoder->mode;
2075 	insn.ip = decoder->ip;
2076 
2077 	errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
2078 	if (errcode < 0)
2079 		return errcode;
2080 
2081 	/* We shouldn't use this function if the instruction isn't truncated. */
2082 	if (!insn.truncated)
2083 		return -pte_internal;
2084 
2085 	/* Provide the instruction in the block.  This ends the block. */
2086 	memcpy(block->raw, insn.raw, insn.size);
2087 	block->iclass = insn.iclass;
2088 	block->size = insn.size;
2089 	block->truncated = 1;
2090 
2091 	/* Log calls' return addresses for return compression. */
2092 	errcode = pt_blk_log_call(decoder, &insn, &iext);
2093 	if (errcode < 0)
2094 		return errcode;
2095 
2096 	/* Let's see if we can proceed to the next IP without trace.
2097 	 *
2098 	 * The truncated instruction ends the block but we still need to get the
2099 	 * next block's start IP.
2100 	 */
2101 	errcode = pt_insn_next_ip(&decoder->ip, &insn, &iext);
2102 	if (errcode < 0) {
2103 		if (errcode != -pte_bad_query)
2104 			return errcode;
2105 
2106 		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
2107 	}
2108 
2109 	return 0;
2110 }
2111 
2112 /* Proceed to the next decision point using the block cache.
2113  *
2114  * Tracing is enabled and we don't have an event pending.  We already set
2115  * @block's isid.  All reads are done within @msec as we're not switching
2116  * sections between blocks.
2117  *
2118  * Proceed as far as we get without trace.  Stop when we either:
2119  *
2120  *   - need trace in order to continue
2121  *   - overflow the max number of instructions in a block
2122  *
2123  * We actually proceed one instruction further to get the start IP for the next
2124  * block.  This only updates @decoder's internal state, though.
2125  *
2126  * Returns zero on success, a negative error code otherwise.
2127  */
2128 static int pt_blk_proceed_no_event_cached(struct pt_block_decoder *decoder,
2129 					  struct pt_block *block,
2130 					  struct pt_block_cache *bcache,
2131 					  const struct pt_mapped_section *msec)
2132 {
2133 	struct pt_bcache_entry bce;
2134 	uint16_t binsn, ninsn;
2135 	uint64_t offset, nip;
2136 	int status;
2137 
2138 	if (!decoder || !block)
2139 		return -pte_internal;
2140 
2141 	offset = pt_msec_unmap(msec, decoder->ip);
2142 	status = pt_bcache_lookup(&bce, bcache, offset);
2143 	if (status < 0)
2144 		return status;
2145 
2146 	/* If we don't find a valid cache entry, fill the cache. */
2147 	if (!pt_bce_is_valid(bce))
2148 		return pt_blk_proceed_no_event_fill_cache(decoder, block,
2149 							  bcache, msec,
2150 							  bcache_fill_steps);
2151 
2152 	/* If we switched sections, the origianl section must have been split
2153 	 * underneath us.  A split preserves the block cache of the original
2154 	 * section.
2155 	 *
2156 	 * Crossing sections requires ending the block so we can indicate the
2157 	 * proper isid for the entire block.
2158 	 *
2159 	 * Plus there's the chance that the new section that caused the original
2160 	 * section to split changed instructions.
2161 	 *
2162 	 * This check will also cover changes to a linear sequence of code we
2163 	 * would otherwise have jumped over as long as the start and end are in
2164 	 * different sub-sections.
2165 	 *
2166 	 * Since we stop on every (backwards) branch (through an artificial stop
2167 	 * in the case of a near direct backward branch) we will detect all
2168 	 * section splits.
2169 	 *
2170 	 * Switch to the slow path until we reach the end of this section.
2171 	 */
2172 	nip = decoder->ip + (uint64_t) (int64_t) bce.displacement;
2173 	if (!pt_blk_is_in_section(msec, nip))
2174 		return pt_blk_proceed_no_event_uncached(decoder, block);
2175 
2176 	/* We have a valid cache entry.  Let's first check if the way to the
2177 	 * decision point still fits into @block.
2178 	 *
2179 	 * If it doesn't, we end the block without filling it as much as we
2180 	 * could since this would require us to switch to the slow path.
2181 	 *
2182 	 * On the next iteration, we will start with an empty block, which is
2183 	 * guaranteed to have enough room for at least one block cache entry.
2184 	 */
2185 	binsn = block->ninsn;
2186 	ninsn = binsn + (uint16_t) bce.ninsn;
2187 	if (ninsn < binsn)
2188 		return 0;
2189 
2190 	/* Jump ahead to the decision point and proceed from there.
2191 	 *
2192 	 * We're not switching execution modes so even if @block already has an
2193 	 * execution mode, it will be the one we're going to set.
2194 	 */
2195 	decoder->ip = nip;
2196 
2197 	/* We don't know the instruction class so we should be setting it to
2198 	 * ptic_error.  Since we will be able to fill it back in later in most
2199 	 * cases, we move the clearing to the switch cases that don't.
2200 	 */
2201 	block->end_ip = nip;
2202 	block->ninsn = ninsn;
2203 	block->mode = pt_bce_exec_mode(bce);
2204 
2205 
2206 	switch (pt_bce_qualifier(bce)) {
2207 	case ptbq_again:
2208 		/* We're not able to reach the actual decision point due to
2209 		 * overflows so we inserted a trampoline.
2210 		 *
2211 		 * We don't know the instruction and it is not guaranteed that
2212 		 * we will proceed further (e.g. if @block overflowed).  Let's
2213 		 * clear any previously stored instruction class which has
2214 		 * become invalid when we updated @block->ninsn.
2215 		 */
2216 		block->iclass = ptic_error;
2217 
2218 		return pt_blk_proceed_no_event_cached(decoder, block, bcache,
2219 						      msec);
2220 
2221 	case ptbq_cond:
2222 		/* We're at a conditional branch. */
2223 		block->iclass = ptic_cond_jump;
2224 
2225 		/* Let's first check whether we know the size of the
2226 		 * instruction.  If we do, we might get away without decoding
2227 		 * the instruction.
2228 		 *
2229 		 * If we don't know the size we might as well do the full decode
2230 		 * and proceed-with-trace flow we do for ptbq_decode.
2231 		 */
2232 		if (bce.isize) {
2233 			uint64_t ip;
2234 			int taken;
2235 
2236 			/* If the branch is not taken, we don't need to decode
2237 			 * the instruction at @decoder->ip.
2238 			 *
2239 			 * If it is taken, we have to implement everything here.
2240 			 * We can't use the normal decode and proceed-with-trace
2241 			 * flow since we already consumed the TNT bit.
2242 			 */
2243 			status = pt_blk_cond_branch(decoder, &taken);
2244 			if (status < 0)
2245 				return status;
2246 
2247 			/* Preserve the query decoder's response which indicates
2248 			 * upcoming events.
2249 			 */
2250 			decoder->status = status;
2251 
2252 			ip = decoder->ip;
2253 			if (taken) {
2254 				struct pt_insn_ext iext;
2255 				struct pt_insn insn;
2256 
2257 				memset(&iext, 0, sizeof(iext));
2258 				memset(&insn, 0, sizeof(insn));
2259 
2260 				insn.mode = pt_bce_exec_mode(bce);
2261 				insn.ip = ip;
2262 
2263 				status = pt_blk_decode_in_section(&insn, &iext,
2264 								  msec);
2265 				if (status < 0)
2266 					return status;
2267 
2268 				ip += (uint64_t) (int64_t)
2269 					iext.variant.branch.displacement;
2270 			}
2271 
2272 			decoder->ip = ip + bce.isize;
2273 			break;
2274 		}
2275 
2276 		fallthrough;
2277 	case ptbq_decode: {
2278 		struct pt_insn_ext iext;
2279 		struct pt_insn insn;
2280 
2281 		/* We need to decode the instruction at @decoder->ip and decide
2282 		 * what to do based on that.
2283 		 *
2284 		 * We already accounted for the instruction so we can't just
2285 		 * call pt_blk_proceed_one_insn().
2286 		 */
2287 
2288 		memset(&iext, 0, sizeof(iext));
2289 		memset(&insn, 0, sizeof(insn));
2290 
2291 		insn.mode = pt_bce_exec_mode(bce);
2292 		insn.ip = decoder->ip;
2293 
2294 		status = pt_blk_decode_in_section(&insn, &iext, msec);
2295 		if (status < 0) {
2296 			if (status != -pte_bad_insn)
2297 				return status;
2298 
2299 			return pt_blk_proceed_truncated(decoder, block);
2300 		}
2301 
2302 		/* We just decoded @insn so we know the instruction class. */
2303 		block->iclass = insn.iclass;
2304 
2305 		/* Log calls' return addresses for return compression. */
2306 		status = pt_blk_log_call(decoder, &insn, &iext);
2307 		if (status < 0)
2308 			return status;
2309 
2310 		/* Let's see if we can proceed to the next IP without trace.
2311 		 *
2312 		 * Note that we also stop due to displacement overflows or to
2313 		 * maintain the return-address stack for near direct calls.
2314 		 */
2315 		status = pt_insn_next_ip(&decoder->ip, &insn, &iext);
2316 		if (status < 0) {
2317 			if (status != -pte_bad_query)
2318 				return status;
2319 
2320 			/* We can't, so let's proceed with trace, which
2321 			 * completes the block.
2322 			 */
2323 			return pt_blk_proceed_with_trace(decoder, &insn, &iext);
2324 		}
2325 
2326 		/* End the block if the user asked us to.
2327 		 *
2328 		 * We only need to take care about direct near branches.
2329 		 * Indirect and far branches require trace and will naturally
2330 		 * end a block.
2331 		 */
2332 		if ((decoder->flags.variant.block.end_on_call &&
2333 		     (insn.iclass == ptic_call)) ||
2334 		    (decoder->flags.variant.block.end_on_jump &&
2335 		     (insn.iclass == ptic_jump)))
2336 			break;
2337 
2338 		/* If we can proceed without trace and we stay in @msec we may
2339 		 * proceed further.
2340 		 *
2341 		 * We're done if we switch sections, though.
2342 		 */
2343 		if (!pt_blk_is_in_section(msec, decoder->ip))
2344 			break;
2345 
2346 		return pt_blk_proceed_no_event_cached(decoder, block, bcache,
2347 						      msec);
2348 	}
2349 
2350 	case ptbq_ind_call: {
2351 		uint64_t ip;
2352 
2353 		/* We're at a near indirect call. */
2354 		block->iclass = ptic_call;
2355 
2356 		/* We need to update the return-address stack and query the
2357 		 * destination IP.
2358 		 */
2359 		ip = decoder->ip;
2360 
2361 		/* If we already know the size of the instruction, we don't need
2362 		 * to re-decode it.
2363 		 */
2364 		if (bce.isize)
2365 			ip += bce.isize;
2366 		else {
2367 			struct pt_insn_ext iext;
2368 			struct pt_insn insn;
2369 
2370 			memset(&iext, 0, sizeof(iext));
2371 			memset(&insn, 0, sizeof(insn));
2372 
2373 			insn.mode = pt_bce_exec_mode(bce);
2374 			insn.ip = ip;
2375 
2376 			status = pt_blk_decode_in_section(&insn, &iext, msec);
2377 			if (status < 0)
2378 				return status;
2379 
2380 			ip += insn.size;
2381 		}
2382 
2383 		status = pt_retstack_push(&decoder->retstack, ip);
2384 		if (status < 0)
2385 			return status;
2386 
2387 		status = pt_blk_indirect_branch(decoder, &decoder->ip);
2388 		if (status < 0)
2389 			return status;
2390 
2391 		/* Preserve the query decoder's response which indicates
2392 		 * upcoming events.
2393 		 */
2394 		decoder->status = status;
2395 		break;
2396 	}
2397 
2398 	case ptbq_return: {
2399 		int taken;
2400 
2401 		/* We're at a near return. */
2402 		block->iclass = ptic_return;
2403 
2404 		/* Check for a compressed return. */
2405 		status = pt_blk_cond_branch(decoder, &taken);
2406 		if (status < 0) {
2407 			if (status != -pte_bad_query)
2408 				return status;
2409 
2410 			/* The return is not compressed.  We need another query
2411 			 * to determine the destination IP.
2412 			 */
2413 			status = pt_blk_indirect_branch(decoder, &decoder->ip);
2414 			if (status < 0)
2415 				return status;
2416 
2417 			/* Preserve the query decoder's response which indicates
2418 			 * upcoming events.
2419 			 */
2420 			decoder->status = status;
2421 			break;
2422 		}
2423 
2424 		/* Preserve the query decoder's response which indicates
2425 		 * upcoming events.
2426 		 */
2427 		decoder->status = status;
2428 
2429 		/* A compressed return is indicated by a taken conditional
2430 		 * branch.
2431 		 */
2432 		if (!taken)
2433 			return -pte_bad_retcomp;
2434 
2435 		return pt_retstack_pop(&decoder->retstack, &decoder->ip);
2436 	}
2437 
2438 	case ptbq_indirect:
2439 		/* We're at an indirect jump or far transfer.
2440 		 *
2441 		 * We don't know the exact instruction class and there's no
2442 		 * reason to decode the instruction for any other purpose.
2443 		 *
2444 		 * Indicate that we don't know the instruction class and leave
2445 		 * it to our caller to decode the instruction if needed.
2446 		 */
2447 		block->iclass = ptic_error;
2448 
2449 		/* This is neither a near call nor return so we don't need to
2450 		 * touch the return-address stack.
2451 		 *
2452 		 * Just query the destination IP.
2453 		 */
2454 		status = pt_blk_indirect_branch(decoder, &decoder->ip);
2455 		if (status < 0)
2456 			return status;
2457 
2458 		/* Preserve the query decoder's response which indicates
2459 		 * upcoming events.
2460 		 */
2461 		decoder->status = status;
2462 		break;
2463 	}
2464 
2465 	return 0;
2466 }
2467 
2468 static int pt_blk_msec_fill(struct pt_block_decoder *decoder,
2469 			    const struct pt_mapped_section **pmsec)
2470 {
2471 	const struct pt_mapped_section *msec;
2472 	struct pt_section *section;
2473 	int isid, errcode;
2474 
2475 	if (!decoder || !pmsec)
2476 		return -pte_internal;
2477 
2478 	isid = pt_msec_cache_fill(&decoder->scache, &msec,  decoder->image,
2479 				  &decoder->asid, decoder->ip);
2480 	if (isid < 0)
2481 		return isid;
2482 
2483 	section = pt_msec_section(msec);
2484 	if (!section)
2485 		return -pte_internal;
2486 
2487 	*pmsec = msec;
2488 
2489 	errcode = pt_section_request_bcache(section);
2490 	if (errcode < 0)
2491 		return errcode;
2492 
2493 	return isid;
2494 }
2495 
2496 static inline int pt_blk_msec_lookup(struct pt_block_decoder *decoder,
2497 				     const struct pt_mapped_section **pmsec)
2498 {
2499 	int isid;
2500 
2501 	if (!decoder)
2502 		return -pte_internal;
2503 
2504 	isid = pt_msec_cache_read(&decoder->scache, pmsec, decoder->image,
2505 				  decoder->ip);
2506 	if (isid < 0) {
2507 		if (isid != -pte_nomap)
2508 			return isid;
2509 
2510 		return pt_blk_msec_fill(decoder, pmsec);
2511 	}
2512 
2513 	return isid;
2514 }
2515 
2516 /* Proceed to the next decision point - try using the cache.
2517  *
2518  * Tracing is enabled and we don't have an event pending.  Proceed as far as
2519  * we get without trace.  Stop when we either:
2520  *
2521  *   - need trace in order to continue
2522  *   - overflow the max number of instructions in a block
2523  *
2524  * We actually proceed one instruction further to get the start IP for the next
2525  * block.  This only updates @decoder's internal state, though.
2526  *
2527  * Returns zero on success, a negative error code otherwise.
2528  */
2529 static int pt_blk_proceed_no_event(struct pt_block_decoder *decoder,
2530 				   struct pt_block *block)
2531 {
2532 	const struct pt_mapped_section *msec;
2533 	struct pt_block_cache *bcache;
2534 	struct pt_section *section;
2535 	int isid;
2536 
2537 	if (!decoder || !block)
2538 		return -pte_internal;
2539 
2540 	isid = pt_blk_msec_lookup(decoder, &msec);
2541 	if (isid < 0) {
2542 		if (isid != -pte_nomap)
2543 			return isid;
2544 
2545 		/* Even if there is no such section in the image, we may still
2546 		 * read the memory via the callback function.
2547 		 */
2548 		return pt_blk_proceed_no_event_uncached(decoder, block);
2549 	}
2550 
2551 	/* We do not switch sections inside a block. */
2552 	if (isid != block->isid) {
2553 		if (!pt_blk_block_is_empty(block))
2554 			return 0;
2555 
2556 		block->isid = isid;
2557 	}
2558 
2559 	section = pt_msec_section(msec);
2560 	if (!section)
2561 		return -pte_internal;
2562 
2563 	bcache = pt_section_bcache(section);
2564 	if (!bcache)
2565 		return pt_blk_proceed_no_event_uncached(decoder, block);
2566 
2567 	return pt_blk_proceed_no_event_cached(decoder, block, bcache, msec);
2568 }
2569 
2570 /* Proceed to the next event or decision point.
2571  *
2572  * Returns a non-negative pt_status_flag bit-vector on success, a negative error
2573  * code otherwise.
2574  */
2575 static int pt_blk_proceed(struct pt_block_decoder *decoder,
2576 			  struct pt_block *block)
2577 {
2578 	int status;
2579 
2580 	status = pt_blk_fetch_event(decoder);
2581 	if (status != 0) {
2582 		if (status < 0)
2583 			return status;
2584 
2585 		return pt_blk_proceed_event(decoder, block);
2586 	}
2587 
2588 	/* If tracing is disabled we should either be out of trace or we should
2589 	 * have taken the event flow above.
2590 	 */
2591 	if (!decoder->enabled) {
2592 		if (decoder->status & pts_eos)
2593 			return -pte_eos;
2594 
2595 		return -pte_no_enable;
2596 	}
2597 
2598 	status = pt_blk_proceed_no_event(decoder, block);
2599 	if (status < 0)
2600 		return status;
2601 
2602 	return pt_blk_proceed_trailing_event(decoder, block);
2603 }
2604 
enum {
	/* Upper bound on the number of steps the BDM64 workaround allows
	 * pt_insn_range_is_contiguous() to take when checking whether the
	 * event location can be reached.
	 */
	bdm64_max_steps	= 256
};
2611 
2612 /* Try to work around erratum BDM64.
2613  *
2614  * If we got a transaction abort immediately following a branch that produced
2615  * trace, the trace for that branch might have been corrupted.
2616  *
2617  * Returns a positive integer if the erratum was handled.
2618  * Returns zero if the erratum does not seem to apply.
2619  * Returns a negative error code otherwise.
2620  */
2621 static int pt_blk_handle_erratum_bdm64(struct pt_block_decoder *decoder,
2622 				       const struct pt_block *block,
2623 				       const struct pt_event *ev)
2624 {
2625 	struct pt_insn_ext iext;
2626 	struct pt_insn insn;
2627 	int status;
2628 
2629 	if (!decoder || !block || !ev)
2630 		return -pte_internal;
2631 
2632 	/* This only affects aborts. */
2633 	if (!ev->variant.tsx.aborted)
2634 		return 0;
2635 
2636 	/* This only affects branches that require trace.
2637 	 *
2638 	 * If the erratum hits, that branch ended the current block and brought
2639 	 * us to the trailing event flow.
2640 	 */
2641 	if (pt_blk_block_is_empty(block))
2642 		return 0;
2643 
2644 	insn.mode = block->mode;
2645 	insn.ip = block->end_ip;
2646 
2647 	status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
2648 	if (status < 0)
2649 		return 0;
2650 
2651 	if (!pt_insn_is_branch(&insn, &iext))
2652 		return 0;
2653 
2654 	/* Let's check if we can reach the event location from here.
2655 	 *
2656 	 * If we can, let's assume the erratum did not hit.  We might still be
2657 	 * wrong but we're not able to tell.
2658 	 */
2659 	status = pt_insn_range_is_contiguous(decoder->ip, ev->variant.tsx.ip,
2660 					     decoder->mode, decoder->image,
2661 					     &decoder->asid, bdm64_max_steps);
2662 	if (status > 0)
2663 		return status;
2664 
2665 	/* We can't reach the event location.  This could either mean that we
2666 	 * stopped too early (and status is zero) or that the erratum hit.
2667 	 *
2668 	 * We assume the latter and pretend that the previous branch brought us
2669 	 * to the event location, instead.
2670 	 */
2671 	decoder->ip = ev->variant.tsx.ip;
2672 
2673 	return 1;
2674 }
2675 
2676 /* Check whether a trailing TSX event should be postponed.
2677  *
2678  * This involves handling erratum BDM64.
2679  *
2680  * Returns a positive integer if the event is to be postponed.
2681  * Returns zero if the event should be processed.
2682  * Returns a negative error code otherwise.
2683  */
2684 static inline int pt_blk_postpone_trailing_tsx(struct pt_block_decoder *decoder,
2685 					       struct pt_block *block,
2686 					       const struct pt_event *ev)
2687 {
2688 	int status;
2689 
2690 	if (!decoder || !ev)
2691 		return -pte_internal;
2692 
2693 	if (ev->ip_suppressed)
2694 		return 0;
2695 
2696 	if (block && decoder->query.config.errata.bdm64) {
2697 		status = pt_blk_handle_erratum_bdm64(decoder, block, ev);
2698 		if (status < 0)
2699 			return 1;
2700 	}
2701 
2702 	if (decoder->ip != ev->variant.tsx.ip)
2703 		return 1;
2704 
2705 	return 0;
2706 }
2707 
2708 /* Proceed with events that bind to the current decoder IP.
2709  *
2710  * This function is used in the following scenarios:
2711  *
2712  *   - we just synchronized onto the trace stream
2713  *   - we ended a block and proceeded to the next IP
2714  *   - we processed an event that was indicated by this function
2715  *
2716  * Check if there is an event at the current IP that needs to be indicated to
2717  * the user.
2718  *
2719  * Returns a non-negative pt_status_flag bit-vector on success, a negative error
2720  * code otherwise.
2721  */
2722 static int pt_blk_proceed_trailing_event(struct pt_block_decoder *decoder,
2723 					 struct pt_block *block)
2724 {
2725 	struct pt_event *ev;
2726 	int status;
2727 
2728 	if (!decoder)
2729 		return -pte_internal;
2730 
2731 	status = pt_blk_fetch_event(decoder);
2732 	if (status <= 0) {
2733 		if (status < 0)
2734 			return status;
2735 
2736 		status = pt_blk_proceed_postponed_insn(decoder);
2737 		if (status < 0)
2738 			return status;
2739 
2740 		return pt_blk_status(decoder, 0);
2741 	}
2742 
2743 	ev = &decoder->event;
2744 	switch (ev->type) {
2745 	case ptev_disabled:
2746 		/* Synchronous disable events are normally indicated on the
2747 		 * event flow.
2748 		 */
2749 		if (!decoder->process_insn)
2750 			break;
2751 
2752 		/* A sync disable may bind to a CR3 changing instruction. */
2753 		if (ev->ip_suppressed &&
2754 		    pt_insn_changes_cr3(&decoder->insn, &decoder->iext))
2755 			return pt_blk_status(decoder, pts_event_pending);
2756 
2757 		/* Or it binds to the next branch that would require trace.
2758 		 *
2759 		 * Try to complete processing the current instruction by
2760 		 * proceeding past it.  If that fails because it would require
2761 		 * trace, we can apply the disabled event.
2762 		 */
2763 		status = pt_insn_next_ip(&decoder->ip, &decoder->insn,
2764 					 &decoder->iext);
2765 		if (status < 0) {
2766 			if (status != -pte_bad_query)
2767 				return status;
2768 
2769 			status = pt_blk_set_disable_resume_ip(decoder,
2770 							      &decoder->insn);
2771 			if (status < 0)
2772 				return status;
2773 
2774 			return pt_blk_status(decoder, pts_event_pending);
2775 		}
2776 
2777 		/* We proceeded past the current instruction. */
2778 		status = pt_blk_clear_postponed_insn(decoder);
2779 		if (status < 0)
2780 			return status;
2781 
2782 		/* This might have brought us to the disable IP. */
2783 		if (!ev->ip_suppressed &&
2784 		    decoder->ip == ev->variant.disabled.ip)
2785 			return pt_blk_status(decoder, pts_event_pending);
2786 
2787 		break;
2788 
2789 	case ptev_enabled:
2790 		/* This event does not bind to an instruction. */
2791 		status = pt_blk_proceed_postponed_insn(decoder);
2792 		if (status < 0)
2793 			return status;
2794 
2795 		return pt_blk_status(decoder, pts_event_pending);
2796 
2797 	case ptev_async_disabled:
2798 		/* This event does not bind to an instruction. */
2799 		status = pt_blk_proceed_postponed_insn(decoder);
2800 		if (status < 0)
2801 			return status;
2802 
2803 		if (decoder->ip != ev->variant.async_disabled.at)
2804 			break;
2805 
2806 		if (decoder->query.config.errata.skd022) {
2807 			status = pt_blk_handle_erratum_skd022(decoder, ev);
2808 			if (status != 0) {
2809 				if (status < 0)
2810 					return status;
2811 
2812 				/* If the erratum applies, the event is modified
2813 				 * to a synchronous disable event that will be
2814 				 * processed on the next pt_blk_proceed_event()
2815 				 * call.  We're done.
2816 				 */
2817 				break;
2818 			}
2819 		}
2820 
2821 		return pt_blk_status(decoder, pts_event_pending);
2822 
2823 	case ptev_async_branch:
2824 		/* This event does not bind to an instruction. */
2825 		status = pt_blk_proceed_postponed_insn(decoder);
2826 		if (status < 0)
2827 			return status;
2828 
2829 		if (decoder->ip != ev->variant.async_branch.from)
2830 			break;
2831 
2832 		return pt_blk_status(decoder, pts_event_pending);
2833 
2834 	case ptev_paging:
2835 		/* We apply the event immediately if we're not tracing. */
2836 		if (!decoder->enabled)
2837 			return pt_blk_status(decoder, pts_event_pending);
2838 
2839 		/* Synchronous paging events are normally indicated on the event
2840 		 * flow, unless they bind to the same instruction as a previous
2841 		 * event.
2842 		 *
2843 		 * We bind at most one paging event to an instruction, though.
2844 		 */
2845 		if (!decoder->process_insn || decoder->bound_paging)
2846 			break;
2847 
2848 		/* We're done if we're not binding to the currently postponed
2849 		 * instruction.  We will process the event on the normal event
2850 		 * flow in the next iteration.
2851 		 */
2852 		if (!pt_insn_binds_to_pip(&decoder->insn, &decoder->iext))
2853 			break;
2854 
2855 		/* We bound a paging event.  Make sure we do not bind further
2856 		 * paging events to this instruction.
2857 		 */
2858 		decoder->bound_paging = 1;
2859 
2860 		return pt_blk_status(decoder, pts_event_pending);
2861 
2862 	case ptev_async_paging:
2863 		/* This event does not bind to an instruction. */
2864 		status = pt_blk_proceed_postponed_insn(decoder);
2865 		if (status < 0)
2866 			return status;
2867 
2868 		if (!ev->ip_suppressed &&
2869 		    decoder->ip != ev->variant.async_paging.ip)
2870 			break;
2871 
2872 		return pt_blk_status(decoder, pts_event_pending);
2873 
2874 	case ptev_vmcs:
2875 		/* We apply the event immediately if we're not tracing. */
2876 		if (!decoder->enabled)
2877 			return pt_blk_status(decoder, pts_event_pending);
2878 
2879 		/* Synchronous vmcs events are normally indicated on the event
2880 		 * flow, unless they bind to the same instruction as a previous
2881 		 * event.
2882 		 *
2883 		 * We bind at most one vmcs event to an instruction, though.
2884 		 */
2885 		if (!decoder->process_insn || decoder->bound_vmcs)
2886 			break;
2887 
2888 		/* We're done if we're not binding to the currently postponed
2889 		 * instruction.  We will process the event on the normal event
2890 		 * flow in the next iteration.
2891 		 */
2892 		if (!pt_insn_binds_to_vmcs(&decoder->insn, &decoder->iext))
2893 			break;
2894 
2895 		/* We bound a vmcs event.  Make sure we do not bind further vmcs
2896 		 * events to this instruction.
2897 		 */
2898 		decoder->bound_vmcs = 1;
2899 
2900 		return pt_blk_status(decoder, pts_event_pending);
2901 
2902 	case ptev_async_vmcs:
2903 		/* This event does not bind to an instruction. */
2904 		status = pt_blk_proceed_postponed_insn(decoder);
2905 		if (status < 0)
2906 			return status;
2907 
2908 		if (!ev->ip_suppressed &&
2909 		    decoder->ip != ev->variant.async_vmcs.ip)
2910 			break;
2911 
2912 		return pt_blk_status(decoder, pts_event_pending);
2913 
2914 	case ptev_overflow:
2915 		/* This event does not bind to an instruction. */
2916 		status = pt_blk_proceed_postponed_insn(decoder);
2917 		if (status < 0)
2918 			return status;
2919 
2920 		return pt_blk_status(decoder, pts_event_pending);
2921 
2922 	case ptev_exec_mode:
2923 		/* This event does not bind to an instruction. */
2924 		status = pt_blk_proceed_postponed_insn(decoder);
2925 		if (status < 0)
2926 			return status;
2927 
2928 		if (!ev->ip_suppressed &&
2929 		    decoder->ip != ev->variant.exec_mode.ip)
2930 			break;
2931 
2932 		return pt_blk_status(decoder, pts_event_pending);
2933 
2934 	case ptev_tsx:
2935 		/* This event does not bind to an instruction. */
2936 		status = pt_blk_proceed_postponed_insn(decoder);
2937 		if (status < 0)
2938 			return status;
2939 
2940 		status = pt_blk_postpone_trailing_tsx(decoder, block, ev);
2941 		if (status != 0) {
2942 			if (status < 0)
2943 				return status;
2944 
2945 			break;
2946 		}
2947 
2948 		return pt_blk_status(decoder, pts_event_pending);
2949 
2950 	case ptev_stop:
2951 		/* This event does not bind to an instruction. */
2952 		status = pt_blk_proceed_postponed_insn(decoder);
2953 		if (status < 0)
2954 			return status;
2955 
2956 		return pt_blk_status(decoder, pts_event_pending);
2957 
2958 	case ptev_exstop:
2959 		/* This event does not bind to an instruction. */
2960 		status = pt_blk_proceed_postponed_insn(decoder);
2961 		if (status < 0)
2962 			return status;
2963 
2964 		if (!ev->ip_suppressed && decoder->enabled &&
2965 		    decoder->ip != ev->variant.exstop.ip)
2966 			break;
2967 
2968 		return pt_blk_status(decoder, pts_event_pending);
2969 
2970 	case ptev_mwait:
2971 		/* This event does not bind to an instruction. */
2972 		status = pt_blk_proceed_postponed_insn(decoder);
2973 		if (status < 0)
2974 			return status;
2975 
2976 		if (!ev->ip_suppressed && decoder->enabled &&
2977 		    decoder->ip != ev->variant.mwait.ip)
2978 			break;
2979 
2980 		return pt_blk_status(decoder, pts_event_pending);
2981 
2982 	case ptev_pwre:
2983 	case ptev_pwrx:
2984 		/* This event does not bind to an instruction. */
2985 		status = pt_blk_proceed_postponed_insn(decoder);
2986 		if (status < 0)
2987 			return status;
2988 
2989 		return pt_blk_status(decoder, pts_event_pending);
2990 
2991 	case ptev_ptwrite:
2992 		/* We apply the event immediately if we're not tracing. */
2993 		if (!decoder->enabled)
2994 			return pt_blk_status(decoder, pts_event_pending);
2995 
2996 		/* Ptwrite events are normally indicated on the event flow,
2997 		 * unless they bind to the same instruction as a previous event.
2998 		 *
2999 		 * We bind at most one ptwrite event to an instruction, though.
3000 		 */
3001 		if (!decoder->process_insn || decoder->bound_ptwrite)
3002 			break;
3003 
3004 		/* We're done if we're not binding to the currently postponed
3005 		 * instruction.  We will process the event on the normal event
3006 		 * flow in the next iteration.
3007 		 */
3008 		if (!ev->ip_suppressed ||
3009 		    !pt_insn_is_ptwrite(&decoder->insn, &decoder->iext))
3010 			break;
3011 
3012 		/* We bound a ptwrite event.  Make sure we do not bind further
3013 		 * ptwrite events to this instruction.
3014 		 */
3015 		decoder->bound_ptwrite = 1;
3016 
3017 		return pt_blk_status(decoder, pts_event_pending);
3018 
3019 	case ptev_tick:
3020 	case ptev_cbr:
3021 	case ptev_mnt:
3022 		/* This event does not bind to an instruction. */
3023 		status = pt_blk_proceed_postponed_insn(decoder);
3024 		if (status < 0)
3025 			return status;
3026 
3027 		return pt_blk_status(decoder, pts_event_pending);
3028 	}
3029 
3030 	/* No further events.  Proceed past any postponed instruction. */
3031 	status = pt_blk_proceed_postponed_insn(decoder);
3032 	if (status < 0)
3033 		return status;
3034 
3035 	return pt_blk_status(decoder, 0);
3036 }
3037 
3038 int pt_blk_next(struct pt_block_decoder *decoder, struct pt_block *ublock,
3039 		size_t size)
3040 {
3041 	struct pt_block block, *pblock;
3042 	int errcode, status;
3043 
3044 	if (!decoder || !ublock)
3045 		return -pte_invalid;
3046 
3047 	pblock = size == sizeof(block) ? ublock : &block;
3048 
3049 	/* Zero-initialize the block in case of error returns. */
3050 	memset(pblock, 0, sizeof(*pblock));
3051 
3052 	/* Fill in a few things from the current decode state.
3053 	 *
3054 	 * This reflects the state of the last pt_blk_next() or pt_blk_start()
3055 	 * call.  Note that, unless we stop with tracing disabled, we proceed
3056 	 * already to the start IP of the next block.
3057 	 *
3058 	 * Some of the state may later be overwritten as we process events.
3059 	 */
3060 	pblock->ip = decoder->ip;
3061 	pblock->mode = decoder->mode;
3062 	if (decoder->speculative)
3063 		pblock->speculative = 1;
3064 
3065 	/* Proceed one block. */
3066 	status = pt_blk_proceed(decoder, pblock);
3067 
3068 	errcode = block_to_user(ublock, size, pblock);
3069 	if (errcode < 0)
3070 		return errcode;
3071 
3072 	return status;
3073 }
3074 
3075 /* Process an enabled event.
3076  *
3077  * Returns zero on success, a negative error code otherwise.
3078  */
3079 static int pt_blk_process_enabled(struct pt_block_decoder *decoder,
3080 				  const struct pt_event *ev)
3081 {
3082 	if (!decoder || !ev)
3083 		return -pte_internal;
3084 
3085 	/* This event can't be a status update. */
3086 	if (ev->status_update)
3087 		return -pte_bad_context;
3088 
3089 	/* We must have an IP in order to start decoding. */
3090 	if (ev->ip_suppressed)
3091 		return -pte_noip;
3092 
3093 	/* We must currently be disabled. */
3094 	if (decoder->enabled)
3095 		return -pte_bad_context;
3096 
3097 	decoder->ip = ev->variant.enabled.ip;
3098 	decoder->enabled = 1;
3099 	decoder->process_event = 0;
3100 
3101 	return 0;
3102 }
3103 
3104 /* Process a disabled event.
3105  *
3106  * Returns zero on success, a negative error code otherwise.
3107  */
3108 static int pt_blk_process_disabled(struct pt_block_decoder *decoder,
3109 				   const struct pt_event *ev)
3110 {
3111 	if (!decoder || !ev)
3112 		return -pte_internal;
3113 
3114 	/* This event can't be a status update. */
3115 	if (ev->status_update)
3116 		return -pte_bad_context;
3117 
3118 	/* We must currently be enabled. */
3119 	if (!decoder->enabled)
3120 		return -pte_bad_context;
3121 
3122 	/* We preserve @decoder->ip.  This is where we expect tracing to resume
3123 	 * and we'll indicate that on the subsequent enabled event if tracing
3124 	 * actually does resume from there.
3125 	 */
3126 	decoder->enabled = 0;
3127 	decoder->process_event = 0;
3128 
3129 	return 0;
3130 }
3131 
3132 /* Process an asynchronous branch event.
3133  *
3134  * Returns zero on success, a negative error code otherwise.
3135  */
3136 static int pt_blk_process_async_branch(struct pt_block_decoder *decoder,
3137 				       const struct pt_event *ev)
3138 {
3139 	if (!decoder || !ev)
3140 		return -pte_internal;
3141 
3142 	/* This event can't be a status update. */
3143 	if (ev->status_update)
3144 		return -pte_bad_context;
3145 
3146 	/* We must currently be enabled. */
3147 	if (!decoder->enabled)
3148 		return -pte_bad_context;
3149 
3150 	/* Jump to the branch destination.  We will continue from there in the
3151 	 * next iteration.
3152 	 */
3153 	decoder->ip = ev->variant.async_branch.to;
3154 	decoder->process_event = 0;
3155 
3156 	return 0;
3157 }
3158 
3159 /* Process a paging event.
3160  *
3161  * Returns zero on success, a negative error code otherwise.
3162  */
3163 static int pt_blk_process_paging(struct pt_block_decoder *decoder,
3164 				 const struct pt_event *ev)
3165 {
3166 	uint64_t cr3;
3167 	int errcode;
3168 
3169 	if (!decoder || !ev)
3170 		return -pte_internal;
3171 
3172 	cr3 = ev->variant.paging.cr3;
3173 	if (decoder->asid.cr3 != cr3) {
3174 		errcode = pt_msec_cache_invalidate(&decoder->scache);
3175 		if (errcode < 0)
3176 			return errcode;
3177 
3178 		decoder->asid.cr3 = cr3;
3179 	}
3180 
3181 	decoder->process_event = 0;
3182 
3183 	return 0;
3184 }
3185 
3186 /* Process a vmcs event.
3187  *
3188  * Returns zero on success, a negative error code otherwise.
3189  */
3190 static int pt_blk_process_vmcs(struct pt_block_decoder *decoder,
3191 			       const struct pt_event *ev)
3192 {
3193 	uint64_t vmcs;
3194 	int errcode;
3195 
3196 	if (!decoder || !ev)
3197 		return -pte_internal;
3198 
3199 	vmcs = ev->variant.vmcs.base;
3200 	if (decoder->asid.vmcs != vmcs) {
3201 		errcode = pt_msec_cache_invalidate(&decoder->scache);
3202 		if (errcode < 0)
3203 			return errcode;
3204 
3205 		decoder->asid.vmcs = vmcs;
3206 	}
3207 
3208 	decoder->process_event = 0;
3209 
3210 	return 0;
3211 }
3212 
3213 /* Process an overflow event.
3214  *
3215  * Returns zero on success, a negative error code otherwise.
3216  */
3217 static int pt_blk_process_overflow(struct pt_block_decoder *decoder,
3218 				   const struct pt_event *ev)
3219 {
3220 	if (!decoder || !ev)
3221 		return -pte_internal;
3222 
3223 	/* This event can't be a status update. */
3224 	if (ev->status_update)
3225 		return -pte_bad_context;
3226 
3227 	/* If the IP is suppressed, the overflow resolved while tracing was
3228 	 * disabled.  Otherwise it resolved while tracing was enabled.
3229 	 */
3230 	if (ev->ip_suppressed) {
3231 		/* Tracing is disabled.  It doesn't make sense to preserve the
3232 		 * previous IP.  This will just be misleading.  Even if tracing
3233 		 * had been disabled before, as well, we might have missed the
3234 		 * re-enable in the overflow.
3235 		 */
3236 		decoder->enabled = 0;
3237 		decoder->ip = 0ull;
3238 	} else {
3239 		/* Tracing is enabled and we're at the IP at which the overflow
3240 		 * resolved.
3241 		 */
3242 		decoder->enabled = 1;
3243 		decoder->ip = ev->variant.overflow.ip;
3244 	}
3245 
3246 	/* We don't know the TSX state.  Let's assume we execute normally.
3247 	 *
3248 	 * We also don't know the execution mode.  Let's keep what we have
3249 	 * in case we don't get an update before we have to decode the next
3250 	 * instruction.
3251 	 */
3252 	decoder->speculative = 0;
3253 	decoder->process_event = 0;
3254 
3255 	return 0;
3256 }
3257 
3258 /* Process an exec mode event.
3259  *
3260  * Returns zero on success, a negative error code otherwise.
3261  */
3262 static int pt_blk_process_exec_mode(struct pt_block_decoder *decoder,
3263 				    const struct pt_event *ev)
3264 {
3265 	enum pt_exec_mode mode;
3266 
3267 	if (!decoder || !ev)
3268 		return -pte_internal;
3269 
3270 	/* Use status update events to diagnose inconsistencies. */
3271 	mode = ev->variant.exec_mode.mode;
3272 	if (ev->status_update && decoder->enabled &&
3273 	    decoder->mode != ptem_unknown && decoder->mode != mode)
3274 		return -pte_bad_status_update;
3275 
3276 	decoder->mode = mode;
3277 	decoder->process_event = 0;
3278 
3279 	return 0;
3280 }
3281 
3282 /* Process a tsx event.
3283  *
3284  * Returns zero on success, a negative error code otherwise.
3285  */
3286 static int pt_blk_process_tsx(struct pt_block_decoder *decoder,
3287 			      const struct pt_event *ev)
3288 {
3289 	if (!decoder || !ev)
3290 		return -pte_internal;
3291 
3292 	decoder->speculative = ev->variant.tsx.speculative;
3293 	decoder->process_event = 0;
3294 
3295 	return 0;
3296 }
3297 
3298 /* Process a stop event.
3299  *
3300  * Returns zero on success, a negative error code otherwise.
3301  */
3302 static int pt_blk_process_stop(struct pt_block_decoder *decoder,
3303 			       const struct pt_event *ev)
3304 {
3305 	if (!decoder || !ev)
3306 		return -pte_internal;
3307 
3308 	/* This event can't be a status update. */
3309 	if (ev->status_update)
3310 		return -pte_bad_context;
3311 
3312 	/* Tracing is always disabled before it is stopped. */
3313 	if (decoder->enabled)
3314 		return -pte_bad_context;
3315 
3316 	decoder->process_event = 0;
3317 
3318 	return 0;
3319 }
3320 
3321 int pt_blk_event(struct pt_block_decoder *decoder, struct pt_event *uevent,
3322 		 size_t size)
3323 {
3324 	struct pt_event *ev;
3325 	int status;
3326 
3327 	if (!decoder || !uevent)
3328 		return -pte_invalid;
3329 
3330 	/* We must currently process an event. */
3331 	if (!decoder->process_event)
3332 		return -pte_bad_query;
3333 
3334 	ev = &decoder->event;
3335 	switch (ev->type) {
3336 	case ptev_enabled:
3337 		/* Indicate that tracing resumes from the IP at which tracing
3338 		 * had been disabled before (with some special treatment for
3339 		 * calls).
3340 		 */
3341 		if (ev->variant.enabled.ip == decoder->ip)
3342 			ev->variant.enabled.resumed = 1;
3343 
3344 		status = pt_blk_process_enabled(decoder, ev);
3345 		if (status < 0)
3346 			return status;
3347 
3348 		break;
3349 
3350 	case ptev_async_disabled:
3351 		if (decoder->ip != ev->variant.async_disabled.at)
3352 			return -pte_bad_query;
3353 
3354 		fallthrough;
3355 	case ptev_disabled:
3356 
3357 		status = pt_blk_process_disabled(decoder, ev);
3358 		if (status < 0)
3359 			return status;
3360 
3361 		break;
3362 
3363 	case ptev_async_branch:
3364 		if (decoder->ip != ev->variant.async_branch.from)
3365 			return -pte_bad_query;
3366 
3367 		status = pt_blk_process_async_branch(decoder, ev);
3368 		if (status < 0)
3369 			return status;
3370 
3371 		break;
3372 
3373 	case ptev_async_paging:
3374 		if (!ev->ip_suppressed &&
3375 		    decoder->ip != ev->variant.async_paging.ip)
3376 			return -pte_bad_query;
3377 
3378 		fallthrough;
3379 	case ptev_paging:
3380 		status = pt_blk_process_paging(decoder, ev);
3381 		if (status < 0)
3382 			return status;
3383 
3384 		break;
3385 
3386 	case ptev_async_vmcs:
3387 		if (!ev->ip_suppressed &&
3388 		    decoder->ip != ev->variant.async_vmcs.ip)
3389 			return -pte_bad_query;
3390 
3391 		fallthrough;
3392 	case ptev_vmcs:
3393 		status = pt_blk_process_vmcs(decoder, ev);
3394 		if (status < 0)
3395 			return status;
3396 
3397 		break;
3398 
3399 	case ptev_overflow:
3400 		status = pt_blk_process_overflow(decoder, ev);
3401 		if (status < 0)
3402 			return status;
3403 
3404 		break;
3405 
3406 	case ptev_exec_mode:
3407 		if (!ev->ip_suppressed &&
3408 		    decoder->ip != ev->variant.exec_mode.ip)
3409 			return -pte_bad_query;
3410 
3411 		status = pt_blk_process_exec_mode(decoder, ev);
3412 		if (status < 0)
3413 			return status;
3414 
3415 		break;
3416 
3417 	case ptev_tsx:
3418 		if (!ev->ip_suppressed && decoder->ip != ev->variant.tsx.ip)
3419 			return -pte_bad_query;
3420 
3421 		status = pt_blk_process_tsx(decoder, ev);
3422 		if (status < 0)
3423 			return status;
3424 
3425 		break;
3426 
3427 	case ptev_stop:
3428 		status = pt_blk_process_stop(decoder, ev);
3429 		if (status < 0)
3430 			return status;
3431 
3432 		break;
3433 
3434 	case ptev_exstop:
3435 		if (!ev->ip_suppressed && decoder->enabled &&
3436 		    decoder->ip != ev->variant.exstop.ip)
3437 			return -pte_bad_query;
3438 
3439 		decoder->process_event = 0;
3440 		break;
3441 
3442 	case ptev_mwait:
3443 		if (!ev->ip_suppressed && decoder->enabled &&
3444 		    decoder->ip != ev->variant.mwait.ip)
3445 			return -pte_bad_query;
3446 
3447 		decoder->process_event = 0;
3448 		break;
3449 
3450 	case ptev_pwre:
3451 	case ptev_pwrx:
3452 	case ptev_ptwrite:
3453 	case ptev_tick:
3454 	case ptev_cbr:
3455 	case ptev_mnt:
3456 		decoder->process_event = 0;
3457 		break;
3458 	}
3459 
3460 	/* Copy the event to the user.  Make sure we're not writing beyond the
3461 	 * memory provided by the user.
3462 	 *
3463 	 * We might truncate details of an event but only for those events the
3464 	 * user can't know about, anyway.
3465 	 */
3466 	if (sizeof(*ev) < size)
3467 		size = sizeof(*ev);
3468 
3469 	memcpy(uevent, ev, size);
3470 
3471 	/* Indicate further events. */
3472 	return pt_blk_proceed_trailing_event(decoder, NULL);
3473 }
3474